diff options
Diffstat (limited to 'arch/x86/kernel/alternative.c')
| -rw-r--r-- | arch/x86/kernel/alternative.c | 645 |
1 files changed, 417 insertions, 228 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 45d79ea890a..703130f469e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,33 +1,29 @@ +#define pr_fmt(fmt) "SMP alternatives: " fmt + #include <linux/module.h> #include <linux/sched.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/list.h> -#include <linux/kprobes.h> +#include <linux/stringify.h> #include <linux/mm.h> #include <linux/vmalloc.h> +#include <linux/memory.h> +#include <linux/stop_machine.h> +#include <linux/slab.h> +#include <linux/kdebug.h> #include <asm/alternative.h> #include <asm/sections.h> #include <asm/pgtable.h> #include <asm/mce.h> #include <asm/nmi.h> -#include <asm/vsyscall.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/io.h> +#include <asm/fixmap.h> #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - -static int debug_alternative; +static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { @@ -46,7 +42,7 @@ static int __init setup_noreplace_smp(char *str) __setup("noreplace-smp", setup_noreplace_smp); #ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; +static int __initdata_or_module noreplace_paravirt = 0; static int __init setup_noreplace_paravirt(char *str) { @@ -56,18 +52,36 @@ static int __init setup_noreplace_paravirt(char *str) __setup("noreplace-paravirt", setup_noreplace_paravirt); #endif -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) - -#ifdef GENERIC_NOP1 -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.section .rodata, \"a\"\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -extern const unsigned char intelnops[]; -static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { +#define DPRINTK(fmt, ...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) + +/* + * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes + * that correspond to that nop. Getting from one nop to the next, we + * add to the array the offset that is equal to the sum of all sizes of + * nops preceding the one we are after. + * + * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the + * nice symmetry of sizes of the previous nops. + */ +#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) +static const unsigned char intelnops[] = +{ + GENERIC_NOP1, + GENERIC_NOP2, + GENERIC_NOP3, + GENERIC_NOP4, + GENERIC_NOP5, + GENERIC_NOP6, + GENERIC_NOP7, + GENERIC_NOP8, + GENERIC_NOP5_ATOMIC +}; +static const unsigned char * const intel_nops[ASM_NOP_MAX+2] = +{ NULL, intelnops, intelnops + 1, @@ -77,15 +91,25 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { intelnops + 1 + 2 + 3 + 4 + 5, intelnops + 1 + 2 + 3 + 4 + 5 + 6, intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif #ifdef K8_NOP1 -asm("\t.section .rodata, \"a\"\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -extern const unsigned char k8nops[]; -static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char k8nops[] = +{ + K8_NOP1, + K8_NOP2, + K8_NOP3, + K8_NOP4, + K8_NOP5, + K8_NOP6, + K8_NOP7, + K8_NOP8, + K8_NOP5_ATOMIC +}; +static const unsigned char * const k8_nops[ASM_NOP_MAX+2] = +{ NULL, k8nops, k8nops + 1, @@ -95,15 +119,25 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { k8nops + 1 + 2 + 3 + 4 + 5, k8nops + 1 + 2 + 3 + 4 + 5 + 6, k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif -#ifdef K7_NOP1 -asm("\t.section .rodata, \"a\"\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); -extern const unsigned char k7nops[]; -static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { +#if defined(K7_NOP1) && !defined(CONFIG_X86_64) +static const unsigned char k7nops[] = +{ + K7_NOP1, + K7_NOP2, + K7_NOP3, + K7_NOP4, + K7_NOP5, + K7_NOP6, + K7_NOP7, + K7_NOP8, + K7_NOP5_ATOMIC +}; +static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = +{ NULL, k7nops, k7nops + 1, @@ -113,15 +147,25 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { k7nops + 1 + 2 + 3 + 4 + 5, k7nops + 1 + 2 + 3 + 4 + 5 + 6, k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif #ifdef P6_NOP1 -asm("\t.section .rodata, \"a\"\np6nops: " - P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 - P6_NOP7 P6_NOP8); -extern const unsigned char p6nops[]; -static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char p6nops[] = +{ + P6_NOP1, + P6_NOP2, + P6_NOP3, + P6_NOP4, + P6_NOP5, + P6_NOP6, + P6_NOP7, + P6_NOP8, + P6_NOP5_ATOMIC +}; +static const unsigned char * const p6_nops[ASM_NOP_MAX+2] = +{ NULL, p6nops, p6nops + 1, @@ -131,129 +175,155 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { p6nops + 1 + 2 + 3 + 4 + 5, p6nops + 1 + 2 + 3 + 4 + 5 + 6, p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, + p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, }; #endif +/* Initialize these to a safe default */ #ifdef CONFIG_X86_64 +const unsigned char * const *ideal_nops = p6_nops; +#else +const unsigned char * const *ideal_nops = intel_nops; +#endif -extern char __vsyscall_0; -static inline const unsigned char*const * find_nop_table(void) -{ - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; -} - -#else /* CONFIG_X86_64 */ - -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - -static const unsigned char*const * find_nop_table(void) +void __init arch_init_ideal_nops(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + /* + * Due to a decoder implementation quirk, some + * specific Intel CPUs actually perform better with + * the "k8_nops" than with the SDM-recommended NOPs. + */ + if (boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 0x0f && + boot_cpu_data.x86_model != 0x1c && + boot_cpu_data.x86_model != 0x26 && + boot_cpu_data.x86_model != 0x27 && + boot_cpu_data.x86_model < 0x30) { + ideal_nops = k8_nops; + } else if (boot_cpu_has(X86_FEATURE_NOPL)) { + ideal_nops = p6_nops; + } else { +#ifdef CONFIG_X86_64 + ideal_nops = k8_nops; +#else + ideal_nops = intel_nops; +#endif } + break; + default: +#ifdef CONFIG_X86_64 + ideal_nops = k8_nops; +#else + if (boot_cpu_has(X86_FEATURE_K8)) + ideal_nops = k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + ideal_nops = k7_nops; + else + ideal_nops = intel_nops; +#endif } - return noptable; } -#endif /* CONFIG_X86_64 */ - /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void add_nops(void *insns, unsigned int len) +static void __init_or_module add_nops(void *insns, unsigned int len) { - const unsigned char *const *noptable = find_nop_table(); - while (len > 0) { unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - memcpy(insns, noptable[noplen], noplen); + memcpy(insns, ideal_nops[noplen], noplen); insns += noplen; len -= noplen; } } extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern u8 *__smp_locks[], *__smp_locks_end[]; +extern s32 __smp_locks[], __smp_locks_end[]; +void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where + self modifying code. This implies that asymmetric systems where APs have less capabilities than the boot processor are not handled. Tough. Make sure you disable such features by hand. */ -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; - char insnbuf[MAX_PATCH_LEN]; + u8 *instr, *replacement; + u8 insnbuf[MAX_PATCH_LEN]; - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite a previous scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ for (a = start; a < end; a++) { - u8 *instr = a->instr; + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->replacementlen > a->instrlen); BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); if (!boot_cpu_has(a->cpuid)) continue; -#ifdef CONFIG_X86_64 - /* vsyscall code is not mapped yet. resolve it manually. */ - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); - DPRINTK("%s: vsyscall fixup: %p => %p\n", - __FUNCTION__, a->instr, instr); - } -#endif - memcpy(insnbuf, a->replacement, a->replacementlen); + + memcpy(insnbuf, replacement, a->replacementlen); + + /* 0xe8 is a relative jump; fix the offset. */ + if (*insnbuf == 0xe8 && a->replacementlen == 5) + *(s32 *)(insnbuf + 1) += replacement - instr; + add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); - text_poke(instr, insnbuf, a->instrlen); + + text_poke_early(instr, insnbuf, a->instrlen); } } #ifdef CONFIG_SMP -static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) +static void alternatives_smp_lock(const s32 *start, const s32 *end, + u8 *text, u8 *text_end) { - u8 **ptr; + const s32 *poff; - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) + mutex_lock(&text_mutex); + for (poff = start; poff < end; poff++) { + u8 *ptr = (u8 *)poff + *poff; + + if (!*poff || ptr < text || ptr >= text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ - }; + /* turn DS segment override prefix into lock prefix */ + if (*ptr == 0x3e) + text_poke(ptr, ((unsigned char []){0xf0}), 1); + } + mutex_unlock(&text_mutex); } -static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +static void alternatives_smp_unlock(const s32 *start, const s32 *end, + u8 *text, u8 *text_end) { - u8 **ptr; - char insn[1]; + const s32 *poff; - if (noreplace_smp) - return; + mutex_lock(&text_mutex); + for (poff = start; poff < end; poff++) { + u8 *ptr = (u8 *)poff + *poff; - add_nops(insn, 1); - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) + if (!*poff || ptr < text || ptr >= text_end) continue; - text_poke(*ptr, insn, 1); - }; + /* turn lock prefix into DS segment override prefix */ + if (*ptr == 0xf0) + text_poke(ptr, ((unsigned char []){0x3E}), 1); + } + mutex_unlock(&text_mutex); } struct smp_alt_module { @@ -262,8 +332,8 @@ struct smp_alt_module { char *name; /* ptrs to lock prefixes */ - u8 **locks; - u8 **locks_end; + const s32 *locks; + const s32 *locks_end; /* .text segment, needed to avoid patching init code ;) */ u8 *text; @@ -272,29 +342,28 @@ struct smp_alt_module { struct list_head next; }; static LIST_HEAD(smp_alt_modules); -static DEFINE_SPINLOCK(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static DEFINE_MUTEX(smp_alt); +static bool uniproc_patched = false; /* protected by smp_alt */ -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) +void __init_or_module alternatives_smp_module_add(struct module *mod, + char *name, + void *locks, void *locks_end, + void *text, void *text_end) { struct smp_alt_module *smp; - unsigned long flags; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -303,90 +372,79 @@ void alternatives_smp_module_add(struct module *mod, char *name, smp->text = text; smp->text_end = text_end; DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __FUNCTION__, smp->locks, smp->locks_end, + __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - spin_lock_irqsave(&smp_alt, flags); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); - spin_unlock_irqrestore(&smp_alt, flags); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: + mutex_unlock(&smp_alt); } -void alternatives_smp_module_del(struct module *mod) +void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - unsigned long flags; - - if (smp_alt_once || noreplace_smp) - return; - spin_lock_irqsave(&smp_alt, flags); + mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - spin_unlock_irqrestore(&smp_alt, flags); - DPRINTK("%s: %s\n", __FUNCTION__, item->name); kfree(item); - return; + break; } - spin_unlock_irqrestore(&smp_alt, flags); + mutex_unlock(&smp_alt); } -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; - unsigned long flags; -#ifdef CONFIG_LOCKDEP - /* - * Older binutils section handling bug prevented - * alternatives-replacement from working reliably. - * - * If this still occurs then you should see a hang - * or crash shortly after this line: - */ - printk("lockdep: fixing up alternatives.\n"); -#endif + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); - if (noreplace_smp || smp_alt_once) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); - - spin_lock_irqsave(&smp_alt, flags); + mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + if (uniproc_patched) { + pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; - spin_unlock_irqrestore(&smp_alt, flags); + mutex_unlock(&smp_alt); } +/* Return 1 if the address range is reserved for smp-alternatives */ +int alternatives_text_reserved(void *start, void *end) +{ + struct smp_alt_module *mod; + const s32 *poff; + u8 *text_start = start; + u8 *text_end = end; + + list_for_each_entry(mod, &smp_alt_modules, next) { + if (mod->text > text_end || mod->text_end < text_start) + continue; + for (poff = mod->locks; poff < mod->locks_end; poff++) { + const u8 *ptr = (const u8 *)poff + *poff; + + if (text_start <= ptr && text_end > ptr) + return 1; + } + } + + return 0; +} #endif #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) +void __init_or_module apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) { struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; @@ -407,7 +465,7 @@ void apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ add_nops(insnbuf + used, p->len - used); - text_poke(p->instr, insnbuf, p->len); + text_poke_early(p->instr, insnbuf, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -416,73 +474,204 @@ extern struct paravirt_patch_site __start_parainstructions[], void __init alternative_instructions(void) { - unsigned long flags; - /* The patching is not fully atomic, so try to avoid local interruptions that might execute the to be patched code. Other CPUs are not running. */ stop_nmi(); -#ifdef CONFIG_X86_MCE - stop_mce(); -#endif - local_irq_save(flags); - apply_alternatives(__alt_instructions, __alt_instructions_end); + /* + * Don't stop machine check exceptions while patching. + * MCEs only happen when something got corrupted and in this + * case we must do something about the corruption. + * Ignoring it is worse than a unlikely patching race. + * Also machine checks tend to be broadcast and if one CPU + * goes into machine check the others follow quickly, so we don't + * expect a machine check to cause undue problems during to code + * patching. + */ - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif + apply_alternatives(__alt_instructions, __alt_instructions_end); #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_possible_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - local_irq_restore(flags); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); -#ifdef CONFIG_X86_MCE - restart_mce(); -#endif } -/* - * Warning: +/** + * text_poke_early - Update instructions on a live kernel at boot time + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * * When you use this code to patch more than one byte of an instruction * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these instructions. - * And on the local CPU you need to be protected again NMI or MCE handlers - * seeing an inconsistent instruction while you patch. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. */ -void __kprobes text_poke(void *addr, unsigned char *opcode, int len) +void *__init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { + unsigned long flags; + local_irq_save(flags); memcpy(addr, opcode, len); sync_core(); + local_irq_restore(flags); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ + return addr; } + +/** + * text_poke - Update instructions on a live kernel + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * Only atomic text poke/set should be allowed when not doing early patching. + * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. + * + * Note: Must be called under text_mutex. + */ +void *text_poke(void *addr, const void *opcode, size_t len) +{ + unsigned long flags; + char *vaddr; + struct page *pages[2]; + int i; + + if (!core_kernel_text((unsigned long)addr)) { + pages[0] = vmalloc_to_page(addr); + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); + } else { + pages[0] = virt_to_page(addr); + WARN_ON(!PageReserved(pages[0])); + pages[1] = virt_to_page(addr + PAGE_SIZE); + } + BUG_ON(!pages[0]); + local_irq_save(flags); + set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); + if (pages[1]) + set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); + vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + clear_fixmap(FIX_TEXT_POKE0); + if (pages[1]) + clear_fixmap(FIX_TEXT_POKE1); + local_flush_tlb(); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but + that causes hangs on some VIA CPUs. */ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); + local_irq_restore(flags); + return addr; +} + +static void do_sync_core(void *info) +{ + sync_core(); +} + +static bool bp_patching_in_progress; +static void *bp_int3_handler, *bp_int3_addr; + +int poke_int3_handler(struct pt_regs *regs) +{ + /* bp_patching_in_progress */ + smp_rmb(); + + if (likely(!bp_patching_in_progress)) + return 0; + + if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) + return 0; + + /* set up the specified breakpoint handler */ + regs->ip = (unsigned long) bp_int3_handler; + + return 1; + +} + +/** + * text_poke_bp() -- update instructions on live kernel on SMP + * @addr: address to patch + * @opcode: opcode of new instruction + * @len: length to copy + * @handler: address to jump to when the temporary breakpoint is hit + * + * Modify multi-byte instruction by using int3 breakpoint on SMP. + * We completely avoid stop_machine() here, and achieve the + * synchronization using int3 breakpoint. + * + * The way it is done: + * - add a int3 trap to the address that will be patched + * - sync cores + * - update all but the first byte of the patched range + * - sync cores + * - replace the first byte (int3) by the first byte of + * replacing opcode + * - sync cores + * + * Note: must be called under text_mutex. + */ +void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +{ + unsigned char int3 = 0xcc; + + bp_int3_handler = handler; + bp_int3_addr = (u8 *)addr + sizeof(int3); + bp_patching_in_progress = true; + /* + * Corresponding read barrier in int3 notifier for + * making sure the in_progress flags is correctly ordered wrt. + * patching + */ + smp_wmb(); + + text_poke(addr, &int3, sizeof(int3)); + + on_each_cpu(do_sync_core, NULL, 1); + + if (len - sizeof(int3) > 0) { + /* patch all but the first byte */ + text_poke((char *)addr + sizeof(int3), + (const char *) opcode + sizeof(int3), + len - sizeof(int3)); + /* + * According to Intel, this core syncing is very likely + * not necessary and we'd be safe even without it. But + * better safe than sorry (plus there's not only Intel). + */ + on_each_cpu(do_sync_core, NULL, 1); + } + + /* patch the first byte */ + text_poke(addr, opcode, sizeof(int3)); + + on_each_cpu(do_sync_core, NULL, 1); + + bp_patching_in_progress = false; + smp_wmb(); + + return addr; +} + |
