diff options
52 files changed, 311 insertions, 259 deletions
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 0695be538de..c3750c2c411 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -2,8 +2,14 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/list.h> +#include <linux/kprobes.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/sections.h> +#include <asm/pgtable.h> +#include <asm/mce.h> +#include <asm/nmi.h> #ifdef CONFIG_HOTPLUG_CPU static int smp_alt_once; @@ -150,7 +156,7 @@ static void nop_out(void *insns, unsigned int len) unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - memcpy(insns, noptable[noplen], noplen); + text_poke(insns, noptable[noplen], noplen); insns += noplen; len -= noplen; } @@ -202,7 +208,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ }; } @@ -360,10 +366,6 @@ void apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ nop_out(p->instr + used, p->len - used); } - - /* Sync to be conservative, in case we patched following - * instructions */ - sync_core(); } extern struct paravirt_patch_site __start_parainstructions[], __stop_parainstructions[]; @@ -373,6 +375,14 @@ void __init alternative_instructions(void) { unsigned long flags; + /* The patching is not fully atomic, so try to avoid local interruptions + that might execute the to be patched code. + Other CPUs are not running. */ + stop_nmi(); +#ifdef CONFIG_MCE + stop_mce(); +#endif + local_irq_save(flags); apply_alternatives(__alt_instructions, __alt_instructions_end); @@ -405,4 +415,37 @@ void __init alternative_instructions(void) #endif apply_paravirt(__parainstructions, __parainstructions_end); local_irq_restore(flags); + + restart_nmi(); +#ifdef CONFIG_MCE + restart_mce(); +#endif +} + +/* + * Warning: + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these instructions. + * And on the local CPU you need to be protected again NMI or MCE handlers + * seeing an inconsistent instruction while you patch. + */ +void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len) +{ + u8 *addr = oaddr; + if (!pte_write(*lookup_address((unsigned long)addr))) { + struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) }; + addr = vmap(p, 2, VM_MAP, PAGE_KERNEL); + if (!addr) + return; + addr += ((unsigned long)oaddr) % PAGE_SIZE; + } + memcpy(addr, opcode, len); + sync_core(); + /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline + case. */ + if (cpu_has_clflush) + asm("clflush (%0) " :: "r" (oaddr) : "memory"); + if (addr != oaddr) + vunmap(addr); } diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 815a5f0aa47..c7ba455d5ac 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -231,6 +231,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: set_bit(X86_FEATURE_K8, c->x86_capability); break; case 6: diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 194144539a6..461dabc4e49 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -79,7 +79,7 @@ #include <linux/smp.h> #include <linux/cpufreq.h> #include <linux/pci.h> -#include <asm/processor.h> +#include <asm/processor-cyrix.h> #include <asm/errno.h> /* PCI config registers, all at F0 */ diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index e88d2fba156..122d2d75aa9 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -4,7 +4,7 @@ #include <linux/pci.h> #include <asm/dma.h> #include <asm/io.h> -#include <asm/processor.h> +#include <asm/processor-cyrix.h> #include <asm/timer.h> #include <asm/pci-direct.h> #include <asm/tsc.h> diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 56cd485b127..34c781eddee 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -60,6 +60,20 @@ void mcheck_init(struct cpuinfo_x86 *c) } } +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + static int __init mcheck_disable(char *str) { mce_disabled = 1; diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 1001f1e0fe6..2287d4863a8 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -3,6 +3,7 @@ #include <asm/mtrr.h> #include <asm/msr.h> #include <asm/io.h> +#include <asm/processor-cyrix.h> #include "mtrr.h" int arr3_protected; diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c index 7b39a2f954d..c9014ca4a57 100644 --- a/arch/i386/kernel/cpu/mtrr/state.c +++ b/arch/i386/kernel/cpu/mtrr/state.c @@ -3,6 +3,7 @@ #include <asm/io.h> #include <asm/mtrr.h> #include <asm/msr.h> +#include <asm-i386/processor-cyrix.h> #include "mtrr.h" diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c index 30b5e48aa76..4be488e73be 100644 --- a/arch/i386/kernel/cpu/perfctr-watchdog.c +++ b/arch/i386/kernel/cpu/perfctr-watchdog.c @@ -325,7 +325,7 @@ static struct wd_ops k7_wd_ops = { .stop = single_msr_stop_watchdog, .perfctr = MSR_K7_PERFCTR0, .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<63, + .checkbit = 1ULL<<47, }; /* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ @@ -346,7 +346,9 @@ static int setup_p6_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P6_PERFCTR0; evntsel_msr = MSR_P6_EVNTSEL0; - wrmsrl(perfctr_msr, 0UL); + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index dde828a333c..448a50b1324 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -35,6 +35,7 @@ #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/uaccess.h> +#include <asm/alternative.h> void jprobe_return_end(void); @@ -169,16 +170,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) void __kprobes arch_arm_kprobe(struct kprobe *p) { - *p->addr = BREAKPOINT_INSTRUCTION; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 03b7f5584d7..99beac7f96c 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -353,7 +353,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) * Take the local apic timer and PIT/HPET into account. We don't * know which one is active, when we have highres/dyntick on */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 53f07a8275e..79c167fcaee 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -124,20 +124,28 @@ unsigned paravirt_patch_ignore(unsigned len) return len; } +struct branch { + unsigned char opcode; + u32 delta; +} __attribute__((packed)); + unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, void *site, u16 site_clobbers, unsigned len) { unsigned char *call = site; unsigned long delta = (unsigned long)target - (unsigned long)(call+5); + struct branch b; if (tgt_clobbers & ~site_clobbers) return len; /* target would clobber too much for this site */ if (len < 5) return len; /* call too long for patch site */ - *call++ = 0xe8; /* call */ - *(unsigned long *)call = delta; + b.opcode = 0xe8; /* call */ + b.delta = delta; + BUILD_BUG_ON(sizeof(b) != 5); + text_poke(call, (unsigned char *)&b, 5); return 5; } @@ -150,8 +158,9 @@ unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) if (len < 5) return len; /* call too long for patch site */ - *jmp++ = 0xe9; /* jmp */ - *(unsigned long *)jmp = delta; + b.opcode = 0xe9; /* jmp */ + b.delta = delta; + text_poke(call, (unsigned char *)&b, 5); return 5; } diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index d574e38f0f7..f5dd85656c1 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -199,6 +199,13 @@ asmlinkage int sys_sigreturn(unsigned long __unused) return eax; badframe: + if (show_unhandled_signals && printk_ratelimit()) + printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" + " esp:%lx oeax:%lx\n", + current->pid > 1 ? KERN_INFO : KERN_EMERG, + current->comm, current->pid, frame, regs->eip, + regs->esp, regs->orig_eax); + force_sig(SIGSEGV, current); return 0; } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 5910d3fac56..e4f61d1c624 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -308,7 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu) /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -void set_cpu_sibling_map(int cpu) +void __cpuinit set_cpu_sibling_map(int cpu) { int i; struct cpuinfo_x86 *c = cpu_data; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 57772a18c39..cfffe3dd9e8 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -618,6 +618,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, current->thread.error_code = error_code; current->thread.trap_no = 13; + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && + printk_ratelimit()) + printk(KERN_INFO + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", + current->comm, current->pid, + regs->eip, regs->esp, error_code); + force_sig(SIGSEGV, current); return; @@ -768,6 +775,8 @@ static __kprobes void default_do_nmi(struct pt_regs * regs) reassert_nmi(); } +static int ignore_nmis; + fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; @@ -778,11 +787,24 @@ fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - default_do_nmi(regs); + if (!ignore_nmis) + default_do_nmi(regs); nmi_exit(); } +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index e92a1012493..01ffdd4964f 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -283,6 +283,8 @@ static inline int vmalloc_fault(unsigned long address) return 0; } +int show_unhandled_signals = 1; + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -469,6 +471,14 @@ bad_area_nosemaphore: if (is_prefetch(regs, address, error_code)) return; + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk("%s%s[%d]: segfault at %08lx eip %08lx " + "esp %08lx error %lx\n", + tsk->pid > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, tsk->pid, address, regs->eip, + regs->esp, error_code); + } tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index e1a9a805c44..c3b9905af2d 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -800,17 +800,9 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; -#ifndef CONFIG_KPROBES -#ifdef CONFIG_HOTPLUG_CPU - /* It must still be possible to apply SMP alternatives. */ - if (num_possible_cpus() <= 1) -#endif - { - change_page_attr(virt_to_page(start), - size >> PAGE_SHIFT, PAGE_KERNEL_RX); - printk("Write protecting the kernel text: %luk\n", size >> 10); - } -#endif + change_page_attr(virt_to_page(start), + size >> PAGE_SHIFT, PAGE_KERNEL_RX); + printk("Write protecting the kernel text: %luk\n", size >> 10); start += size; size = (unsigned long)__end_rodata - start; change_page_attr(virt_to_page(start), diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c2aaec5289d..4100ddc52f0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -941,13 +941,6 @@ static const struct file_operations spufs_signal1_nosched_fops = { .mmap = spufs_signal1_mmap, }; -static const struct file_operations spufs_signal1_nosched_fops = { - .open = spufs_signal1_open, - .release = spufs_signal1_release, - .write = spufs_signal1_write, - .mmap = spufs_signal1_mmap, -}; - static int spufs_signal2_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); @@ -1083,13 +1076,6 @@ static const struct file_operations spufs_signal2_nosched_fops = { .mmap = spufs_signal2_mmap, }; -static const struct file_operations spufs_signal2_nosched_fops = { - .open = spufs_signal2_open, - .release = spufs_signal2_release, - .write = spufs_signal2_write, - .mmap = spufs_signal2_mmap, -}; - static void spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile index c9f2da7496c..877c0bdbbc6 100644 --- a/arch/x86_64/boot/compressed/Makefile +++ b/arch/x86_64/boot/compressed/Makefile @@ -3,8 +3,6 @@ # # create a compressed vmlinux image from the original vmlinux # -# Note all the files here are compiled/linked as 32bit executables. -# targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 941c84baecc..e89abcdbdde 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -25,7 +25,7 @@ */ .text - .section .bootstrap.text + .section .text.head .code64 .globl startup_64 startup_64: @@ -243,10 +243,16 @@ ENTRY(secondary_startup_64) lretq /* SMP bootup changes these two */ +#ifndef CONFIG_HOTPLUG_CPU + .pushsection .init.data +#endif .align 8 .globl initial_code initial_code: .quad x86_64_start_kernel +#ifndef CONFIG_HOTPLUG_CPU + .popsection +#endif .globl init_rsp init_rsp: .quad init_thread_union+THREAD_SIZE-8 diff --git a/arch/x86_64/kernel/hpet.c b/arch/x86_64/kernel/hpet.c index 636f4f9fc6b..e2d1b912e15 100644 --- a/arch/x86_64/kernel/hpet.c +++ b/arch/x86_64/kernel/hpet.c @@ -133,7 +133,7 @@ struct clocksource clocksource_hpet = { .vread = vread_hpet, }; -int hpet_arch_init(void) +int __init hpet_arch_init(void) { unsigned int id; u64 tmp; diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index d4a0d0ac993..a30e004682e 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -39,9 +39,9 @@ #include <linux/module.h> #include <linux/kdebug.h> -#include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/uaccess.h> +#include <asm/alternative.h> void jprobe_return_end(void); static void __kprobes arch_copy_kprobe(struct kprobe *p); @@ -209,16 +209,12 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) void __kprobes arch_arm_kprobe(struct kprobe *p) { - *p->addr = BREAKPOINT_INSTRUCTION; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 4d8450ee363..a66d607f5b9 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -667,6 +667,20 @@ static struct miscdevice mce_log_device = { &mce_chrdev_ops, }; +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + /* * Old style boot options parsing. Only for compatibility. */ diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index edbbc59b752..cb8ee9d02f8 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -384,11 +384,14 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) return rc; } +static unsigned ignore_nmis; + asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); - default_do_nmi(regs); + if (!ignore_nmis) + default_do_nmi(regs); nmi_exit(); } @@ -401,6 +404,18 @@ int do_nmi_callback(struct pt_regs * regs, int cpu) return 0; |