From 4c7f8900f1d8a0e464e7092f132a7e93f7c20f2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Feb 2008 07:06:55 +0100 Subject: x86: stackprotector & PARAVIRT fix on paravirt enabled 64-bit kernels the paravirt ops do function calls themselves - which is bad with the stackprotector - for example pda_init() loads 0 into %gs and then does MSR_GS_BASE write (which modifies gs.base) - but that MSR write is a function call on paravirt, which with stackprotector tries to read the stack canary from the PDA ... crashing the bootup. the solution was suggested by Arjan van de Ven: to exclude paravirt.c from stackprotector, too many lowlevel functionality is in it. It's not like we'll have paravirt functions with character arrays on their stack anyway... Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b472..8161e5a91ca 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,7 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc_64.o := $(nostackp) +CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o -- cgit v1.2.3-18-g5258 From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() whith the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task has a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 - include/asm-x86/pda.h | 2 -- include/asm-x86/system.h | 6 +++++- include/linux/sched.h | 3 +-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..d8640388039 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -640,7 +640,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); /* * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 101fb9e1195..62b734986a4 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -16,11 +16,9 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ -#endif char *irqstackptr; unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b2..172f5418509 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -92,6 +92,8 @@ do { \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -103,7 +105,9 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ + [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4..d6a51515878 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,10 +1096,9 @@ struct task_struct { pid_t pid; pid_t tgid; -#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; -#endif + /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with -- cgit v1.2.3-18-g5258 From ce22bd92cba0958e052cb1ce0f89f1d3a02b60a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 12 May 2008 15:44:31 +0200 Subject: x86: setup stack canary for the idle threads The idle threads for non-boot CPUs are a bit special in how they are created; the result is that these don't have the stack canary set up properly in their PDA. Easiest fix is to just always set the PDA up correctly when entering the idle thread; this is a NOP for the boot cpu. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d8640388039..9e69e223023 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,6 +146,15 @@ static inline void play_dead(void) void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us. This is as good a place as any for + * doing that. + */ + write_pda(stack_canary, current->stack_canary); +#endif /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); -- cgit v1.2.3-18-g5258 From 7e09b2a02dae4616a5a26000169964b32f86cd35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:22:34 +0100 Subject: x86: fix canary of the boot CPU's idle task the boot CPU's idle task has a zero stackprotector canary value. this is a special task that is never forked, so the fork code does not randomize its canary. Do it when we hit cpu_idle(). Academic sidenote: this means that the early init code runs with a zero canary and hence the canary becomes predictable for this short, boot-only amount of time. Although attack vectors against early init code are very rare, it might make sense to move this initialization to an earlier point. (to one of the early init functions that never return - such as start_kernel()) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9e69e223023..d4c7ac7aa43 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,13 @@ void cpu_idle(void) #ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us. This is as good a place as any for - * doing that. + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): */ + current->stack_canary = get_random_int(); write_pda(stack_canary, current->stack_canary); #endif /* endless idle loop with no priority at all */ -- cgit v1.2.3-18-g5258 From 517a92c4e19fcea815332d3155e9fb7723251274 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:02:13 +0100 Subject: panic: print more informative messages on stackprotect failure pointed out by pageexec@freemail.hu: we just simply panic() when there's a stackprotector attack - giving the attacked person no information about what kernel code the attack went against. print out the attacked function. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 425567f45b9..f236001cc4d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -327,7 +327,8 @@ EXPORT_SYMBOL(warn_on_slowpath); */ void __stack_chk_fail(void) { - panic("stack-protector: Kernel stack is corrupted"); + panic("stack-protector: Kernel stack is corrupted in: %p\n", + __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); #endif -- cgit v1.2.3-18-g5258 From 5cb273013e182a35e7db614d3e20a144cba71e53 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:07:01 +0100 Subject: panic: print out stacktrace if DEBUG_BUGVERBOSE if CONFIG_DEBUG_BUGVERBOSE is set then the user most definitely wanted to see as much information about kernel crashes as possible - so give them at least a stack dump. this is particularly useful for stackprotector related panics, where the stacktrace can give us the exact location of the (attempted) attack. Pointed out by pageexec@freemail.hu in the stackprotector breakage threads. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index f236001cc4d..17aad578a2f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -80,6 +80,9 @@ NORET_TYPE void panic(const char * fmt, ...) vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); +#ifdef CONFIG_DEBUG_BUGVERBOSE + dump_stack(); +#endif bust_spinlocks(0); /* -- cgit v1.2.3-18-g5258 From 72370f2a5b227bd3817593a6b15ea3f53f51dfcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 16:15:34 +0100 Subject: x86: if stackprotector is enabled, thn use stack-protector-all by default also enable the rodata and nx tests. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 ++- arch/x86/Kconfig.debug | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcbec34154c..83d8392c133 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1142,7 +1142,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL && BROKEN + depends on X86_64 help This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of critical functions, a canary @@ -1159,6 +1159,7 @@ config CC_STACKPROTECTOR config CC_STACKPROTECTOR_ALL bool "Use stack-protector for all functions" depends on CC_STACKPROTECTOR + default y help Normally, GCC only inserts the canary value protection for functions that use large-ish on-stack buffers. By enabling diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31ba479..b5c5b55d043 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -102,6 +102,7 @@ config DIRECT_GBPAGES config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA + default y help This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. -- cgit v1.2.3-18-g5258 From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:41:09 +0100 Subject: stackprotector: include files create for core kernel files to include. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 4 ++++ include/linux/stackprotector.h | 8 ++++++++ init/main.c | 1 + 3 files changed, 13 insertions(+) create mode 100644 include/asm-x86/stackprotector.h create mode 100644 include/linux/stackprotector.h diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h new file mode 100644 index 00000000000..dcac7a6bdba --- /dev/null +++ b/include/asm-x86/stackprotector.h @@ -0,0 +1,4 @@ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h new file mode 100644 index 00000000000..d3e8bbe602f --- /dev/null +++ b/include/linux/stackprotector.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACKPROTECTOR_H +#define _LINUX_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR +# include +#endif + +#endif diff --git a/init/main.c b/init/main.c index f7fb20021d4..a84322ca64a 100644 --- a/init/main.c +++ b/init/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 6 ++---- include/asm-x86/stackprotector.h | 20 ++++++++++++++++++++ include/linux/stackprotector.h | 4 ++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d4c7ac7aa43..5107cb214c7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -147,7 +147,6 @@ void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; -#ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have @@ -156,9 +155,8 @@ void cpu_idle(void) * invalid canaries already on the stack wont ever * trigger): */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); -#endif + boot_init_stack_canary(); + /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index dcac7a6bdba..0f91f7a2688 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,4 +1,24 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): + */ + current->stack_canary = get_random_int(); + write_pda(stack_canary, current->stack_canary); +} + #endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index d3e8bbe602f..422e71aafd0 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -3,6 +3,10 @@ #ifdef CONFIG_CC_STACKPROTECTOR # include +#else +static inline void boot_init_stack_canary(void) +{ +} #endif #endif -- cgit v1.2.3-18-g5258 From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 + include/linux/stackprotector.h | 4 ++++ init/main.c | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5107cb214c7..cce47f7fbf2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -16,6 +16,7 @@ #include +#include #include #include #include diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 422e71aafd0..6f3e54c704c 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -1,6 +1,10 @@ #ifndef _LINUX_STACKPROTECTOR_H #define _LINUX_STACKPROTECTOR_H 1 +#include +#include +#include + #ifdef CONFIG_CC_STACKPROTECTOR # include #else diff --git a/init/main.c b/init/main.c index a84322ca64a..b44e4eb0f5e 100644 --- a/init/main.c +++ b/init/main.c @@ -546,6 +546,12 @@ asmlinkage void __init start_kernel(void) unwind_init(); lockdep_init(); debug_objects_early_init(); + + /* + * Set up the the initial canary ASAP: + */ + boot_init_stack_canary(); + cgroup_init_early(); local_irq_disable(); -- cgit v1.2.3-18-g5258 From 960a672bd9f1ec06e8f197cf81a50fd07ea02e7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:56:04 +0100 Subject: x86: stackprotector: mix TSC to the boot canary mix the TSC to the boot canary. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index 0f91f7a2688..3baf7ad89be 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#include + /* * Initialize the stackprotector canary value. * @@ -9,16 +11,28 @@ */ static __always_inline void boot_init_stack_canary(void) { + u64 canary; + u64 tsc; + /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have * a 0 stack canary. This is a good place for updating * it, as we wont ever return from this function (so the * invalid canaries already on the stack wont ever - * trigger): + * trigger). + * + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; + write_pda(stack_canary, canary); } #endif -- cgit v1.2.3-18-g5258 From 113c5413cf9051cc50b88befdc42e3402bb92115 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 10:36:03 +0100 Subject: x86: unify stackprotector features streamline the stackprotector features under a single option and make the stronger feature the one accessible. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 83d8392c133..0cd1695c24f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1140,13 +1140,17 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config CC_STACKPROTECTOR_ALL + bool + config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on X86_64 + select CC_STACKPROTECTOR_ALL help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of functions, a canary value on + the stack just before the return address, and validates the value just before actually returning. Stack based buffer overflows (that need to overwrite this return address) now also overwrite the canary, which gets detected and the attack is then @@ -1154,16 +1158,8 @@ config CC_STACKPROTECTOR This feature requires gcc version 4.2 or above, or a distribution gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - default y - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. + detected and for those versions, this configuration option is + ignored. (and a warning is printed during bootup) source kernel/Kconfig.hz -- cgit v1.2.3-18-g5258 From 54371a43a66f4477889769b4fa00df936855dc8f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Feb 2008 15:33:12 -0800 Subject: x86: add CONFIG_CC_STACKPROTECTOR self-test This patch adds a simple self-test capability to the stackprotector feature. The test deliberately overflows a stack buffer and then checks if the canary trap function gets called. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 17aad578a2f..50cf9257b23 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -324,14 +324,82 @@ EXPORT_SYMBOL(warn_on_slowpath); #endif #ifdef CONFIG_CC_STACKPROTECTOR + +static unsigned long __stack_check_testing; +/* + * Self test function for the stack-protector feature. + * This test requires that the local variable absolutely has + * a stack slot, hence the barrier()s. + */ +static noinline void __stack_chk_test_func(void) +{ + unsigned long foo; + barrier(); + /* + * we need to make sure we're not about to clobber the return address, + * while real exploits do this, it's unhealthy on a running system. + * Besides, if we would, the test is already failed anyway so + * time to pull the emergency brake on it. + */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+1)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#ifdef CONFIG_FRAME_POINTER + /* We also don't want to clobber the frame pointer */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+2)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#endif + barrier(); + if (current->stack_canary == *(((unsigned long *)&foo)+1)) + *(((unsigned long *)&foo)+1) = 0; + else + printk(KERN_ERR "No -fstack-protector canary found\n"); + barrier(); +} + +static int __stack_chk_test(void) +{ + printk(KERN_INFO "Testing -fstack-protector-all feature\n"); + __stack_check_testing = (unsigned long)&__stack_chk_test_func; + __stack_chk_test_func(); + if (__stack_check_testing) { + printk(KERN_ERR "-fstack-protector-all test failed\n"); + WARN_ON(1); + } + return 0; +} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { + if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { + long delta; + + delta = (unsigned long)__builtin_return_address(0) - + __stack_check_testing; + /* + * The test needs to happen inside the test function, so + * check if the return address is close to that function. + * The function is only 2 dozen bytes long, but keep a wide + * safety margin to avoid panic()s for normal users regardless + * of the quality of the compiler. + */ + if (delta >= 0 && delta <= 400) { + __stack_check_testing = 0; + return; + } + } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); + +late_initcall(__stack_chk_test); #endif -- cgit v1.2.3-18-g5258 From b719ac56c0032bc1602914c6ea70b0f1581b08c7 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Mon, 14 Apr 2008 10:03:50 -0700 Subject: panic.c: fix whitespace additions trivial: remove white space addition in stack protector Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 50cf9257b23..866be9b72e4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -341,14 +341,14 @@ static noinline void __stack_chk_test_func(void) * Besides, if we would, the test is already failed anyway so * time to pull the emergency brake on it. */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; -- cgit v1.2.3-18-g5258 From b40a4392a3c262e0d1b5379b4e142a8eefa63439 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 18 Apr 2008 06:16:45 -0700 Subject: stackprotector: turn not having the right gcc into a #warning If the user selects the stack-protector config option, but does not have a gcc that has the right bits enabled (for example because it isn't build with a glibc that supports TLS, as is common for cross-compilers, but also because it may be too old), then the runtime test fails right now. This patch adds a warning message for this scenario. This warning accomplishes two goals 1) the user is informed that the security option he selective isn't available 2) the user is suggested to turn of the CONFIG option that won't work for him, and would make the runtime test fail anyway. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Makefile | 2 +- kernel/panic.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c894cf..c3e0eeeb1dd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -73,7 +73,7 @@ else stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" -fstack-protector ) + "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ "$(CC)" -fstack-protector-all ) diff --git a/kernel/panic.c b/kernel/panic.c index 866be9b72e4..6729e3f4ebc 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -325,6 +325,9 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifdef CONFIG_CC_STACKPROTECTOR +#ifndef GCC_HAS_SP +#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. +#endif static unsigned long __stack_check_testing; /* * Self test function for the stack-protector feature. -- cgit v1.2.3-18-g5258 From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 22 Apr 2008 16:38:23 -0500 Subject: stackprotector: use canary at end of stack to indicate overruns at oops time (Updated with a common max-stack-used checker that knows about the canary, as suggested by Joe Perches) Use a canary at the end of the stack to clearly indicate at oops time whether the stack has ever overflowed. This is a very simple implementation with a couple of drawbacks: 1) a thread may legitimately use exactly up to the last word on the stack -- but the chances of doing this and then oopsing later seem slim 2) it's possible that the stack usage isn't dense enough that the canary location could get skipped over -- but the worst that happens is that we don't flag the overrun -- though this happens fairly often in my testing :( With the code in place, an intentionally-bloated stack oops might do: BUG: unable to handle kernel paging request at ffff8103f84cc680 IP: [] update_curr+0x9a/0xa8 PGD 8063 PUD 0 Thread overran stack or stack corrupted Oops: 0000 [1] SMP CPU 0 ... ... unless the stack overrun is so bad that it corrupts some other thread. Signed-off-by: Eric Sandeen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault.c | 7 +++++++ include/linux/magic.h | 1 + include/linux/sched.h | 13 +++++++++++++ kernel/exit.c | 5 +---- kernel/fork.c | 5 +++++ kernel/sched.c | 7 +------ 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75..1f524df68b9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -581,6 +582,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; int write, si_code; int fault; + unsigned long *stackend; + #ifdef CONFIG_X86_64 unsigned long flags; #endif @@ -850,6 +853,10 @@ no_context: show_fault_oops(regs, error_code, address); + stackend = end_of_stack(tsk); + if (*stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; diff --git a/include/linux/magic.h b/include/linux/magic.h index 1fa0c2ce4de..74e68e20116 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -42,4 +42,5 @@ #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA +#define STACK_END_MAGIC 0x57AC6E9D #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6a51515878..c5181e77f30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p) extern void thread_info_cache_init(void); +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ + n++; + } while (!*n); + + return (unsigned long)n - (unsigned long)end_of_stack(p); +} +#endif + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/kernel/exit.c b/kernel/exit.c index 8f6185e69b6..fb8de6cbf2c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -899,12 +899,9 @@ static void check_stack_usage(void) { static DEFINE_SPINLOCK(low_water_lock); static int lowest_to_date = THREAD_SIZE; - unsigned long *n = end_of_stack(current); unsigned long free; - while (*n == 0) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(current); + free = stack_not_used(current); if (free >= lowest_to_date) return; diff --git a/kernel/fork.c b/kernel/fork.c index 19908b26cf8..d428336e7aa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; + unsigned long *stackend; + int err; prepare_to_copy(orig); @@ -211,6 +214,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto out; setup_thread_stack(tsk, orig); + stackend = end_of_stack(tsk); + *stackend = STACK_END_MAGIC; /* for overflow detection */ #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_int(); diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a9153..a964ed94509 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5748,12 +5748,7 @@ void sched_show_task(struct task_struct *p) printk(KERN_CONT " %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE - { - unsigned long *n = end_of_stack(p); - while (!*n) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(p); - } + free = stack_not_used(p); #endif printk(KERN_CONT "%5lu %5d %6d\n", free, task_pid_nr(p), task_pid_nr(p->real_parent)); -- cgit v1.2.3-18-g5258 From aa92db14270b79f0f91a9060b547a46f9e2639da Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 11 Jul 2008 05:09:55 -0700 Subject: stackprotector: better self-test check stackprotector functionality by manipulating the canary briefly during bootup. far more robust than trying to overflow the stack. (which is architecture dependent, etc.) Signed-off-by: Ingo Molnar --- kernel/panic.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 6729e3f4ebc..28153aec710 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -347,22 +347,18 @@ static noinline void __stack_chk_test_func(void) if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #endif - barrier(); - if (current->stack_canary == *(((unsigned long *)&foo)+1)) - *(((unsigned long *)&foo)+1) = 0; - else + if (current->stack_canary != *(((unsigned long *)&foo)+1)) printk(KERN_ERR "No -fstack-protector canary found\n"); - barrier(); + + current->stack_canary = ~current->stack_canary; } static int __stack_chk_test(void) @@ -373,7 +369,8 @@ static int __stack_chk_test(void) if (__stack_check_testing) { printk(KERN_ERR "-fstack-protector-all test failed\n"); WARN_ON(1); - } + }; + current->stack_canary = ~current->stack_canary; return 0; } /* -- cgit v1.2.3-18-g5258 From af9ff7868f0f76d3364351b1641b9dfa99588e77 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 12 Jul 2008 09:36:38 -0700 Subject: x86: simplify stackprotector self-check Clean up the code by removing no longer needed code; make sure the pda is updated and kept in sync Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/pda.h | 1 + kernel/panic.c | 29 +++++++---------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 62b734986a4..a5ff5bb7629 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -131,4 +131,5 @@ do { \ #define PDA_STACKOFFSET (5*8) +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) #endif diff --git a/kernel/panic.c b/kernel/panic.c index 28153aec710..87445a894c3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -328,37 +328,21 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifndef GCC_HAS_SP #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif + static unsigned long __stack_check_testing; + /* * Self test function for the stack-protector feature. * This test requires that the local variable absolutely has - * a stack slot, hence the barrier()s. + * a stack slot. */ static noinline void __stack_chk_test_func(void) { - unsigned long foo; - barrier(); - /* - * we need to make sure we're not about to clobber the return address, - * while real exploits do this, it's unhealthy on a running system. - * Besides, if we would, the test is already failed anyway so - * time to pull the emergency brake on it. - */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+1)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#ifdef CONFIG_FRAME_POINTER - /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+2)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#endif - if (current->stack_canary != *(((unsigned long *)&foo)+1)) - printk(KERN_ERR "No -fstack-protector canary found\n"); + unsigned long dummy_buffer[64]; /* force gcc to use the canary */ current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); + dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ } static int __stack_chk_test(void) @@ -371,6 +355,7 @@ static int __stack_chk_test(void) WARN_ON(1); }; current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); return 0; } /* -- cgit v1.2.3-18-g5258 From 4f962d4d65923d7b722192e729840cfb79af0a5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Jul 2008 21:42:44 +0200 Subject: stackprotector: remove self-test turns out gcc generates such stackprotector-failure sequences in certain circumstances: movq -8(%rbp), %rax # D.16032, xorq %gs:40, %rax #, jne .L17 #, leave ret .L17: call __stack_chk_fail # .size __stack_chk_test_func, .-__stack_chk_test_func .section .init.text,"ax",@progbits .type panic_setup, @function panic_setup: pushq %rbp # note that there's no jump back to the failing context after the call to __stack_chk_fail - i.e. it has a ((noreturn)) attribute. Which is fair enough in the normal case but kills the self-test. (as we cannot reliably return in the self-test) Signed-off-by: Ingo Molnar --- kernel/panic.c | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 87445a894c3..c35c9eca3eb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -329,62 +329,15 @@ EXPORT_SYMBOL(warn_on_slowpath); #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif -static unsigned long __stack_check_testing; - -/* - * Self test function for the stack-protector feature. - * This test requires that the local variable absolutely has - * a stack slot. - */ -static noinline void __stack_chk_test_func(void) -{ - unsigned long dummy_buffer[64]; /* force gcc to use the canary */ - - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ -} - -static int __stack_chk_test(void) -{ - printk(KERN_INFO "Testing -fstack-protector-all feature\n"); - __stack_check_testing = (unsigned long)&__stack_chk_test_func; - __stack_chk_test_func(); - if (__stack_check_testing) { - printk(KERN_ERR "-fstack-protector-all test failed\n"); - WARN_ON(1); - }; - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - return 0; -} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { - if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { - long delta; - - delta = (unsigned long)__builtin_return_address(0) - - __stack_check_testing; - /* - * The test needs to happen inside the test function, so - * check if the return address is close to that function. - * The function is only 2 dozen bytes long, but keep a wide - * safety margin to avoid panic()s for normal users regardless - * of the quality of the compiler. - */ - if (delta >= 0 && delta <= 400) { - __stack_check_testing = 0; - return; - } - } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); -late_initcall(__stack_chk_test); #endif -- cgit v1.2.3-18-g5258 From 09b3ec7315a18d885127544204f1e389d41058d0 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Mon, 12 Jan 2009 22:35:42 +0100 Subject: x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default). [ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..8cfea5d1451 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } -- cgit v1.2.3-18-g5258 From 0a2a18b721abc960fbcada406746877d22340a60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 23:37:16 +0100 Subject: x86: change the default cache size to 64 bytes Right now the generic cacheline size is 128 bytes - that is wasteful when structures are aligned, as all modern x86 CPUs have an (effective) cacheline sizes of 64 bytes. It was set to 128 bytes due to some cacheline aliasing problems on older P4 systems, but those are many years old and we dont optimize for them anymore. (They'll still get the 128 bytes cacheline size if the kernel is specifically built for Pentium 4) Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven --- arch/x86/Kconfig.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845a..cdf4a962323 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -307,10 +307,10 @@ config X86_CMPXCHG config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y -- cgit v1.2.3-18-g5258 From 5cd7376200be7b8bab085557ff5876b04bd84191 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 15:46:08 +0100 Subject: fix: crash: IP: __bitmap_intersects+0x48/0x73 -tip testing found this crash: > [ 35.258515] calling acpi_cpufreq_init+0x0/0x127 @ 1 > [ 35.264127] BUG: unable to handle kernel NULL pointer dereference at (null) > [ 35.267554] IP: [] __bitmap_intersects+0x48/0x73 > [ 35.267554] PGD 0 > [ 35.267554] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c is still broken: there's no allocation of the variable mask, so we pass in an uninitialized cmd.mask field to drv_read(), which then passes it to the scheduler which then crashes ... Switch it over to the much simpler constant-cpumask-pointers approach. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c..019276717a7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -235,8 +235,7 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - + cmd.mask = mask; drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); @@ -403,9 +402,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -450,9 +446,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -479,7 +475,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } -- cgit v1.2.3-18-g5258 From f2a082711905312dc7b6675e913fee0c4689f7ae Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 09:19:32 -0800 Subject: x86: fix build warning when CONFIG_NUMA not defined. Impact: fix build warning The macro cpu_to_node did not reference it's argument, and instead simply returned a 0. This causes a "unused variable" warning if it's the only reference in a function (show_cache_disable). Replace it with the more correct inline function. Signed-off-by: Mike Travis --- arch/x86/include/asm/topology.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab2..d0c68e29163 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -192,9 +192,20 @@ extern int __node_distance(int, int); #else /* !CONFIG_NUMA */ -#define numa_node_id() 0 -#define cpu_to_node(cpu) 0 -#define early_cpu_to_node(cpu) 0 +static inline int numa_node_id(void) +{ + return 0; +} + +static inline int cpu_to_node(int cpu) +{ + return 0; +} + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} static inline const cpumask_t *cpumask_of_node(int node) { -- cgit v1.2.3-18-g5258 From c99dbbe9f8f6b3e9383e64710217e873431d1c31 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 12:09:44 -0800 Subject: sched: fix warning on ia64 Andrew Morton reported this warning on ia64: kernel/sched.c: In function `sd_init_NODE': kernel/sched.c:7449: warning: comparison of distinct pointer types lacks a cast Using the untyped min() function produces such warnings. Fix: type the constant 32 as unsigned int to match typeof(num_online_cpus). Reported-by: Andrew Morton Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 32f3af1641c..3193f4417e1 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -84,7 +84,7 @@ void build_cpu_to_node_map(void); .child = NULL, \ .groups = NULL, \ .min_interval = 8, \ - .max_interval = 8*(min(num_online_cpus(), 32)), \ + .max_interval = 8*(min(num_online_cpus(), 32U)), \ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 2, \ -- cgit v1.2.3-18-g5258 From 68564a46976017496c2227660930d81240f82355 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: work_on_cpu: don't try to get_online_cpus() in work_on_cpu. Impact: remove potential circular lock dependency with cpu hotplug lock This has caused more problems than it solved, with a pile of cpu hotplug locking issues. Followup patches will get_online_cpus() in callers that need it, but if they don't do it they're no worse than before when they were using set_cpus_allowed without locking. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2f445833ae3..a35afdbc016 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -991,8 +991,8 @@ static void do_work_for_cpu(struct work_struct *w) * @fn: the function to run * @arg: the function arg * - * This will return -EINVAL in the cpu is not online, or the return value - * of @fn otherwise. + * This will return the value @fn returns. + * It is up to the caller to ensure that the cpu doesn't go offline. */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { @@ -1001,14 +1001,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - get_online_cpus(); - if (unlikely(!cpu_online(cpu))) - wfc.ret = -EINVAL; - else { - schedule_work_on(cpu, &wfc.work); - flush_work(&wfc.work); - } - put_online_cpus(); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } -- cgit v1.2.3-18-g5258 From e1d9ec6246a2668a5d037f529877efb7cf176af8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: work_on_cpu: Use our own workqueue. Impact: remove potential clashes with generic kevent workqueue Annoyingly, some places we want to use work_on_cpu are already in workqueues. As per Ingo's suggestion, we create a different workqueue for work_on_cpu. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a35afdbc016..1f0c509b40d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -971,6 +971,8 @@ undo: } #ifdef CONFIG_SMP +static struct workqueue_struct *work_on_cpu_wq __read_mostly; + struct work_for_cpu { struct work_struct work; long (*fn)(void *); @@ -1001,7 +1003,7 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - schedule_work_on(cpu, &wfc.work); + queue_work_on(cpu, work_on_cpu_wq, &wfc.work); flush_work(&wfc.work); return wfc.ret; @@ -1019,4 +1021,8 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); +#ifdef CONFIG_SMP + work_on_cpu_wq = create_workqueue("work_on_cpu"); + BUG_ON(!work_on_cpu_wq); +#endif } -- cgit v1.2.3-18-g5258 From 6eb714c63ed5bd663627f7dda8c4d5258f3b64ef Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new work_on_cpu function to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Note: This patch basically reverts 50c668d6 which reverted 7503bfba, now that the work_on_cpu() function is more stable. Signed-off-by: Mike Travis Acked-by: Rusty Russell Tested-by: Dieter Ries Tested-by: Maciej Rutecki Cc: Dave Jones Cc: --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 019276717a7..4b1c319d30c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -367,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; -- cgit v1.2.3-18-g5258 From cef30b3a84e1c7cbd49987d83c5863c001445842 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:58:13 -0800 Subject: x86: put trigger in to detect mismatched apic versions. Fire off one message if two apic's discovered with different apic versions. Signed-off-by: Mike Travis --- arch/x86/kernel/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..db0998641c5 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* -- cgit v1.2.3-18-g5258 From 5cdc5e9e69d4dc3a3630ae1fa666401b2a8dcde6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Jan 2009 20:49:37 +0100 Subject: x86: fully honor "nolapic", fix Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 48578795583..9ca12af6c87 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,9 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { +#ifdef CONFIG_X86_IO_APIC disable_ioapic_setup(); +#endif return; } -- cgit v1.2.3-18-g5258 From c6e50f93db5bd0895ec7c7d1b6f3886c6e1f11b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: x86: cleanup stack protector Impact: cleanup Make the following cleanups. * remove duplicate comment from boot_init_stack_canary() which fits better in the other place - cpu_idle(). * move stack_canary offset check from __switch_to() to boot_init_stack_canary(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/stackprotector.h | 13 ++++++------- arch/x86/kernel/process_64.c | 7 ------- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 5976cd803e9..4a8c9d382c9 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -40,6 +40,4 @@ extern void pda_init(int); #endif -#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c7f0d10bae7..2383e5bb475 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void) u64 tsc; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger). - * + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + + /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa89eabf09e..088bc9a0f82 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -638,13 +638,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -#ifdef CONFIG_CC_STACKPROTECTOR - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); -#endif /* * Now maybe reload the debug registers and handle I/O bitmaps -- cgit v1.2.3-18-g5258 From b4a8f7a262e79ecb0b39beb1449af524a78887f8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: x86: conditionalize stack canary handling in hot path Impact: no unnecessary stack canary swapping during context switch There's no point in moving stack_canary around during context switch if it's not enabled. Conditionalize it. Signed-off-by: Tejun Heo --- arch/x86/include/asm/system.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8cadfe9b119..b77bd8bd3cc 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -86,17 +86,28 @@ do { \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ "r12", "r13", "r14", "r15" +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" +#define __switch_canary_param \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ + , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_param +#endif /* CC_STACKPROTECTOR */ + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq "__percpu_arg([c