From ee9c578527a93c66becb526c4a122c5358a959c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 13:59:33 +0100 Subject: dyntick: Remove last reminants of dyntick support Remove the last reminants of dyntick support from the generic kernel. Signed-off-by: Russell King --- kernel/sysctl.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d7ffdc59816..76426d93007 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -131,8 +131,6 @@ extern int sysctl_userprocess_debug; extern int spin_retry; #endif -extern int sysctl_hz_timer; - #ifdef CONFIG_BSD_PROCESS_ACCT extern int acct_parm[]; #endif @@ -561,16 +559,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, -- cgit v1.2.3-70-g09d2 From b0fc494fae96a7089f3651cb451f461c7291244c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 6 +++ kernel/sysctl.c | 11 +++++ kernel/trace/ftrace.c | 125 ++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 124 insertions(+), 18 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0ee204..ccd8537dbdb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca..efaf7c5500e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_FTRACE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ftrace_enable_sysctl, + }, +#endif #ifdef CONFIG_KMOD { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d1ae2ba2527..d3de37299ba 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -20,12 +20,24 @@ #include #include #include +#include #include #include #include "trace.h" +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ENABLED_INIT 1 +#else +# define FTRACE_ENABLED_INIT 0 +#endif + +int ftrace_enabled = FTRACE_ENABLED_INIT; +static int last_ftrace_enabled = FTRACE_ENABLED_INIT; + static DEFINE_SPINLOCK(ftrace_lock); +static DEFINE_MUTEX(ftrace_sysctl_lock); + static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, @@ -78,14 +90,16 @@ static int notrace __register_ftrace_function(struct ftrace_ops *ops) smp_wmb(); ftrace_list = ops; - /* - * For one func, simply call it directly. - * For more than one func, call the chain. - */ - if (ops->next == &ftrace_list_end) - ftrace_trace_function = ops->func; - else - ftrace_trace_function = ftrace_list_func; + if (ftrace_enabled) { + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + } spin_unlock(&ftrace_lock); @@ -120,10 +134,12 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) *p = (*p)->next; - /* If we only have one func left, then call that directly */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_enabled) { + /* If we only have one func left, then call that directly */ + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + } out: spin_unlock(&ftrace_lock); @@ -263,7 +279,8 @@ static void notrace ftrace_startup(void) goto out; __unregister_ftrace_function(&ftrace_shutdown_ops); - ftrace_run_startup_code(); + if (ftrace_enabled) + ftrace_run_startup_code(); out: mutex_unlock(&ftraced_lock); } @@ -275,13 +292,32 @@ static void notrace ftrace_shutdown(void) if (ftraced_suspend) goto out; - ftrace_run_shutdown_code(); + if (ftrace_enabled) + ftrace_run_shutdown_code(); __register_ftrace_function(&ftrace_shutdown_ops); out: mutex_unlock(&ftraced_lock); } +static void notrace ftrace_startup_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if we want ftrace running */ + if (ftraced_suspend) + ftrace_run_startup_code(); + mutex_unlock(&ftraced_lock); +} + +static void notrace ftrace_shutdown_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if ftrace is running */ + if (ftraced_suspend) + ftrace_run_shutdown_code(); + mutex_unlock(&ftraced_lock); +} + static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; @@ -341,8 +377,9 @@ static int notrace ftraced(void *ignore) /* check once a second */ schedule_timeout(HZ); + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - if (ftraced_trigger && !ftraced_suspend) { + if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { ftrace_record_suspend++; ftrace_update_code(); usecs = nsecs_to_usecs(ftrace_update_time); @@ -360,6 +397,7 @@ static int notrace ftraced(void *ignore) ftrace_record_suspend--; } mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_sysctl_lock); ftrace_shutdown_replenish(); @@ -389,8 +427,10 @@ static int __init notrace ftrace_shutdown_init(void) core_initcall(ftrace_shutdown_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** @@ -406,9 +446,15 @@ core_initcall(ftrace_shutdown_init); */ int register_ftrace_function(struct ftrace_ops *ops) { + int ret; + + mutex_lock(&ftrace_sysctl_lock); ftrace_startup(); - return __register_ftrace_function(ops); + ret = __register_ftrace_function(ops); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; } /** @@ -421,10 +467,53 @@ int unregister_ftrace_function(struct ftrace_ops *ops) { int ret; + mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); if (ftrace_list == &ftrace_list_end) ftrace_shutdown(); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; +} + +notrace int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + mutex_lock(&ftrace_sysctl_lock); + + ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + + if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) + goto out; + + last_ftrace_enabled = ftrace_enabled; + + if (ftrace_enabled) { + + ftrace_startup_sysctl(); + + /* we are starting ftrace again */ + if (ftrace_list != &ftrace_list_end) { + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + else + ftrace_trace_function = ftrace_list_func; + } + + } else { + /* stopping ftrace calls (just send to ftrace_stub) */ + ftrace_trace_function = ftrace_stub; + + ftrace_shutdown_sysctl(); + } + + out: + mutex_unlock(&ftrace_sysctl_lock); return ret; } -- cgit v1.2.3-70-g09d2 From 9c44bc03fff44ff04237a7d92e35304a0e50c331 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:04 +0200 Subject: softlockup: allow panic on lockup allow users to configure the softlockup detector to generate a panic instead of a warning message. high-availability systems might opt for this strict method (combined with panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. it's default-disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 3 +++ include/linux/sched.h | 3 ++- kernel/softlockup.c | 21 +++++++++++++++++++++ kernel/sysctl.c | 11 +++++++++++ lib/Kconfig.debug | 26 +++++++++++++++++++++++++- 5 files changed, 62 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432c731..042588fa12e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1971,6 +1971,9 @@ and is between 256 and 4096 characters. It is defined in the file snd-ymfpci= [HW,ALSA] + softlockup_panic= + [KNL] Should the soft-lockup detector generate panics. + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/sonypi.txt diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4..71f5972dc48 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -294,7 +294,8 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern unsigned long softlockup_thresh; +extern unsigned int softlockup_panic; +extern unsigned long softlockup_thresh; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 01b6522fd92..78e0ad21cb0 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -27,6 +27,21 @@ static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static int __read_mostly did_panic; unsigned long __read_mostly softlockup_thresh = 60; +/* + * Should we panic (and reboot, if panic_timeout= is set) when a + * soft-lockup occurs: + */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; + +static int __init softlockup_panic_setup(char *str) +{ + softlockup_panic = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("softlockup_panic=", softlockup_panic_setup); + static int softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) { @@ -120,6 +135,9 @@ void softlockup_tick(void) else dump_stack(); spin_unlock(&print_lock); + + if (softlockup_panic) + panic("softlockup: hung tasks"); } /* @@ -172,6 +190,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now) t->last_switch_timestamp = now; touch_nmi_watchdog(); + + if (softlockup_panic) + panic("softlockup: blocked tasks"); } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca..2d3b388c402 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -727,6 +727,17 @@ static struct ctl_table kern_table[] = { }, #endif #ifdef CONFIG_DETECT_SOFTLOCKUP + { + .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f41aa1..509ae35a9ef 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -147,7 +147,7 @@ config DETECT_SOFTLOCKUP help Say Y here to enable the kernel to detect "soft lockups", which are bugs that cause the kernel to loop in kernel - mode for more than 10 seconds, without giving other tasks a + mode for more than 60 seconds, without giving other tasks a chance to run. When a soft-lockup is detected, the kernel will print the @@ -159,6 +159,30 @@ config DETECT_SOFTLOCKUP can be detected via the NMI-watchdog, on platforms that support it.) +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on DETECT_SOFTLOCKUP + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on DETECT_SOFTLOCKUP + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS -- cgit v1.2.3-70-g09d2 From 9383d9679056e6cc4e7ff70f31da945a268238f4 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Mon, 12 May 2008 21:21:14 +0200 Subject: softlockup: fix softlockup_thresh unaligned access and disable detection at runtime Fix unaligned access errors when setting softlockup_thresh on 64 bit platforms. Allow softlockup detection to be disabled by setting softlockup_thresh <= 0. Detect that boot time softlockup detection has been disabled earlier in softlockup_tick. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 +- kernel/softlockup.c | 12 ++++++++++-- kernel/sysctl.c | 9 +++++---- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 71f5972dc48..ea26221644e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -295,10 +295,10 @@ extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern unsigned long softlockup_thresh; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; +extern int softlockup_thresh; #else static inline void softlockup_tick(void) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 78e0ad21cb0..a3a0b239b7f 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,7 +25,7 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static int __read_mostly did_panic; -unsigned long __read_mostly softlockup_thresh = 60; +int __read_mostly softlockup_thresh = 60; /* * Should we panic (and reboot, if panic_timeout= is set) when a @@ -94,6 +94,14 @@ void softlockup_tick(void) struct pt_regs *regs = get_irq_regs(); unsigned long now; + /* Is detection switched off? */ + if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { + /* Be sure we don't false trigger if switched back on */ + if (touch_timestamp) + per_cpu(touch_timestamp, this_cpu) = 0; + return; + } + if (touch_timestamp == 0) { touch_softlockup_watchdog(); return; @@ -104,7 +112,7 @@ void softlockup_tick(void) /* report at most once a second */ if ((print_timestamp >= touch_timestamp && print_timestamp < (touch_timestamp + 1)) || - did_panic || !per_cpu(watchdog_task, this_cpu)) { + did_panic) { return; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2d3b388c402..31c19a79738 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,12 +84,13 @@ extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ -#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) +#ifdef CONFIG_HIGHMEM static int one = 1; #endif #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; +static int neg_one = -1; #endif #ifdef CONFIG_MMU @@ -742,11 +743,11 @@ static struct ctl_table kern_table[] = { .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", .data = &softlockup_thresh, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, - .extra1 = &one, + .extra1 = &neg_one, .extra2 = &sixty, }, { -- cgit v1.2.3-70-g09d2 From 1c4cd6dd1d0fd3057bb6b8c87460049497889d1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:14 +0200 Subject: softlockup: fix softlockup_thresh fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 31c19a79738..a829dc8d7a9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,7 +84,7 @@ extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ -#ifdef CONFIG_HIGHMEM +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) static int one = 1; #endif -- cgit v1.2.3-70-g09d2 From 31a72bce0bd6f3e0114009288bccbc96376eeeca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Jun 2008 09:26:49 -0700 Subject: rcu: make rcutorture more vicious: reinstate boot-time testing This patch re-institutes the ability to build rcutorture directly into the Linux kernel. The reason that this capability was removed was that this could result in your kernel being pretty much useless, as rcutorture would be running starting from early boot. This problem has been avoided by (1) making rcutorture run only three seconds of every six by default, (2) adding a CONFIG_RCU_TORTURE_TEST_RUNNABLE that permits rcutorture to be quiesced at boot time, and (3) adding a sysctl in /proc named /proc/sys/kernel/rcutorture_runnable that permits rcutorture to be quiesced and unquiesced when built into the kernel. Please note that this /proc file is -not- available when rcutorture is built as a module. Please also note that to get the earlier take-no-prisoners behavior, you must use the boot command line to set rcutorture's "stutter" parameter to zero. The rcutorture quiescing mechanism is currently quite crude: loops in each rcutorture process that poll a global variable once per tick. Suggestions for improvement are welcome. The default action will be to reduce the polling rate to a few times per second. Signed-off-by: Paul E. McKenney Suggested-by: Ingo Molnar Signed-off-by: Ingo Molnar --- Documentation/RCU/torture.txt | 21 ++++++++++++++------- kernel/rcutorture.c | 9 ++++++++- kernel/sysctl.c | 13 +++++++++++++ lib/Kconfig.debug | 21 +++++++++++++++++++-- 4 files changed, 54 insertions(+), 10 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 02b3d14c020..516527d4bc5 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -10,13 +10,20 @@ status messages via printk(), which can be examined via the dmesg command (perhaps grepping for "torture"). The test is started when the module is loaded, and stops when the module is unloaded. -However, actually setting this config option to "y" results in the system -running the test immediately upon boot, and ending only when the system -is taken down. Normally, one will instead want to build the system -with CONFIG_RCU_TORTURE_TEST=m and to use modprobe and rmmod to control -the test, perhaps using a script similar to the one shown at the end of -this document. Note that you will need CONFIG_MODULE_UNLOAD in order -to be able to end the test. +CONFIG_RCU_TORTURE_TEST_RUNNABLE + +It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will +result in the tests being loaded into the base kernel. In this case, +the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify +whether the RCU torture tests are to be started immediately during +boot or whether the /proc/sys/kernel/rcutorture_runnable file is used +to enable them. This /proc file can be used to repeatedly pause and +restart the tests, regardless of the initial state specified by the +CONFIG_RCU_TORTURE_TEST_RUNNABLE config option. + +You will normally -not- want to start the RCU torture tests during boot +(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing +this can sometimes be useful in finding boot-time bugs. MODULE PARAMETERS diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 98ae7d16822..27003e2421c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -125,6 +125,13 @@ static struct list_head rcu_torture_removed; static int stutter_pause_test = 0; +#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) +#define RCUTORTURE_RUNNABLE_INIT 1 +#else +#define RCUTORTURE_RUNNABLE_INIT 0 +#endif +int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; + /* * Allocate an element from the rcu_tortures pool. */ @@ -188,7 +195,7 @@ rcu_random(struct rcu_random_state *rrsp) static void rcu_stutter_wait(void) { - while (stutter_pause_test) + while (stutter_pause_test || !rcutorture_runnable) schedule_timeout_interruptible(1); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca..c6887cf135c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -82,6 +82,9 @@ extern int maps_protect; extern int sysctl_stat_interval; extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; +#ifdef CONFIG_RCU_TORTURE_TEST +extern int rcutorture_runnable; +#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ /* Constants used for minimum and maximum */ #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) @@ -813,6 +816,16 @@ static struct ctl_table kern_table[] = { .child = key_sysctls, }, #endif +#ifdef CONFIG_RCU_TORTURE_TEST + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rcutorture_runnable", + .data = &rcutorture_runnable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f51ba2fa266..c35a86a516a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -537,11 +537,28 @@ config RCU_TORTURE_TEST on the RCU infrastructure. The kernel module may be built after the fact on the running kernel to be tested, if desired. - Say Y here if you want RCU torture tests to start automatically - at boot time (you probably don't). + Say Y here if you want RCU torture tests to be built into + the kernel. Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. +config RCU_TORTURE_TEST_RUNNABLE + bool "torture tests for RCU runnable by default" + depends on RCU_TORTURE_TEST = y + default n + help + This option provides a way to build the RCU torture tests + directly into the kernel without them starting up at boot + time. You can use /proc/sys/kernel/rcutorture_runnable + to manually override this setting. This /proc file is + available only when the RCU torture tests have been built + into the kernel. + + Say Y here if you want the RCU torture tests to start during + boot (you probably don't). + Say N here if you want the RCU torture tests to start only + after being manually enabled via /proc. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v1.2.3-70-g09d2 From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:35 +0200 Subject: sched: update shares on wakeup We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ kernel/sched.c | 30 +++++++++++++++++++++++++++++- kernel/sched_features.h | 3 ++- kernel/sysctl.c | 8 ++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072db..835b6c6fcc5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -783,6 +783,8 @@ struct sched_domain { unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ + u64 last_update; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_shares_ratelimit; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index 1cff969f664..62db0891025 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -777,6 +777,12 @@ late_initcall(sched_init_debug); */ const_debug unsigned int sysctl_sched_nr_migrate = 32; +/* + * ratelimit for updating the group shares. + * default: 0.5ms + */ +const_debug unsigned int sysctl_sched_shares_ratelimit = 500000; + /* * period over which we measure -rt task cpu usage in us. * default: 1s @@ -1590,7 +1596,13 @@ tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) static void update_shares(struct sched_domain *sd) { - walk_tg_tree(tg_nop, tg_shares_up, 0, sd); + u64 now = cpu_clock(raw_smp_processor_id()); + s64 elapsed = now - sd->last_update; + + if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { + sd->last_update = now; + walk_tg_tree(tg_nop, tg_shares_up, 0, sd); + } } static void update_shares_locked(struct rq *rq, struct sched_domain *sd) @@ -2199,6 +2211,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (!sched_feat(SYNC_WAKEUPS)) sync = 0; +#ifdef CONFIG_SMP + if (sched_feat(LB_WAKEUP_UPDATE)) { + struct sched_domain *sd; + + this_cpu = raw_smp_processor_id(); + cpu = task_cpu(p); + + for_each_domain(this_cpu, sd) { + if (cpu_isset(cpu, sd->span)) { + update_shares(sd); + break; + } + } + } +#endif + smp_wmb(); rq = task_rq_lock(p, &flags); old_state = p->state; diff --git a/kernel/sched_features.h b/kernel/sched_features.h index d56e3053e74..7d616d2a2a3 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -8,4 +8,5 @@ SCHED_FEAT(SYNC_WAKEUPS, 1) SCHED_FEAT(HRTICK, 1) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(ASYM_GRAN, 1) -SCHED_FEAT(LB_BIAS, 0) \ No newline at end of file +SCHED_FEAT(LB_BIAS, 0) +SCHED_FEAT(LB_WAKEUP_UPDATE, 1) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 29116652dca..fe8cdc80ff0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -264,6 +264,14 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_shares_ratelimit", + .data = &sysctl_sched_shares_ratelimit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "sched_child_runs_first", -- cgit v1.2.3-70-g09d2 From 4dca10a96041f78bed11ce9e4a5cfde813ec4ccb Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 7 Jul 2008 18:37:04 -0700 Subject: softlockup: fix invalid proc_handler for softlockup_panic The type of softlockup_panic is int, but the proc_handler is proc_doulongvec_minmax(). This handler is for unsigned long. This handler should be proc_dointvec_minmax(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e56d2f9141..ab59ac008ca 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -741,7 +741,7 @@ static struct ctl_table kern_table[] = { .data = &softlockup_panic, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one, -- cgit v1.2.3-70-g09d2 From a1ef5adb4cad43460ebba23c5a78cf4a55bb6a5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Jul 2008 19:00:17 +0200 Subject: remove CONFIG_KMOD from core kernel code Always compile request_module when the kernel allows modules. Signed-off-by: Johannes Berg Signed-off-by: Rusty Russell --- include/linux/kmod.h | 2 +- kernel/exec_domain.c | 2 +- kernel/kmod.c | 2 +- kernel/sysctl.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 9bdb28d6660..0509c4ce485 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -25,7 +25,7 @@ #define KMOD_PATH_LEN 256 -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ extern int request_module(const char * name, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index a9e6bad9f70..c1ef192aa65 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -65,7 +65,7 @@ lookup_exec_domain(u_long personality) goto out; } -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES read_unlock(&exec_domains_lock); request_module("personality-%ld", pers); read_lock(&exec_domains_lock); diff --git a/kernel/kmod.c b/kernel/kmod.c index 8df97d3dfda..90d7af1c165 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -42,7 +42,7 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES /* modprobe_path is set via /proc/sys. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6b16e16428d..b859e6b5a76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -110,7 +110,7 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES extern char modprobe_path[]; #endif #ifdef CONFIG_CHR_DEV_SG @@ -475,7 +475,7 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, .procname = "modprobe", -- cgit v1.2.3-70-g09d2 From c748e1340e0de3fa7fed86f8bdf499be9242afff Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 23 Jul 2008 21:27:03 -0700 Subject: mm/vmstat.c: proper externs This patch adds proper extern declarations for five variables in include/linux/vmstat.h Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 4 ---- include/linux/vmstat.h | 6 ++++++ kernel/sysctl.c | 2 +- mm/vmstat.c | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index c652d469dc0..b14f43d25e9 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #undef K } -extern const struct seq_operations fragmentation_op; static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -extern const struct seq_operations pagetypeinfo_op; static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -extern const struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = { .release = seq_release, }; -extern const struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { return seq_open(file, &vmstat_op); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e83b69346d2..58334d43951 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -44,6 +44,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_VM_EVENT_ITEMS }; +extern const struct seq_operations fragmentation_op; +extern const struct seq_operations pagetypeinfo_op; +extern const struct seq_operations zoneinfo_op; +extern const struct seq_operations vmstat_op; +extern int sysctl_stat_interval; + #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2a7b9d88706..1f7b3b76a16 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,6 @@ extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int maps_protect; -extern int sysctl_stat_interval; extern int latencytop_enabled; extern int sysctl_nr_open_min, sysctl_nr_open_max; #ifdef CONFIG_RCU_TORTURE_TEST diff --git a/mm/vmstat.c b/mm/vmstat.c index c3d4a781802..b0d08e667ec 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_VM_EVENT_COUNTERS -- cgit v1.2.3-70-g09d2 From e5ff215941d59f8ae6bf58f6428dc5c26745a612 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:42 -0700 Subject: hugetlb: multiple hstates for multiple page sizes Add basic support for more than one hstate in hugetlbfs. This is the key to supporting multiple hugetlbfs page sizes at once. - Rather than a single hstate, we now have an array, with an iterator - default_hstate continues to be the struct hstate which we use by default - Add functions for architectures to register new hstates [akpm@linux-foundation.org: coding-style fixes] Acked-by: Adam Litke Acked-by: Nishanth Aravamudan Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 19 ++++++- kernel/sysctl.c | 8 ++- mm/hugetlb.c | 148 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 142 insertions(+), 33 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ad2271e11f9..b75bdb4deba 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); -extern unsigned long max_huge_pages; -extern unsigned long sysctl_overcommit_huge_pages; extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; @@ -181,7 +179,17 @@ struct hstate { unsigned int surplus_huge_pages_node[MAX_NUMNODES]; }; -extern struct hstate default_hstate; +void __init hugetlb_add_hstate(unsigned order); +struct hstate *size_to_hstate(unsigned long size); + +#ifndef HUGE_MAX_HSTATE +#define HUGE_MAX_HSTATE 1 +#endif + +extern struct hstate hstates[HUGE_MAX_HSTATE]; +extern unsigned int default_hstate_idx; + +#define default_hstate (hstates[default_hstate_idx]) static inline struct hstate *hstate_vma(struct vm_area_struct *vma) { @@ -230,6 +238,11 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) #include +static inline struct hstate *page_hstate(struct page *page) +{ + return size_to_hstate(PAGE_SIZE << compound_order(page)); +} + #else struct hstate {}; #define hstate_file(f) NULL diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1f7b3b76a16..1a8299d1fe5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = { #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", - .data = &max_huge_pages, + .data = NULL, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_sysctl_handler, @@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nr_overcommit_hugepages", - .data = &sysctl_overcommit_huge_pages, - .maxlen = sizeof(sysctl_overcommit_huge_pages), + .data = NULL, + .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_overcommit_handler, + .extra1 = (void *)&hugetlb_zero, + .extra2 = (void *)&hugetlb_infinity, }, #endif { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0d8153e25f0..82378d44a0c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -22,12 +22,19 @@ #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; -unsigned long max_huge_pages; -unsigned long sysctl_overcommit_huge_pages; static gfp_t htlb_alloc_mask = GFP_HIGHUSER; unsigned long hugepages_treat_as_movable; -struct hstate default_hstate; +static int max_hstate; +unsigned int default_hstate_idx; +struct hstate hstates[HUGE_MAX_HSTATE]; + +/* for command line parsing */ +static struct hstate * __initdata parsed_hstate; +static unsigned long __initdata default_hstate_max_huge_pages; + +#define for_each_hstate(h) \ + for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++) /* * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages @@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page) __free_pages(page, huge_page_order(h)); } +struct hstate *size_to_hstate(unsigned long size) +{ + struct hstate *h; + + for_each_hstate(h) { + if (huge_page_size(h) == size) + return h; + } + return NULL; +} + static void free_huge_page(struct page *page) { /* * Can't pass hstate in here because it is called from the * compound page destructor. */ - struct hstate *h = &default_hstate; + struct hstate *h = page_hstate(page); int nid = page_to_nid(page); struct address_space *mapping; @@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, return page; } -static int __init hugetlb_init(void) +static void __init hugetlb_init_one_hstate(struct hstate *h) { unsigned long i; - struct hstate *h = &default_hstate; - - if (HPAGE_SHIFT == 0) - return 0; - - if (!h->order) { - h->order = HPAGE_SHIFT - PAGE_SHIFT; - h->mask = HPAGE_MASK; - } for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&h->hugepage_freelists[i]); h->hugetlb_next_nid = first_node(node_online_map); - for (i = 0; i < max_huge_pages; ++i) { + for (i = 0; i < h->max_huge_pages; ++i) { if (!alloc_fresh_huge_page(h)) break; } - max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; - printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", - h->free_huge_pages); + h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; +} + +static void __init hugetlb_init_hstates(void) +{ + struct hstate *h; + + for_each_hstate(h) { + hugetlb_init_one_hstate(h); + } +} + +static void __init report_hugepages(void) +{ + struct hstate *h; + + for_each_hstate(h) { + printk(KERN_INFO "Total HugeTLB memory allocated, " + "%ld %dMB pages\n", + h->free_huge_pages, + 1 << (h->order + PAGE_SHIFT - 20)); + } +} + +static int __init hugetlb_init(void) +{ + BUILD_BUG_ON(HPAGE_SHIFT == 0); + + if (!size_to_hstate(HPAGE_SIZE)) { + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); + parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; + } + default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; + + hugetlb_init_hstates(); + + report_hugepages(); + return 0; } module_init(hugetlb_init); +/* Should be called on processing a hugepagesz=... option */ +void __init hugetlb_add_hstate(unsigned order) +{ + struct hstate *h; + if (size_to_hstate(PAGE_SIZE << order)) { + printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); + return; + } + BUG_ON(max_hstate >= HUGE_MAX_HSTATE); + BUG_ON(order == 0); + h = &hstates[max_hstate++]; + h->order = order; + h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); + hugetlb_init_one_hstate(h); + parsed_hstate = h; +} + static int __init hugetlb_setup(char *s) { - if (sscanf(s, "%lu", &max_huge_pages) <= 0) - max_huge_pages = 0; + unsigned long *mhp; + + /* + * !max_hstate means we haven't parsed a hugepagesz= parameter yet, + * so this hugepages= parameter goes to the "default hstate". + */ + if (!max_hstate) + mhp = &default_hstate_max_huge_pages; + else + mhp = &parsed_hstate->max_huge_pages; + + if (sscanf(s, "%lu", mhp) <= 0) + *mhp = 0; + return 1; } __setup("hugepages=", hugetlb_setup); @@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count) if (PageHighMem(page)) continue; list_del(&page->lru); - update_and_free_page(page); + update_and_free_page(h, page); h->free_huge_pages--; h->free_huge_pages_node[page_to_nid(page)]--; } @@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count) #endif #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) -static unsigned long set_max_huge_pages(unsigned long count) +static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count) { unsigned long min_count, ret; - struct hstate *h = &default_hstate; /* * Increase the pool size @@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { + struct hstate *h = &default_hstate; + unsigned long tmp; + + if (!write) + tmp = h->max_huge_pages; + + table->data = &tmp; + table->maxlen = sizeof(unsigned long); proc_doulongvec_minmax(table, write, file, buffer, length, ppos); - max_huge_pages = set_max_huge_pages(max_huge_pages); + + if (write) + h->max_huge_pages = set_max_huge_pages(h, tmp); + return 0; } @@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; + unsigned long tmp; + + if (!write) + tmp = h->nr_overcommit_huge_pages; + + table->data = &tmp; + table->maxlen = sizeof(unsigned long); proc_doulongvec_minmax(table, write, file, buffer, length, ppos); - spin_lock(&hugetlb_lock); - h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; - spin_unlock(&hugetlb_lock); + + if (write) { + spin_lock(&hugetlb_lock); + h->nr_overcommit_huge_pages = tmp; + spin_unlock(&hugetlb_lock); + } + return 0; } -- cgit v1.2.3-70-g09d2