From cf7df378aa4ff7da3a44769b7ff6e9eef1a9f3db Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Wed, 12 Jun 2013 14:04:37 -0700 Subject: reboot: rigrate shutdown/reboot to boot cpu We recently noticed that reboot of a 1024 cpu machine takes approx 16 minutes of just stopping the cpus. The slowdown was tracked to commit f96972f2dc63 ("kernel/sys.c: call disable_nonboot_cpus() in kernel_restart()"). The current implementation does all the work of hot removing the cpus before halting the system. We are switching to just migrating to the boot cpu and then continuing with shutdown/reboot. This also has the effect of not breaking x86's command line parameter for specifying the reboot cpu. Note, this code was shamelessly copied from arch/x86/kernel/reboot.c with bits removed pertaining to the reboot_cpu command line parameter. Signed-off-by: Robin Holt Tested-by: Shawn Guo Cc: "Srivatsa S. Bhat" Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Russ Anderson Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index b95d3c72ba2..2bbd9a73b54 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_reboot_notifier); +/* Add backwards compatibility for stable trees. */ +#ifndef PF_NO_SETAFFINITY +#define PF_NO_SETAFFINITY PF_THREAD_BOUND +#endif + +static void migrate_to_reboot_cpu(void) +{ + /* The boot cpu is always logical cpu 0 */ + int cpu = 0; + + cpu_hotplug_disable(); + + /* Make certain the cpu I'm about to reboot on is online */ + if (!cpu_online(cpu)) + cpu = cpumask_first(cpu_online_mask); + + /* Prevent races with other tasks migrating this task */ + current->flags |= PF_NO_SETAFFINITY; + + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed_ptr(current, cpumask_of(cpu)); +} + /** * kernel_restart - reboot the system * @cmd: pointer to buffer containing command to execute for restart @@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); @@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -419,7 +442,7 @@ void kernel_power_off(void) kernel_shutdown_prepare(SYSTEM_POWER_OFF); if (pm_power_off_prepare) pm_power_off_prepare(); - disable_nonboot_cpus(); + migrate_to_reboot_cpu(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); -- cgit v1.2.3-70-g09d2 From 7ec75e1ca1bd35872a5c7b33da4b05395bc74364 Mon Sep 17 00:00:00 2001 From: liguang Date: Wed, 3 Jul 2013 15:05:00 -0700 Subject: kernel/sys.c: sys_reboot(): fix malformed panic message If LINUX_REBOOT_CMD_HALT for reboot failed, the message "cannot halt" will stay on the same line with the next message, so append a '\n'. Signed-off-by: liguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 2bbd9a73b54..c1da757a97b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -511,7 +511,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); - panic("cannot halt"); + panic("cannot halt.\n"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); -- cgit v1.2.3-70-g09d2 From 45c64940c8bb64a042464ecec89d95eb4cce9b07 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:05:01 -0700 Subject: kernel/sys.c:do_sysinfo(): use get_monotonic_boottime() Change do_sysinfo() to use get_monotonic_boottime() instead of do_posix_clock_monotonic_gettime() + monotonic_to_bootbased(). Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: John Stultz Cc: Tomas Janousek Cc: Tomas Smetana Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index c1da757a97b..7bf50dcc6d5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2355,8 +2355,7 @@ static int do_sysinfo(struct sysinfo *info) memset(info, 0, sizeof(struct sysinfo)); - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); + get_monotonic_boottime(&tp); info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); -- cgit v1.2.3-70-g09d2 From 81dabb464139324c005159f5afba377104d8828d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Jul 2013 15:08:26 -0700 Subject: exit.c: unexport __set_special_pids() Move __set_special_pids() from exit.c to sys.c close to its single caller and make it static. And rename it to set_special_pids(), another helper with this name has gone away. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 -- kernel/exit.c | 11 ----------- kernel/sys.c | 13 ++++++++++++- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel/sys.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index ec80684a012..cdd5407b37e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1950,8 +1950,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -extern void __set_special_pids(struct pid *pid); - /* per-UID process charging. */ extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) diff --git a/kernel/exit.c b/kernel/exit.c index 7bb73f9d09d..3a77cd9390a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -312,17 +312,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) } } -void __set_special_pids(struct pid *pid) -{ - struct task_struct *curr = current->group_leader; - - if (task_session(curr) != pid) - change_pid(curr, PIDTYPE_SID, pid); - - if (task_pgrp(curr) != pid) - change_pid(curr, PIDTYPE_PGID, pid); -} - /* * Let kernel threads use this to say that they allow a certain signal. * Must not be used if kthread was cloned with CLONE_SIGHAND. diff --git a/kernel/sys.c b/kernel/sys.c index 7bf50dcc6d5..071de900c82 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1309,6 +1309,17 @@ out: return retval; } +static void set_special_pids(struct pid *pid) +{ + struct task_struct *curr = current->group_leader; + + if (task_session(curr) != pid) + change_pid(curr, PIDTYPE_SID, pid); + + if (task_pgrp(curr) != pid) + change_pid(curr, PIDTYPE_PGID, pid); +} + SYSCALL_DEFINE0(setsid) { struct task_struct *group_leader = current->group_leader; @@ -1328,7 +1339,7 @@ SYSCALL_DEFINE0(setsid) goto out; group_leader->signal->leader = 1; - __set_special_pids(sid); + set_special_pids(sid); proc_clear_tty(group_leader); -- cgit v1.2.3-70-g09d2 From 0efbee70890c992f31a7b294ac654ff6c62d51c5 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Mon, 8 Jul 2013 16:01:31 -0700 Subject: reboot: remove -stable friendly PF_THREAD_BOUND define Remove the prior patch's #define for easier backporting to the stable releases. Signed-off-by: Robin Holt Cc: H. Peter Anvin Cc: Russ Anderson Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 071de900c82..b882440bd0c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -362,11 +362,6 @@ int unregister_reboot_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_reboot_notifier); -/* Add backwards compatibility for stable trees. */ -#ifndef PF_NO_SETAFFINITY -#define PF_NO_SETAFFINITY PF_THREAD_BOUND -#endif - static void migrate_to_reboot_cpu(void) { /* The boot cpu is always logical cpu 0 */ -- cgit v1.2.3-70-g09d2 From 15d94b82565ebfb0cf27830b96e6cf5ed2d12a9a Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Mon, 8 Jul 2013 16:01:32 -0700 Subject: reboot: move shutdown/reboot related functions to kernel/reboot.c This patch is preparatory. It moves reboot related syscall, etc functions from kernel/sys.c to kernel/reboot.c. Signed-off-by: Robin Holt Cc: H. Peter Anvin Cc: Russ Anderson Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- kernel/reboot.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sys.c | 331 ----------------------------------------------------- 3 files changed, 347 insertions(+), 332 deletions(-) create mode 100644 kernel/reboot.c (limited to 'kernel/sys.c') diff --git a/kernel/Makefile b/kernel/Makefile index 271fd3119af..470839d1a30 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o cred.o \ + notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/reboot.c b/kernel/reboot.c new file mode 100644 index 00000000000..37d2636a65c --- /dev/null +++ b/kernel/reboot.c @@ -0,0 +1,346 @@ +/* + * linux/kernel/reboot.c + * + * Copyright (C) 2013 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * this indicates whether you can reboot with ctrl-alt-del: the default is yes + */ + +int C_A_D = 1; +struct pid *cad_pid; +EXPORT_SYMBOL(cad_pid); + +/* + * If set, this is used for preparing the system to power off. + */ + +void (*pm_power_off_prepare)(void); + +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ +void emergency_restart(void) +{ + kmsg_dump(KMSG_DUMP_EMERG); + machine_emergency_restart(); +} +EXPORT_SYMBOL_GPL(emergency_restart); + +void kernel_restart_prepare(char *cmd) +{ + blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + system_state = SYSTEM_RESTART; + usermodehelper_disable(); + device_shutdown(); +} + +/** + * register_reboot_notifier - Register function to be called at reboot time + * @nb: Info about notifier function to be called + * + * Registers a function with the list of functions + * to be called at reboot time. + * + * Currently always returns zero, as blocking_notifier_chain_register() + * always returns zero. + */ +int register_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(register_reboot_notifier); + +/** + * unregister_reboot_notifier - Unregister previously registered reboot notifier + * @nb: Hook to be unregistered + * + * Unregisters a previously registered reboot + * notifier function. + * + * Returns zero on success, or %-ENOENT on failure. + */ +int unregister_reboot_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); +} +EXPORT_SYMBOL(unregister_reboot_notifier); + +static void migrate_to_reboot_cpu(void) +{ + /* The boot cpu is always logical cpu 0 */ + int cpu = 0; + + cpu_hotplug_disable(); + + /* Make certain the cpu I'm about to reboot on is online */ + if (!cpu_online(cpu)) + cpu = cpumask_first(cpu_online_mask); + + /* Prevent races with other tasks migrating this task */ + current->flags |= PF_NO_SETAFFINITY; + + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed_ptr(current, cpumask_of(cpu)); +} + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); + migrate_to_reboot_cpu(); + syscore_shutdown(); + if (!cmd) + printk(KERN_EMERG "Restarting system.\n"); + else + printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); + kmsg_dump(KMSG_DUMP_RESTART); + machine_restart(cmd); +} +EXPORT_SYMBOL_GPL(kernel_restart); + +static void kernel_shutdown_prepare(enum system_states state) +{ + blocking_notifier_call_chain(&reboot_notifier_list, + (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); + system_state = state; + usermodehelper_disable(); + device_shutdown(); +} +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt(void) +{ + kernel_shutdown_prepare(SYSTEM_HALT); + migrate_to_reboot_cpu(); + syscore_shutdown(); + printk(KERN_EMERG "System halted.\n"); + kmsg_dump(KMSG_DUMP_HALT); + machine_halt(); +} + +EXPORT_SYMBOL_GPL(kernel_halt); + +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off(void) +{ + kernel_shutdown_prepare(SYSTEM_POWER_OFF); + if (pm_power_off_prepare) + pm_power_off_prepare(); + migrate_to_reboot_cpu(); + syscore_shutdown(); + printk(KERN_EMERG "Power down.\n"); + kmsg_dump(KMSG_DUMP_POWEROFF); + machine_power_off(); +} +EXPORT_SYMBOL_GPL(kernel_power_off); + +static DEFINE_MUTEX(reboot_mutex); + +/* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers + * so that some mistake won't make this reboot the whole machine. + * You can also set the meaning of the ctrl-alt-del-key here. + * + * reboot doesn't sync: do that yourself before calling this. + */ +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) +{ + struct pid_namespace *pid_ns = task_active_pid_ns(current); + char buffer[256]; + int ret = 0; + + /* We only trust the superuser with rebooting the system. */ + if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) + return -EPERM; + + /* For safety, we require "magic" arguments. */ + if (magic1 != LINUX_REBOOT_MAGIC1 || + (magic2 != LINUX_REBOOT_MAGIC2 && + magic2 != LINUX_REBOOT_MAGIC2A && + magic2 != LINUX_REBOOT_MAGIC2B && + magic2 != LINUX_REBOOT_MAGIC2C)) + return -EINVAL; + + /* + * If pid namespaces are enabled and the current task is in a child + * pid_namespace, the command is handled by reboot_pid_ns() which will + * call do_exit(). + */ + ret = reboot_pid_ns(pid_ns, cmd); + if (ret) + return ret; + + /* Instead of trying to make the power_off code look like + * halt when pm_power_off is not set do it the easy way. + */ + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + cmd = LINUX_REBOOT_CMD_HALT; + + mutex_lock(&reboot_mutex); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + kernel_restart(NULL); + break; + + case LINUX_REBOOT_CMD_CAD_ON: + C_A_D = 1; + break; + + case LINUX_REBOOT_CMD_CAD_OFF: + C_A_D = 0; + break; + + case LINUX_REBOOT_CMD_HALT: + kernel_halt(); + do_exit(0); + panic("cannot halt"); + + case LINUX_REBOOT_CMD_POWER_OFF: + kernel_power_off(); + do_exit(0); + break; + + case LINUX_REBOOT_CMD_RESTART2: + if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { + ret = -EFAULT; + break; + } + buffer[sizeof(buffer) - 1] = '\0'; + + kernel_restart(buffer); + break; + +#ifdef CONFIG_KEXEC + case LINUX_REBOOT_CMD_KEXEC: + ret = kernel_kexec(); + break; +#endif + +#ifdef CONFIG_HIBERNATION + case LINUX_REBOOT_CMD_SW_SUSPEND: + ret = hibernate(); + break; +#endif + + default: + ret = -EINVAL; + break; + } + mutex_unlock(&reboot_mutex); + return ret; +} + +static void deferred_cad(struct work_struct *dummy) +{ + kernel_restart(NULL); +} + +/* + * This function gets called by ctrl-alt-del - ie the keyboard interrupt. + * As it's called within an interrupt, it may NOT sync: the only choice + * is whether to reboot at once, or just ignore the ctrl-alt-del. + */ +void ctrl_alt_del(void) +{ + static DECLARE_WORK(cad_work, deferred_cad); + + if (C_A_D) + schedule_work(&cad_work); + else + kill_cad_pid(SIGINT, 1); +} + +char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; + +static int __orderly_poweroff(bool force) +{ + char **argv; + static char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + int ret; + + argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + if (argv) { + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + argv_free(argv); + } else { + printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", + __func__, poweroff_cmd); + ret = -ENOMEM; + } + + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap. Or not even bother syncing if we're doing an + * emergency shutdown? + */ + emergency_sync(); + kernel_power_off(); + } + + return ret; +} + +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ + __orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + if (force) /* do not override the pending "true" */ + poweroff_force = true; + schedule_work(&poweroff_work); + return 0; +} +EXPORT_SYMBOL_GPL(orderly_poweroff); diff --git a/kernel/sys.c b/kernel/sys.c index b882440bd0c..771129b299f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -115,20 +115,6 @@ int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; EXPORT_SYMBOL(fs_overflowuid); EXPORT_SYMBOL(fs_overflowgid); -/* - * this indicates whether you can reboot with ctrl-alt-del: the default is yes - */ - -int C_A_D = 1; -struct pid *cad_pid; -EXPORT_SYMBOL(cad_pid); - -/* - * If set, this is used for preparing the system to power off. - */ - -void (*pm_power_off_prepare)(void); - /* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. @@ -308,261 +294,6 @@ out_unlock: return retval; } -/** - * emergency_restart - reboot the system - * - * Without shutting down any hardware or taking any locks - * reboot the system. This is called when we know we are in - * trouble so this is our best effort to reboot. This is - * safe to call in interrupt context. - */ -void emergency_restart(void) -{ - kmsg_dump(KMSG_DUMP_EMERG); - machine_emergency_restart(); -} -EXPORT_SYMBOL_GPL(emergency_restart); - -void kernel_restart_prepare(char *cmd) -{ - blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); - system_state = SYSTEM_RESTART; - usermodehelper_disable(); - device_shutdown(); -} - -/** - * register_reboot_notifier - Register function to be called at reboot time - * @nb: Info about notifier function to be called - * - * Registers a function with the list of functions - * to be called at reboot time. - * - * Currently always returns zero, as blocking_notifier_chain_register() - * always returns zero. - */ -int register_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(register_reboot_notifier); - -/** - * unregister_reboot_notifier - Unregister previously registered reboot notifier - * @nb: Hook to be unregistered - * - * Unregisters a previously registered reboot - * notifier function. - * - * Returns zero on success, or %-ENOENT on failure. - */ -int unregister_reboot_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); -} -EXPORT_SYMBOL(unregister_reboot_notifier); - -static void migrate_to_reboot_cpu(void) -{ - /* The boot cpu is always logical cpu 0 */ - int cpu = 0; - - cpu_hotplug_disable(); - - /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(cpu)) - cpu = cpumask_first(cpu_online_mask); - - /* Prevent races with other tasks migrating this task */ - current->flags |= PF_NO_SETAFFINITY; - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, cpumask_of(cpu)); -} - -/** - * kernel_restart - reboot the system - * @cmd: pointer to buffer containing command to execute for restart - * or %NULL - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ -void kernel_restart(char *cmd) -{ - kernel_restart_prepare(cmd); - migrate_to_reboot_cpu(); - syscore_shutdown(); - if (!cmd) - printk(KERN_EMERG "Restarting system.\n"); - else - printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); - kmsg_dump(KMSG_DUMP_RESTART); - machine_restart(cmd); -} -EXPORT_SYMBOL_GPL(kernel_restart); - -static void kernel_shutdown_prepare(enum system_states state) -{ - blocking_notifier_call_chain(&reboot_notifier_list, - (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); - system_state = state; - usermodehelper_disable(); - device_shutdown(); -} -/** - * kernel_halt - halt the system - * - * Shutdown everything and perform a clean system halt. - */ -void kernel_halt(void) -{ - kernel_shutdown_prepare(SYSTEM_HALT); - migrate_to_reboot_cpu(); - syscore_shutdown(); - printk(KERN_EMERG "System halted.\n"); - kmsg_dump(KMSG_DUMP_HALT); - machine_halt(); -} - -EXPORT_SYMBOL_GPL(kernel_halt); - -/** - * kernel_power_off - power_off the system - * - * Shutdown everything and perform a clean system power_off. - */ -void kernel_power_off(void) -{ - kernel_shutdown_prepare(SYSTEM_POWER_OFF); - if (pm_power_off_prepare) - pm_power_off_prepare(); - migrate_to_reboot_cpu(); - syscore_shutdown(); - printk(KERN_EMERG "Power down.\n"); - kmsg_dump(KMSG_DUMP_POWEROFF); - machine_power_off(); -} -EXPORT_SYMBOL_GPL(kernel_power_off); - -static DEFINE_MUTEX(reboot_mutex); - -/* - * Reboot system call: for obvious reasons only root may call it, - * and even root needs to set up some magic numbers in the registers - * so that some mistake won't make this reboot the whole machine. - * You can also set the meaning of the ctrl-alt-del-key here. - * - * reboot doesn't sync: do that yourself before calling this. - */ -SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, - void __user *, arg) -{ - struct pid_namespace *pid_ns = task_active_pid_ns(current); - char buffer[256]; - int ret = 0; - - /* We only trust the superuser with rebooting the system. */ - if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) - return -EPERM; - - /* For safety, we require "magic" arguments. */ - if (magic1 != LINUX_REBOOT_MAGIC1 || - (magic2 != LINUX_REBOOT_MAGIC2 && - magic2 != LINUX_REBOOT_MAGIC2A && - magic2 != LINUX_REBOOT_MAGIC2B && - magic2 != LINUX_REBOOT_MAGIC2C)) - return -EINVAL; - - /* - * If pid namespaces are enabled and the current task is in a child - * pid_namespace, the command is handled by reboot_pid_ns() which will - * call do_exit(). - */ - ret = reboot_pid_ns(pid_ns, cmd); - if (ret) - return ret; - - /* Instead of trying to make the power_off code look like - * halt when pm_power_off is not set do it the easy way. - */ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) - cmd = LINUX_REBOOT_CMD_HALT; - - mutex_lock(&reboot_mutex); - switch (cmd) { - case LINUX_REBOOT_CMD_RESTART: - kernel_restart(NULL); - break; - - case LINUX_REBOOT_CMD_CAD_ON: - C_A_D = 1; - break; - - case LINUX_REBOOT_CMD_CAD_OFF: - C_A_D = 0; - break; - - case LINUX_REBOOT_CMD_HALT: - kernel_halt(); - do_exit(0); - panic("cannot halt.\n"); - - case LINUX_REBOOT_CMD_POWER_OFF: - kernel_power_off(); - do_exit(0); - break; - - case LINUX_REBOOT_CMD_RESTART2: - if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { - ret = -EFAULT; - break; - } - buffer[sizeof(buffer) - 1] = '\0'; - - kernel_restart(buffer); - break; - -#ifdef CONFIG_KEXEC - case LINUX_REBOOT_CMD_KEXEC: - ret = kernel_kexec(); - break; -#endif - -#ifdef CONFIG_HIBERNATION - case LINUX_REBOOT_CMD_SW_SUSPEND: - ret = hibernate(); - break; -#endif - - default: - ret = -EINVAL; - break; - } - mutex_unlock(&reboot_mutex); - return ret; -} - -static void deferred_cad(struct work_struct *dummy) -{ - kernel_restart(NULL); -} - -/* - * This function gets called by ctrl-alt-del - ie the keyboard interrupt. - * As it's called within an interrupt, it may NOT sync: the only choice - * is whether to reboot at once, or just ignore the ctrl-alt-del. - */ -void ctrl_alt_del(void) -{ - static DECLARE_WORK(cad_work, deferred_cad); - - if (C_A_D) - schedule_work(&cad_work); - else - kill_cad_pid(SIGINT, 1); -} - /* * Unprivileged users may change the real gid to the effective gid * or vice versa. (BSD-style) @@ -2287,68 +2018,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, return err ? -EFAULT : 0; } -char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; - -static int __orderly_poweroff(bool force) -{ - char **argv; - static char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL - }; - int ret; - - argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); - if (argv) { - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - argv_free(argv); - } else { - printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", - __func__, poweroff_cmd); - ret = -ENOMEM; - } - - if (ret && force) { - printk(KERN_WARNING "Failed to start orderly shutdown: " - "forcing the issue\n"); - /* - * I guess this should try to kick off some daemon to sync and - * poweroff asap. Or not even bother syncing if we're doing an - * emergency shutdown? - */ - emergency_sync(); - kernel_power_off(); - } - - return ret; -} - -static bool poweroff_force; - -static void poweroff_work_func(struct work_struct *work) -{ - __orderly_poweroff(poweroff_force); -} - -static DECLARE_WORK(poweroff_work, poweroff_work_func); - -/** - * orderly_poweroff - Trigger an orderly system poweroff - * @force: force poweroff if command execution fails - * - * This may be called from any context to trigger a system shutdown. - * If the orderly shutdown fails, it will force an immediate shutdown. - */ -int orderly_poweroff(bool force) -{ - if (force) /* do not override the pending "true" */ - poweroff_force = true; - schedule_work(&poweroff_work); - return 0; -} -EXPORT_SYMBOL_GPL(orderly_poweroff); - /** * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill -- cgit v1.2.3-70-g09d2 From c7b96acf1456ef127fef461fcfedb54b81fecfbb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Mar 2013 12:49:49 -0700 Subject: userns: Kill nsown_capable it makes the wrong thing easy nsown_capable is a special case of ns_capable essentially for just CAP_SETUID and CAP_SETGID. For the existing users it doesn't noticably simplify things and from the suggested patches I have seen it encourages people to do the wrong thing. So remove nsown_capable. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 4 ++-- fs/open.c | 2 +- include/linux/capability.h | 1 - ipc/namespace.c | 2 +- kernel/capability.c | 12 ------------ kernel/groups.c | 2 +- kernel/pid_namespace.c | 2 +- kernel/sys.c | 20 ++++++++++---------- kernel/uid16.c | 2 +- kernel/utsname.c | 2 +- net/core/net_namespace.c | 2 +- net/core/scm.c | 4 ++-- 12 files changed, 21 insertions(+), 34 deletions(-) (limited to 'kernel/sys.c') diff --git a/fs/namespace.c b/fs/namespace.c index 877e4277f49..dc519a1437e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2929,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) struct path root; if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_CHROOT) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; if (fs->users != 1) diff --git a/fs/open.c b/fs/open.c index 9156cb050d0..1c9d23f7e68 100644 --- a/fs/open.c +++ b/fs/open.c @@ -443,7 +443,7 @@ retry: goto dput_and_out; error = -EPERM; - if (!nsown_capable(CAP_SYS_CHROOT)) + if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) diff --git a/include/linux/capability.h b/include/linux/capability.h index d9a4f7f40f3..a6ee1f9a501 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); -extern bool nsown_capable(int cap); extern bool inode_capable(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); diff --git a/ipc/namespace.c b/ipc/namespace.c index 7ee61bf4493..4be6581d3b7 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -171,7 +171,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new) { struct ipc_namespace *ns = new; if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; /* Ditch state from the old ipc namespace */ diff --git a/kernel/capability.c b/kernel/capability.c index f6c2ce5701e..6fc1c8af44d 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -432,18 +432,6 @@ bool capable(int cap) } EXPORT_SYMBOL(capable); -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} - /** * inode_capable - Check superior capability over inode * @inode: The inode in question diff --git a/kernel/groups.c b/kernel/groups.c index 6b2588dd04f..90cf1c38c8e 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!nsown_capable(CAP_SETGID)) + if (!ns_capable(current_user_ns(), CAP_SETGID)) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8edb48..ee1f6bb83d6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -329,7 +329,7 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) struct pid_namespace *ancestor, *new = ns; if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; /* diff --git a/kernel/sys.c b/kernel/sys.c index 771129b299f..c18ecca575b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -337,7 +337,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) if (rgid != (gid_t) -1) { if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || - nsown_capable(CAP_SETGID)) + ns_capable(old->user_ns, CAP_SETGID)) new->gid = krgid; else goto error; @@ -346,7 +346,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) if (gid_eq(old->gid, kegid) || gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || - nsown_capable(CAP_SETGID)) + ns_capable(old->user_ns, CAP_SETGID)) new->egid = kegid; else goto error; @@ -387,7 +387,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) old = current_cred(); retval = -EPERM; - if (nsown_capable(CAP_SETGID)) + if (ns_capable(old->user_ns, CAP_SETGID)) new->gid = new->egid = new->sgid = new->fsgid = kgid; else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) new->egid = new->fsgid = kgid; @@ -471,7 +471,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) new->uid = kruid; if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && - !nsown_capable(CAP_SETUID)) + !ns_capable(old->user_ns, CAP_SETUID)) goto error; } @@ -480,7 +480,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && - !nsown_capable(CAP_SETUID)) + !ns_capable(old->user_ns, CAP_SETUID)) goto error; } @@ -534,7 +534,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) old = current_cred(); retval = -EPERM; - if (nsown_capable(CAP_SETUID)) { + if (ns_capable(old->user_ns, CAP_SETUID)) { new->suid = new->uid = kuid; if (!uid_eq(kuid, old->uid)) { retval = set_user(new); @@ -591,7 +591,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) old = current_cred(); retval = -EPERM; - if (!nsown_capable(CAP_SETUID)) { + if (!ns_capable(old->user_ns, CAP_SETUID)) { if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) goto error; @@ -673,7 +673,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) old = current_cred(); retval = -EPERM; - if (!nsown_capable(CAP_SETGID)) { + if (!ns_capable(old->user_ns, CAP_SETGID)) { if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) goto error; @@ -744,7 +744,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || - nsown_capable(CAP_SETUID)) { + ns_capable(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { new->fsuid = kuid; if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) @@ -783,7 +783,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || - nsown_capable(CAP_SETGID)) { + ns_capable(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { new->fsgid = kgid; goto change_okay; diff --git a/kernel/uid16.c b/kernel/uid16.c index f6c83d7ef00..602e5bbbcef 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!nsown_capable(CAP_SETGID)) + if (!ns_capable(current_user_ns(), CAP_SETGID)) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/utsname.c b/kernel/utsname.c index 2fc8576efaa..fd393124e50 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -114,7 +114,7 @@ static int utsns_install(struct nsproxy *nsproxy, void *new) struct uts_namespace *ns = new; if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; get_uts_ns(ns); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f9765203675..81d3a9a0845 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) struct net *net = ns; if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || - !nsown_capable(CAP_SYS_ADMIN)) + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; put_net(nsproxy->net_ns); diff --git a/net/core/scm.c b/net/core/scm.c index 03795d0147f..c346f58d97c 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds) if ((creds->pid == task_tgid_vnr(current) || ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || - uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && + uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || - gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { + gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { return 0; } return -EPERM; -- cgit v1.2.3-70-g09d2