about | summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author David Woodhouse <dwmw2@infradead.org> 2006-10-01 17:55:53 +0100
committer David Woodhouse <dwmw2@infradead.org> 2006-10-01 17:55:53 +0100
commit 8a84fc15ae5cafcc366dd85cf8e1ab2040679abc (patch)
tree 5d8dce194c9667fa92e9ec9f545cec867a9a1e0d /kernel
parent 28b79ff9661b22e4c41c0d00d4ab8503e810f13d (diff)
parent 82965addad66fce61a92c5f03104ea90b0b87124 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Manually resolve conflict in include/mtd/Kbuild

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 4
-rw-r--r-- kernel/acct.c 36
-rw-r--r-- kernel/audit.c 14
-rw-r--r-- kernel/auditfilter.c 2
-rw-r--r-- kernel/auditsc.c 11
-rw-r--r-- kernel/capability.c 2
-rw-r--r-- kernel/compat.c 35
-rw-r--r-- kernel/cpu.c 138
-rw-r--r-- kernel/cpuset.c 121
-rw-r--r-- kernel/exit.c 41
-rw-r--r-- kernel/fork.c 21
-rw-r--r-- kernel/futex.c 10
-rw-r--r-- kernel/hrtimer.c 20
-rw-r--r-- kernel/irq/chip.c 6
-rw-r--r-- kernel/irq/handle.c 2
-rw-r--r-- kernel/kexec.c 8
-rw-r--r-- kernel/kfifo.c 28
-rw-r--r-- kernel/kmod.c 71
-rw-r--r-- kernel/latency.c 279
-rw-r--r-- kernel/lockdep.c 26
-rw-r--r-- kernel/module.c 32
-rw-r--r-- kernel/panic.c 12
-rw-r--r-- kernel/params.c 15
-rw-r--r-- kernel/pid.c 12
-rw-r--r-- kernel/posix-cpu-timers.c 101
-rw-r--r-- kernel/posix-timers.c 21
-rw-r--r-- kernel/power/Kconfig 22
-rw-r--r-- kernel/power/Makefile 2
-rw-r--r-- kernel/power/disk.c 11
-rw-r--r-- kernel/power/main.c 40
-rw-r--r-- kernel/power/power.h 59
-rw-r--r-- kernel/power/smp.c 62
-rw-r--r-- kernel/power/snapshot.c 1155
-rw-r--r-- kernel/power/swap.c 270
-rw-r--r-- kernel/power/swsusp.c 14
-rw-r--r-- kernel/power/user.c 17
-rw-r--r-- kernel/printk.c 3
-rw-r--r-- kernel/profile.c 16
-rw-r--r-- kernel/ptrace.c 55
-rw-r--r-- kernel/rcutorture.c 8
-rw-r--r-- kernel/relay.c 38
-rw-r--r-- kernel/resource.c 32
-rw-r--r-- kernel/rtmutex.c 51
-rw-r--r-- kernel/sched.c 130
-rw-r--r-- kernel/signal.c 11
-rw-r--r-- kernel/softirq.c 4
-rw-r--r-- kernel/softlockup.c 3
-rw-r--r-- kernel/spinlock.c 20
-rw-r--r-- kernel/stop_machine.c 3
-rw-r--r-- kernel/sys.c 112
-rw-r--r-- kernel/sys_ni.c 5
-rw-r--r-- kernel/sysctl.c 156
-rw-r--r-- kernel/taskstats.c 10
-rw-r--r-- kernel/time.c 173
-rw-r--r-- kernel/time/Makefile 2
-rw-r--r-- kernel/time/ntp.c 350
-rw-r--r-- kernel/timer.c 283
-rw-r--r-- kernel/tsacct.c 124
-rw-r--r-- kernel/unwind.c 39
59 files changed, 2878 insertions, 1470 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d62ec66c1af..aacaafb28b9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o
+ hrtimer.o rwsem.o latency.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@@ -49,7 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
-obj-$(CONFIG_TASKSTATS) += taskstats.o
+obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index 2a7c933651c..0aad5ca36a8 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -483,10 +483,14 @@ static void do_acct_process(struct file *file)
ac.ac_ppid = current->parent->tgid;
#endif
- read_lock(&tasklist_lock); /* pin current->signal */
+ mutex_lock(&tty_mutex);
+ /* FIXME: Whoever is responsible for current->signal locking needs
+ to use the same locking all over the kernel and document it */
+ read_lock(&tasklist_lock);
ac.ac_tty = current->signal->tty ?
old_encode_dev(tty_devnum(current->signal->tty)) : 0;
read_unlock(&tasklist_lock);
+ mutex_unlock(&tty_mutex);
spin_lock_irq(&current->sighand->siglock);
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
@@ -598,33 +602,3 @@ void acct_process(void)
do_acct_process(file);
fput(file);
}
-
-
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
-{
- if (likely(tsk->mm)) {
- long delta =
- cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
-
- if (delta == 0)
- return;
- tsk->acct_stimexpd = tsk->stime;
- tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
- tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
- }
-}
-
-/**
- * acct_clear_integrals - clear the mm integral fields in task_struct
- * @tsk: task_struct whose accounting fields are cleared
- */
-void acct_clear_integrals(struct task_struct *tsk)
-{
- tsk->acct_stimexpd = 0;
- tsk->acct_rss_mem1 = 0;
- tsk->acct_vm_mem1 = 0;
-}
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c962..f9889ee7782 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
char *ctx = NULL;
u32 len;
int rc;
- if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
return rc;
else
audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_PID) {
int old = audit_pid;
if (sid) {
- if ((err = selinux_ctxid_to_string(
+ if ((err = selinux_sid_to_string(
sid, &ctx, &len)))
return err;
else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
"user pid=%d uid=%u auid=%u",
pid, uid, loginuid);
if (sid) {
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
sid, &ctx, &len)) {
audit_log_format(ab,
" ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
loginuid, sid);
break;
case AUDIT_SIGNAL_INFO:
- err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+ err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
if (err)
return err;
sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72..1a58a81fb09 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
if (sid) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(sid, &ctx, &len))
+ if (selinux_sid_to_string(sid, &ctx, &len))
audit_log_format(ab, " ssid=%u", sid);
else
audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a010..10514763175 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
logged upon error */
if (f->se_rule) {
if (need_sid) {
- selinux_task_ctxid(tsk, &sid);
+ selinux_get_task_sid(tsk, &sid);
need_sid = 0;
}
result = selinux_audit_rule_match(sid, f->type,
@@ -817,6 +817,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
audit_log_format(ab, " success=%s exit=%ld",
(context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
context->return_code);
+
+ mutex_lock(&tty_mutex);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
@@ -838,6 +840,9 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->gid,
context->euid, context->suid, context->fsuid,
context->egid, context->sgid, context->fsgid, tty);
+
+ mutex_unlock(&tty_mutex);
+
audit_log_task_info(ab, tsk);
if (context->filterkey) {
audit_log_format(ab, " key=");
@@ -898,7 +903,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (axi->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
axi->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u",
axi->osid);
@@ -1005,7 +1010,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
if (n->osid != 0) {
char *ctx = NULL;
u32 len;
- if (selinux_ctxid_to_string(
+ if (selinux_sid_to_string(
n->osid, &ctx, &len)) {
audit_log_format(ab, " osid=%u", n->osid);
call_panic = 2;
diff --git a/kernel/capability.c b/kernel/capability.c
index c7685ad00a9..edb845a6e84 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
int found = 0;
do_each_thread(g, target) {
- if (target == current || target->pid == 1)
+ if (target == current || is_init(target))
continue;
found = 1;
if (security_capset_check(target, effective, inheritable,
diff --git a/kernel/compat.c b/kernel/compat.c
index 126dee9530a..b4fbd838cd7 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -22,9 +22,12 @@
#include <linux/security.h>
#include <linux/timex.h>
#include <linux/migrate.h>
+#include <linux/posix-timers.h>
#include <asm/uaccess.h>
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
@@ -601,6 +604,30 @@ long compat_sys_clock_getres(clockid_t which_clock,
return err;
}
+static long compat_clock_nanosleep_restart(struct restart_block *restart)
+{
+ long err;
+ mm_segment_t oldfs;
+ struct timespec tu;
+ struct compat_timespec *rmtp = (struct compat_timespec *)(restart->arg1);
+
+ restart->arg1 = (unsigned long) &tu;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ err = clock_nanosleep_restart(restart);
+ set_fs(oldfs);
+
+ if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
+ put_compat_timespec(&tu, rmtp))
+ return -EFAULT;
+
+ if (err == -ERESTART_RESTARTBLOCK) {
+ restart->fn = compat_clock_nanosleep_restart;
+ restart->arg1 = (unsigned long) rmtp;
+ }
+ return err;
+}
+
long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
struct compat_timespec __user *rqtp,
struct compat_timespec __user *rmtp)
@@ -608,6 +635,7 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
long err;
mm_segment_t oldfs;
struct timespec in, out;
+ struct restart_block *restart;
if (get_compat_timespec(&in, rqtp))
return -EFAULT;
@@ -618,9 +646,16 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
(struct timespec __user *) &in,
(struct timespec __user *) &out);
set_fs(oldfs);
+
if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
put_compat_timespec(&out, rmtp))
return -EFAULT;
+
+ if (err == -ERESTART_RESTARTBLOCK) {
+ restart = &current_thread_info()->restart_block;
+ restart->fn = compat_clock_nanosleep_restart;
+ restart->arg1 = (unsigned long) rmtp;
+ }
return err;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c..32c96628463 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
#ifdef CONFIG_HOTPLUG_CPU
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
return 0;
}
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
{
int err;
struct task_struct *p;
cpumask_t old_allowed, tmp;
- mutex_lock(&cpu_add_remove_lock);
- if (num_online_cpus() == 1) {
- err = -EBUSY;
- goto out;
- }
+ if (num_online_cpus() == 1)
+ return -EBUSY;
- if (!cpu_online(cpu)) {
- err = -EINVAL;
- goto out;
- }
+ if (!cpu_online(cpu))
+ return -EINVAL;
err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
__FUNCTION__, cpu);
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
err = kthread_stop(p);
out_allowed:
set_cpus_allowed(current, old_allowed);
-out:
+ return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_down(cpu);
+
mutex_unlock(&cpu_add_remove_lock);
return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
{
int ret;
void *hcpu = (void *)(long)cpu;
- mutex_lock(&cpu_add_remove_lock);
- if (cpu_online(cpu) || !cpu_present(cpu)) {
- ret = -EINVAL;
- goto out;
- }
+ if (cpu_online(cpu) || !cpu_present(cpu))
+ return -EINVAL;
ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
if (ret != 0)
blocking_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
+
+ return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+ int err = 0;
+
+ mutex_lock(&cpu_add_remove_lock);
+ if (cpu_hotplug_disabled)
+ err = -EBUSY;
+ else
+ err = _cpu_up(cpu);
+
+ mutex_unlock(&cpu_add_remove_lock);
+ return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+ int cpu, first_cpu, error;
+
+ mutex_lock(&cpu_add_remove_lock);
+ first_cpu = first_cpu(cpu_present_map);
+ if (!cpu_online(first_cpu)) {
+ error = _cpu_up(first_cpu);
+ if (error) {
+ printk(KERN_ERR "Could not bring CPU%d up.\n",
+ first_cpu);
+ goto out;
+ }
+ }
+ error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+ if (error) {
+ printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+ goto out;
+ }
+ /* We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpus_clear(frozen_cpus);
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+ continue;
+ error = _cpu_down(cpu);
+ if (!error) {
+ cpu_set(cpu, frozen_cpus);
+ printk("CPU%d is down\n", cpu);
+ } else {
+ printk(KERN_ERR "Error taking CPU%d down: %d\n",
+ cpu, error);
+ break;
+ }
+ }
+ if (!error) {
+ BUG_ON(num_online_cpus() > 1);
+ /* Make sure the CPUs won't be enabled by someone else */
+ cpu_hotplug_disabled = 1;
+ } else {
+ printk(KERN_ERR "Non-boot CPUs are not disabled");
+ }
out:
mutex_unlock(&cpu_add_remove_lock);
- return ret;
+ return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+ int cpu, error;
+
+ /* Allow everyone to use the CPU hotplug again */
+ mutex_lock(&cpu_add_remove_lock);
+ cpu_hotplug_disabled = 0;
+ mutex_unlock(&cpu_add_remove_lock);
+
+ printk("Enabling non-boot CPUs ...\n");
+ for_each_cpu_mask(cpu, frozen_cpus) {
+ error = cpu_up(cpu);
+ if (!error) {
+ printk("CPU%d is up\n", cpu);
+ continue;
+ }
+ printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+ cpu, error);
+ }
+ cpus_clear(frozen_cpus);
}
+#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc..9d850ae13b1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,7 +240,7 @@ static struct super_block *cpuset_sb;
* A cpuset can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cpusets is empty. Since all
* tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
* always has either children cpusets and/or using tasks. So we don't
* need a special hack to ensure that top_cpuset cannot be deleted.
*
@@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode)
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
@@ -378,7 +377,7 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directories start off with i_nlink == 2 (for "." entry) */
- inode->i_nlink++;
+ inc_nlink(inode);
} else {
return -ENOMEM;
}
@@ -913,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
int fudge;
int retval;
+ /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
trialcs = *cs;
retval = nodelist_parse(buf, trialcs.mems_allowed);
if (retval < 0)
@@ -1222,7 +1225,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
task_lock(tsk);
oldcs = tsk->cpuset;
- if (!oldcs) {
+ /*
+ * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+ * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+ * then fail this attach_task(), to avoid breaking top_cpuset.count.
+ */
+ if (tsk->flags & PF_EXITING) {
task_unlock(tsk);
mutex_unlock(&callback_mutex);
put_task_struct(tsk);
@@ -1557,7 +1565,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode)
inode->i_fop = &simple_dir_operations;
/* start off with i_nlink == 2 (for "." entry) */
- inode->i_nlink++;
+ inc_nlink(inode);
} else if (S_ISREG(mode)) {
inode->i_size = 0;
inode->i_fop = &cpuset_file_operations;
@@ -1590,7 +1598,7 @@ static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
error = cpuset_create_file(dentry, S_IFDIR | mode);
if (!error) {
dentry->d_fsdata = cs;
- parent->d_inode->i_nlink++;
+ inc_nlink(parent->d_inode);
cs->dentry = dentry;
}
dput(dentry);
@@ -2025,7 +2033,7 @@ int __init cpuset_init(void)
}
root = cpuset_mount->mnt_sb->s_root;
root->d_fsdata = &top_cpuset;
- root->d_inode->i_nlink++;
+ inc_nlink(root->d_inode);
top_cpuset.dentry = root;
root->d_inode->i_op = &cpuset_dir_inode_operations;
number_of_cpusets = 1;
@@ -2037,33 +2045,104 @@ out:
return err;
}
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
/*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period. This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * This handles CPU hotplug (cpuhp) events. If someday Memory
- * Nodes can be hotplugged (dynamically changing node_online_map)
- * then we should handle that too, perhaps in a similar way.
+ * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets. If this removes the
+ * last CPU or node from a cpuset, then the guarantee_online_cpus()
+ * or guarantee_online_mems() code will use that emptied cpusets
+ * parent online CPUs or nodes. Cpusets that were already empty of
+ * CPUs or nodes are left empty.
+ *
+ * This routine is intentionally inefficient in a couple of regards.
+ * It will check all cpusets in a subtree even if the top cpuset of
+ * the subtree has no offline CPUs or nodes. It checks both CPUs and
+ * nodes, even though the caller could have been coded to know that
+ * only one of CPUs or nodes needed to be checked on a given call.
+ * This was done to minimize text size rather than cpu cycles.
+ *
+ * Call with both manage_mutex and callback_mutex held.
+ *
+ * Recursive, on depth of cpuset subtree.
*/
-#ifdef CONFIG_HOTPLUG_CPU
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
- unsigned long phase, void *cpu)
+static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+{
+ struct cpuset *c;
+
+ /* Each of our child cpusets mems must be online */
+ list_for_each_entry(c, &cur->children, sibling) {
+ guarantee_online_cpus_mems_in_subtree(c);
+ if (!cpus_empty(c->cpus_allowed))
+ guarantee_online_cpus(c, &c->cpus_allowed);
+ if (!nodes_empty(c->mems_allowed))
+ guarantee_online_mems(c, &c->mems_allowed);
+ }
+}
+
+/*
+ * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
+ * cpu_online_map and node_online_map. Force the top cpuset to track