Merge branch 'linus' into core/generic-dma-coherent

Conflicts: arch/x86/Kconfig Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2008-07-29 00:07:55 +0200
committer: Ingo Molnar <mingo@elte.hu> 2008-07-29 00:07:55 +0200
commit: cb28a1bbdb4790378e7366d6c9ee1d2340b84f92 (patch)
tree: 316436f77dac75335fd2c3ef5f109e71606c50d3 /kernel
parent: b6d4f7e3ef25beb8c658c97867d98883e69dc544 (diff)
parent: f934fb19ef34730263e6afc01e8ec27a8a71470f (diff)
65 files changed, 2720 insertions, 1798 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e62..382dd5a8b2d 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
 	default 1000 if HZ_1000
 
 config SCHED_HRTICK
-	def_bool HIGH_RES_TIMERS && X86
+	def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
diff --git a/kernel/Makefile b/kernel/Makefile
index e25ee775dc9..4e1d7df7c3e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -11,6 +11,8 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
+CFLAGS_REMOVE_sched.o = -mno-spe
+
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -22,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd734d..dd68b905941 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+		struct pid_namespace *ns, struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
  * can be placed in the same cache line as the lock.  This primes
  * the cache line to have the data after getting the lock.
  */
-struct acct_glbs {
-	spinlock_t		lock;
+struct bsd_acct_struct {
 	volatile int		active;
 	volatile int		needcheck;
 	struct file		*file;
 	struct pid_namespace	*ns;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
-static struct acct_glbs acct_globals __cacheline_aligned =
-	{__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
 
 /*
  * Called whenever the timer says to check the free space.
  */
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
 {
-	acct_globals.needcheck = 1;
+	struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+	acct->needcheck = 1;
 }
 
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
 {
 	struct kstatfs sbuf;
 	int res;
@@ -113,11 +115,11 @@ static int check_free_space(struct file *file)
 	sector_t resume;
 	sector_t suspend;
 
-	spin_lock(&acct_globals.lock);
-	res = acct_globals.active;
-	if (!file || !acct_globals.needcheck)
+	spin_lock(&acct_lock);
+	res = acct->active;
+	if (!file || !acct->needcheck)
 		goto out;
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
 	/* May block */
 	if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -136,35 +138,35 @@ static int check_free_space(struct file *file)
 		act = 0;
 
 	/*
-	 * If some joker switched acct_globals.file under us we'ld better be
+	 * If some joker switched acct->file under us we'ld better be
 	 * silent and _not_ touch anything.
 	 */
-	spin_lock(&acct_globals.lock);
-	if (file != acct_globals.file) {
+	spin_lock(&acct_lock);
+	if (file != acct->file) {
 		if (act)
 			res = act>0;
 		goto out;
 	}
 
-	if (acct_globals.active) {
+	if (acct->active) {
 		if (act < 0) {
-			acct_globals.active = 0;
+			acct->active = 0;
 			printk(KERN_INFO "Process accounting paused\n");
 		}
 	} else {
 		if (act > 0) {
-			acct_globals.active = 1;
+			acct->active = 1;
 			printk(KERN_INFO "Process accounting resumed\n");
 		}
 	}
 
-	del_timer(&acct_globals.timer);
-	acct_globals.needcheck = 0;
-	acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-	add_timer(&acct_globals.timer);
-	res = acct_globals.active;
+	del_timer(&acct->timer);
+	acct->needcheck = 0;
+	acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+	add_timer(&acct->timer);
+	res = acct->active;
 out:
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 	return res;
 }
 
@@ -172,39 +174,41 @@ out:
  * Close the old accounting file (if currently open) and then replace
  * it with file (if non-NULL).
  *
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+		struct pid_namespace *ns)
 {
 	struct file *old_acct = NULL;
 	struct pid_namespace *old_ns = NULL;
 
-	if (acct_globals.file) {
-		old_acct = acct_globals.file;
-		old_ns = acct_globals.ns;
-		del_timer(&acct_globals.timer);
-		acct_globals.active = 0;
-		acct_globals.needcheck = 0;
-		acct_globals.file = NULL;
+	if (acct->file) {
+		old_acct = acct->file;
+		old_ns = acct->ns;
+		del_timer(&acct->timer);
+		acct->active = 0;
+		acct->needcheck = 0;
+		acct->file = NULL;
+		acct->ns = NULL;
+		list_del(&acct->list);
 	}
 	if (file) {
-		acct_globals.file = file;
-		acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
-		acct_globals.needcheck = 0;
-		acct_globals.active = 1;
+		acct->file = file;
+		acct->ns = ns;
+		acct->needcheck = 0;
+		acct->active = 1;
+		list_add(&acct->list, &acct_list);
 		/* It's been deleted if it was used before so this is safe */
-		init_timer(&acct_globals.timer);
-		acct_globals.timer.function = acct_timeout;
-		acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-		add_timer(&acct_globals.timer);
+		setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+		acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+		add_timer(&acct->timer);
 	}
 	if (old_acct) {
 		mnt_unpin(old_acct->f_path.mnt);
-		spin_unlock(&acct_globals.lock);
-		do_acct_process(old_ns, old_acct);
+		spin_unlock(&acct_lock);
+		do_acct_process(acct, old_ns, old_acct);
 		filp_close(old_acct, NULL);
-		put_pid_ns(old_ns);
-		spin_lock(&acct_globals.lock);
+		spin_lock(&acct_lock);
 	}
 }
 
@@ -212,6 +216,8 @@ static int acct_on(char *name)
 {
 	struct file *file;
 	int error;
+	struct pid_namespace *ns;
+	struct bsd_acct_struct *acct = NULL;
 
 	/* Difference from BSD - they don't do O_APPEND */
 	file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -228,18 +234,34 @@ static int acct_on(char *name)
 		return -EIO;
 	}
 
+	ns = task_active_pid_ns(current);
+	if (ns->bacct == NULL) {
+		acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+		if (acct == NULL) {
+			filp_close(file, NULL);
+			return -ENOMEM;
+		}
+	}
+
 	error = security_acct(file);
 	if (error) {
+		kfree(acct);
 		filp_close(file, NULL);
 		return error;
 	}
 
-	spin_lock(&acct_globals.lock);
+	spin_lock(&acct_lock);
+	if (ns->bacct == NULL) {
+		ns->bacct = acct;
+		acct = NULL;
+	}
+
 	mnt_pin(file->f_path.mnt);
-	acct_file_reopen(file);
-	spin_unlock(&acct_globals.lock);
+	acct_file_reopen(ns->bacct, file, ns);
+	spin_unlock(&acct_lock);
 
 	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+	kfree(acct);
 
 	return 0;
 }
@@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name)
 		error = acct_on(tmp);
 		putname(tmp);
 	} else {
+		struct bsd_acct_struct *acct;
+
+		acct = task_active_pid_ns(current)->bacct;
+		if (acct == NULL)
+			return 0;
+
 		error = security_acct(NULL);
 		if (!error) {
-			spin_lock(&acct_globals.lock);
-			acct_file_reopen(NULL);
-			spin_unlock(&acct_globals.lock);
+			spin_lock(&acct_lock);
+			acct_file_reopen(acct, NULL, NULL);
+			spin_unlock(&acct_lock);
 		}
 	}
 	return error;
@@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
-	spin_lock(&acct_globals.lock);
-	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-		acct_file_reopen(NULL);
-	spin_unlock(&acct_globals.lock);
+	struct bsd_acct_struct *acct;
+
+	spin_lock(&acct_lock);
+restart:
+	list_for_each_entry(acct, &acct_list, list)
+		if (acct->file && acct->file->f_path.mnt == m) {
+			acct_file_reopen(acct, NULL, NULL);
+			goto restart;
+		}
+	spin_unlock(&acct_lock);
 }
 
 /**
@@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
-	spin_lock(&acct_globals.lock);
-	if (acct_globals.file &&
-	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
-		acct_file_reopen(NULL);
+	struct bsd_acct_struct *acct;
+
+	spin_lock(&acct_lock);
+restart:
+	list_for_each_entry(acct, &acct_list, list)
+		if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+			acct_file_reopen(acct, NULL, NULL);
+			goto restart;
+		}
+	spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+	struct bsd_acct_struct *acct;
+
+	spin_lock(&acct_lock);
+	acct = ns->bacct;
+	if (acct != NULL) {
+		if (acct->file != NULL)
+			acct_file_reopen(acct, NULL, NULL);
+
+		kfree(acct);
 	}
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 }
 
 /*
@@ -425,7 +478,8 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+		struct pid_namespace *ns, struct file *file)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
@@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
 	 * First check to see if there is enough free_space to continue
 	 * the process accounting system.
 	 */
-	if (!check_free_space(file))
+	if (!check_free_space(acct, file))
 		return;
 
 	/*
@@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead)
 	spin_unlock_irq(&current->sighand->siglock);
 }
 
-/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
 {
 	struct file *file = NULL;
-	struct pid_namespace *ns;
+	struct bsd_acct_struct *acct;
 
+	acct = ns->bacct;
 	/*
 	 * accelerate the common fastpath:
 	 */
-	if (!acct_globals.file)
+	if (!acct || !acct->file)
 		return;
 
-	spin_lock(&acct_globals.lock);
-	file = acct_globals.file;
+	spin_lock(&acct_lock);
+	file = acct->file;
 	if (unlikely(!file)) {
-		spin_unlock(&acct_globals.lock);
+		spin_unlock(&acct_lock);
 		return;
 	}
 	get_file(file);
-	ns = get_pid_ns(acct_globals.ns);
-	spin_unlock(&acct_globals.lock);
+	spin_unlock(&acct_lock);
 
-	do_acct_process(ns, file);
+	do_acct_process(acct, ns, file);
 	fput(file);
-	put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+	struct pid_namespace *ns;
+
+	/*
+	 * This loop is safe lockless, since current is still
+	 * alive and holds its namespace, which in turn holds
+	 * its parent.
+	 */
+	for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+		acct_process_in_ns(ns);
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d..4699950e65b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
 	struct audit_context *context = tsk->audit_context;
 	enum audit_state     state;
 
-	BUG_ON(!context);
+	if (unlikely(!context))
+		return;
 
 	/*
 	 * This happens only on certain architectures that make system
diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3ff..0101e847603 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
 	return 0;
 }
 
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+
+/*
+ * Without filesystem capability support, we nominally support one process
+ * setting the capabilities of another
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+				     kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+	struct task_struct *target;
+	int ret;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	if (pid && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
+		if (!target) {
+			ret = -ESRCH;
+			goto out;
+		}
+	} else
+		target = current;
+
+	ret = security_capget(target, pEp, pIp, pPp);
+
+out:
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	return ret;
+}
+
+/*
+ * cap_set_pg - set capabilities for all processes in a given process
+ * group.  We call this holding task_capability_lock and tasklist_lock.
+ */
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
+			     kernel_cap_t *inheritable,
+			     kernel_cap_t *permitted)
+{
+	struct task_struct *g, *target;
+	int ret = -EPERM;
+	int found = 0;
+	struct pid *pgrp;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	pgrp = find_vpid(pgrp_nr);
+	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
+		target = g;
+		while_each_thread(g, target) {
+			if (!security_capset_check(target, effective,
+						   inheritable, permitted)) {
+				security_capset_set(target, effective,
+						    inheritable, permitted);
+				ret = 0;
+			}
+			found = 1;
+		}
+	} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	if (!found)
+		ret = 0;
+	return ret;
+}
+
 /*
- * For sys_getproccap() and sys_setproccap(), any of the three
- * capability set pointers may be NULL -- indicating that that set is
- * uninteresting and/or not to be changed.
+ * cap_set_all - set capabilities for all processes other than init
+ * and self.  We call this holding task_capability_lock and tasklist_lock.
  */
+static inline int cap_set_all(kernel_cap_t *effective,
+			      kernel_cap_t *inheritable,
+			      kernel_cap_t *permitted)
+{
+	struct task_struct *g, *target;
+	int ret = -EPERM;
+	int found = 0;
+
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	do_each_thread(g, target) {
+		if (target == current
+		    || is_container_init(target->group_leader))
+			continue;
+		found = 1;
+		if (security_capset_check(target, effective, inheritable,
+					  permitted))
+			continue;
+		ret = 0;
+		security_capset_set(target, effective, inheritable, permitted);
+	} while_each_thread(g, target);
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	if (!found)
+		ret = 0;
+
+	return ret;
+}
+
+/*
+ * Given the target pid does not refer to the current process we
+ * need more elaborate support... (This support is not present when
+ * filesystem capabilities are configured.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
+					    kernel_cap_t *inheritable,
+					    kernel_cap_t *permitted)
+{
+	struct task_struct *target;
+	int ret;
+
+	if (!capable(CAP_SETPCAP))
+		return -EPERM;
+
+	if (pid == -1)	          /* all procs other than current and init */
+		return cap_set_all(effective, inheritable, permitted);
+
+	else if (pid < 0)                    /* all procs in process group */
+		return cap_set_pg(-pid, effective, inheritable, permitted);
+
+	/* target != current */
+	spin_lock(&task_capability_lock);
+	read_lock(&tasklist_lock);
+
+	target = find_task_by_vpid(pid);
+	if (!target)
+		ret = -ESRCH;
+	else {
+		ret = security_capset_check(target, effective, inheritable,
+					    permitted);
+
+		/* having verified that the proposed changes are legal,
+		   we now put them into effect. */
+		if (!ret)
+			security_capset_set(target, effective, inheritable,
+					    permitted);
+	}
+
+	read_unlock(&tasklist_lock);
+	spin_unlock(&task_capability_lock);
+
+	return ret;
+}
+
+#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
+
+/*
+ * If we have configured with filesystem capability support, then the
+ * only thing that can change the capabilities of the current process
+ * is the current process. As such, we can't be in this code at the
+ * same time as we are in the process of setting capabilities in this
+ * process. The net result is that we can limit our use of locks to
+ * when we are reading the caps of another process.
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+				     kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+	int ret;
+
+	if (pid && (pid != task_pid_vnr(current))) {
+		struct task_struct *target;
+
+		spin_lock(&task_capability_lock);
+		read_lock(&tasklist_lock);
+
+		target = find_task_by_vpid(pid);
+		if (!target)
+			ret = -ESRCH;
+		else
+			ret = security_capget(target, pEp, pIp, pPp);
+
+		read_unlock(&tasklist_lock);
+		spin_unlock(&task_capability_lock);
+	} else
+		ret = security_capget(current, pEp, pIp, pPp);
+
+	return ret;
+}
+
+/*
+ * With filesystem capability support configured, the kernel does not
+ * permit the ch
author	Ingo Molnar <mingo@elte.hu>	2008-07-29 00:07:55 +0200
committer	Ingo Molnar <mingo@elte.hu>	2008-07-29 00:07:55 +0200
commit	cb28a1bbdb4790378e7366d6c9ee1d2340b84f92 (patch)
tree	316436f77dac75335fd2c3ef5f109e71606c50d3 /kernel
parent	b6d4f7e3ef25beb8c658c97867d98883e69dc544 (diff)
parent	f934fb19ef34730263e6afc01e8ec27a8a71470f (diff)