From 5459c164f0591ee75ed0203bb8f3817f25948e2f Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:24 -0700
Subject: security: protect legacy applications from executing with
 insufficient privilege

When cap_bset suppresses some of the forced (fP) capabilities of a file,
it is generally only safe to execute the program if it understands how to
recognize it doesn't have enough privilege to work correctly.  For legacy
applications (fE!=0), which have no non-destructive way to determine that
they are missing privilege, we fail to execute (EPERM) any executable that
requires fP capabilities, but would otherwise get pP' < fP.  This is a
fail-safe permission check.

For some discussion of why it is problematic for (legacy) privileged
applications to run with less than the set of capabilities requested for
them, see:

 http://userweb.kernel.org/~morgan/sendmail-capabilities-war-story.html

With this iteration of this support, we do not include setuid-0 based
privilege protection from the bounding set.  That is, the admin can still
(ab)use the bounding set to suppress the privileges of a setuid-0 program.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/commoncap.c | 108 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 59 insertions(+), 49 deletions(-)

(limited to 'security')

diff --git a/security/commoncap.c b/security/commoncap.c
index 0b6537a3672..4afbece37a0 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -162,8 +162,7 @@ void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 {
-	cap_clear(bprm->cap_inheritable);
-	cap_clear(bprm->cap_permitted);
+	cap_clear(bprm->cap_post_exec_permitted);
 	bprm->cap_effective = false;
 }
 
@@ -198,6 +197,7 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 {
 	__u32 magic_etc;
 	unsigned tocopy, i;
+	int ret;
 
 	if (size < sizeof(magic_etc))
 		return -EINVAL;
@@ -225,19 +225,40 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 		bprm->cap_effective = false;
 	}
 
-	for (i = 0; i < tocopy; ++i) {
-		bprm->cap_permitted.cap[i] =
-			le32_to_cpu(caps->data[i].permitted);
-		bprm->cap_inheritable.cap[i] =
-			le32_to_cpu(caps->data[i].inheritable);
-	}
-	while (i < VFS_CAP_U32) {
-		bprm->cap_permitted.cap[i] = 0;
-		bprm->cap_inheritable.cap[i] = 0;
-		i++;
+	ret = 0;
+
+	CAP_FOR_EACH_U32(i) {
+		__u32 value_cpu;
+
+		if (i >= tocopy) {
+			/*
+			 * Legacy capability sets have no upper bits
+			 */
+			bprm->cap_post_exec_permitted.cap[i] = 0;
+			continue;
+		}
+		/*
+		 * pP' = (X & fP) | (pI & fI)
+		 */
+		value_cpu = le32_to_cpu(caps->data[i].permitted);
+		bprm->cap_post_exec_permitted.cap[i] =
+			(current->cap_bset.cap[i] & value_cpu) |
+			(current->cap_inheritable.cap[i] &
+				le32_to_cpu(caps->data[i].inheritable));
+		if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) {
+			/*
+			 * insufficient to execute correctly
+			 */
+			ret = -EPERM;
+		}
 	}
 
-	return 0;
+	/*
+	 * For legacy apps, with no internal support for recognizing they
+	 * do not have enough capabilities, we return an error if they are
+	 * missing some "forced" (aka file-permitted) capabilities.
+	 */
+	return bprm->cap_effective ? ret : 0;
 }
 
 /* Locate any VFS capabilities: */
@@ -269,9 +290,9 @@ static int get_file_caps(struct linux_binprm *bprm)
 		goto out;
 
 	rc = cap_from_disk(&vcaps, bprm, rc);
-	if (rc)
+	if (rc == -EINVAL)
 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
-			__func__, rc, bprm->filename);
+		       __func__, rc, bprm->filename);
 
 out:
 	dput(dentry);
@@ -304,25 +325,24 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 	int ret;
 
 	ret = get_file_caps(bprm);
-	if (ret)
-		printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
-			__func__, ret, bprm->filename);
-
-	/*  To support inheritance of root-permissions and suid-root
-	 *  executables under compatibility mode, we raise all three
-	 *  capability sets for the file.
-	 *
-	 *  If only the real uid is 0, we only raise the inheritable
-	 *  and permitted sets of the executable file.
-	 */
 
-	if (!issecure (SECURE_NOROOT)) {
+	if (!issecure(SECURE_NOROOT)) {
+		/*
+		 * To support inheritance of root-permissions and suid-root
+		 * executables under compatibility mode, we override the
+		 * capability sets for the file.
+		 *
+		 * If only the real uid is 0, we do not set the effective
+		 * bit.
+		 */
 		if (bprm->e_uid == 0 || current->uid == 0) {
-			cap_set_full (bprm->cap_inheritable);
-			cap_set_full (bprm->cap_permitted);
+			/* pP' = (cap_bset & ~0) | (pI & ~0) */
+			bprm->cap_post_exec_permitted = cap_combine(
+				current->cap_bset, current->cap_inheritable
+				);
+			bprm->cap_effective = (bprm->e_uid == 0);
+			ret = 0;
 		}
-		if (bprm->e_uid == 0)
-			bprm->cap_effective = true;
 	}
 
 	return ret;
@@ -330,17 +350,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
-	/* Derived from fs/exec.c:compute_creds. */
-	kernel_cap_t new_permitted, working;
-
-	new_permitted = cap_intersect(bprm->cap_permitted,
-				 current->cap_bset);
-	working = cap_intersect(bprm->cap_inheritable,
-				 current->cap_inheritable);
-	new_permitted = cap_combine(new_permitted, working);
-
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
-	    !cap_issubset (new_permitted, current->cap_permitted)) {
+	    !cap_issubset(bprm->cap_post_exec_permitted,
+			  current->cap_permitted)) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 
@@ -350,9 +362,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 				bprm->e_gid = current->gid;
 			}
 			if (cap_limit_ptraced_target()) {
-				new_permitted =
-					cap_intersect(new_permitted,
-						      current->cap_permitted);
+				bprm->cap_post_exec_permitted = cap_intersect(
+					bprm->cap_post_exec_permitted,
+					current->cap_permitted);
 			}
 		}
 	}
@@ -364,9 +376,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
 	if (!is_global_init(current)) {
-		current->cap_permitted = new_permitted;
+		current->cap_permitted = bprm->cap_post_exec_permitted;
 		if (bprm->cap_effective)
-			current->cap_effective = new_permitted;
+			current->cap_effective = bprm->cap_post_exec_permitted;
 		else
 			cap_clear(current->cap_effective);
 	}
@@ -381,9 +393,7 @@ int cap_bprm_secureexec (struct linux_binprm *bprm)
 	if (current->uid != 0) {
 		if (bprm->cap_effective)
 			return 1;
-		if (!cap_isclear(bprm->cap_permitted))
-			return 1;
-		if (!cap_isclear(bprm->cap_inheritable))
+		if (!cap_isclear(bprm->cap_post_exec_permitted))
 			return 1;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 84aaa7ab4c40b66d6dd9aa393901551ad50ec640 Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:25 -0700
Subject: security: filesystem capabilities no longer experimental

Filesystem capabilities have come of age.  Remove the experimental tag for
configuring filesystem capabilities.

Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'security')

diff --git a/security/Kconfig b/security/Kconfig
index 62ed4717d33..559293922a4 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -74,8 +74,7 @@ config SECURITY_NETWORK_XFRM
 	  If you are unsure how to answer this question, answer N.
 
 config SECURITY_FILE_CAPABILITIES
-	bool "File POSIX Capabilities (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "File POSIX Capabilities"
 	default n
 	help
 	  This enables filesystem capabilities, allowing you to give
-- 
cgit v1.2.3-70-g09d2


From f92523e3a7861f5dbd76021e0719a35fe8771f2d Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:03 -0700
Subject: cgroup files: convert devcgroup_access_write() into a cgroup
 write_string() handler

This patch converts devcgroup_access_write() from a raw file handler
into a handler for the cgroup write_string() method. This allows some
boilerplate copying/locking/checking to be removed and simplifies the
cleanup path, since these functions are performed by the cgroups
framework before calling the handler.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 101 ++++++++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 63 deletions(-)

(limited to 'security')

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index ddd92cec78e..236fffa9d05 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -59,6 +59,11 @@ static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
 	return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
 }
 
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+	return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -312,10 +317,10 @@ static int may_access_whitelist(struct dev_cgroup *c,
  * when adding a new allow rule to a device whitelist, the rule
  * must be allowed in the parent device
  */
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_whitelist_item *wh)
 {
-	struct cgroup *pcg = childcg->parent;
+	struct cgroup *pcg = childcg->css.cgroup->parent;
 	struct dev_cgroup *parent;
 	int ret;
 
@@ -341,39 +346,18 @@ static int parent_has_perm(struct cgroup *childcg,
  * new access is only allowed if you're in the top-level cgroup, or your
  * parent cgroup has the access you're asking for.
  */
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+				   int filetype, const char *buffer)
 {
-	struct cgroup *cur_cgroup;
-	struct dev_cgroup *devcgroup, *cur_devcgroup;
-	int filetype = cft->private;
-	char *buffer, *b;
+	struct dev_cgroup *cur_devcgroup;
+	const char *b;
 	int retval = 0, count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	devcgroup = cgroup_to_devcgroup(cgroup);
-	cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
-	cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
-	buffer = kmalloc(nbytes+1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-	if (cgroup_is_removed(cgroup)) {
-		retval = -ENODEV;
-		goto out2;
-	}
+	cur_devcgroup = task_devcgroup(current);
 
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
@@ -392,14 +376,11 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 		wh.type = DEV_CHAR;
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
 	b++;
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
-	}
+	if (!isspace(*b))
+		return -EINVAL;
 	b++;
 	if (*b == '*') {
 		wh.major = ~0;
@@ -411,13 +392,10 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			b++;
 		}
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (*b != ':') {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (*b != ':')
+		return -EINVAL;
 	b++;
 
 	/* read minor */
@@ -431,13 +409,10 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			b++;
 		}
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (!isspace(*b))
+		return -EINVAL;
 	for (b++, count = 0; count < 3; count++, b++) {
 		switch (*b) {
 		case 'r':
@@ -454,8 +429,7 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			count = 3;
 			break;
 		default:
-			retval = -EINVAL;
-			goto out2;
+			return -EINVAL;
 		}
 	}
 
@@ -463,38 +437,39 @@ handle:
 	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
-		if (!parent_has_perm(cgroup, &wh))
-			retval = -EPERM;
-		else
-			retval = dev_whitelist_add(devcgroup, &wh);
-		break;
+		if (!parent_has_perm(devcgroup, &wh))
+			return -EPERM;
+		return dev_whitelist_add(devcgroup, &wh);
 	case DEVCG_DENY:
 		dev_whitelist_rm(devcgroup, &wh);
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	return 0;
+}
 
-	if (retval == 0)
-		retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+				  const char *buffer)
+{
+	int retval;
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+					 cft->private, buffer);
 	cgroup_unlock();
-out1:
-	kfree(buffer);
 	return retval;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write  = devcgroup_access_write,
+		.write_string  = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write = devcgroup_access_write,
+		.write_string = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{
-- 
cgit v1.2.3-70-g09d2


From 4efd1a1b2f09a4b746dd9dc057986c6dadcb1317 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:47:07 -0700
Subject: devcgroup: relax white-list protection down to RCU

Currently this list is protected with a simple spinlock, even for reading
from one.  This is OK, but can be better.

Actually I want it to be better very much, since after replacing the
OpenVZ device permissions engine with the cgroup-based one I noticed, that
we set 12 default device permissions for each newly created container (for
/dev/null, full, terminals, ect devices), and people sometimes have up to
20 perms more, so traversing the ~30-40 elements list under a spinlock
doesn't seem very good.

Here's the RCU protection for white-list - dev_whitelist_item-s are added
and removed under the devcg->lock, but are looked up in permissions
checking under the rcu_read_lock.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

(limited to 'security')

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 236fffa9d05..9da3532726f 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -41,6 +41,7 @@ struct dev_whitelist_item {
 	short type;
 	short access;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 struct dev_cgroup {
@@ -133,11 +134,19 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 	}
 
 	if (whcopy != NULL)
-		list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
+		list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
 	spin_unlock(&dev_cgroup->lock);
 	return 0;
 }
 
+static void whitelist_item_free(struct rcu_head *rcu)
+{
+	struct dev_whitelist_item *item;
+
+	item = container_of(rcu, struct dev_whitelist_item, rcu);
+	kfree(item);
+}
+
 /*
  * called under cgroup_lock()
  * since the list is visible to other tasks, we need the spinlock also
@@ -161,8 +170,8 @@ static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 remove:
 		walk->access &= ~wh->access;
 		if (!walk->access) {
-			list_del(&walk->list);
-			kfree(walk);
+			list_del_rcu(&walk->list);
+			call_rcu(&walk->rcu, whitelist_item_free);
 		}
 	}
 	spin_unlock(&dev_cgroup->lock);
@@ -269,15 +278,15 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 	struct dev_whitelist_item *wh;
 	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 
-	spin_lock(&devcgroup->lock);
-	list_for_each_entry(wh, &devcgroup->whitelist, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
 		set_access(acc, wh->access);
 		set_majmin(maj, wh->major);
 		set_majmin(min, wh->minor);
 		seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
 			   maj, min, acc);
 	}
-	spin_unlock(&devcgroup->lock);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -510,8 +519,8 @@ int devcgroup_inode_permission(struct inode *inode, int mask)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
-	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
 		if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
@@ -527,10 +536,10 @@ acc_check:
 			continue;
 		if ((mask & MAY_READ) && !(wh->access & ACC_READ))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 
 	return -EPERM;
 }
@@ -545,7 +554,7 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
+	rcu_read_lock();
 	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
@@ -560,9 +569,9 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 acc_check:
 		if (!(wh->access & ACC_MKNOD))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 	return -EPERM;
 }
-- 
cgit v1.2.3-70-g09d2


From 7759fc9d10d3559f365cb122d81e0c0a185fe0fe Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:08 -0700
Subject: devcgroup: code cleanup

- clean up set_majmin()
- use simple_strtoul() to parse major/minor

[akpm@linux-foundation.org: fix simple_strtoul() usage]
[kosaki.motohiro@jp.fujitsu.com: fix warnings]
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'security')

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9da3532726f..7bd296cca04 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -202,7 +202,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 		}
 		wh->minor = wh->major = ~0;
 		wh->type = DEV_ALL;
-		wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
+		wh->access = ACC_MASK;
 		list_add(&wh->list, &dev_cgroup->whitelist);
 	} else {
 		parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
@@ -264,11 +264,10 @@ static char type_to_char(short type)
 
 static void set_majmin(char *str, unsigned m)
 {
-	memset(str, 0, MAJMINLEN);
 	if (m == ~0)
-		sprintf(str, "*");
+		strcpy(str, "*");
 	else
-		snprintf(str, MAJMINLEN, "%u", m);
+		sprintf(str, "%u", m);
 }
 
 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
@@ -360,6 +359,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 {
 	struct dev_cgroup *cur_devcgroup;
 	const char *b;
+	char *endp;
 	int retval = 0, count;
 	struct dev_whitelist_item wh;
 
@@ -395,11 +395,8 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 		wh.major = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.major = 0;
-		while (isdigit(*b)) {
-			wh.major = wh.major*10+(*b-'0');
-			b++;
-		}
+		wh.major = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
 		return -EINVAL;
 	}
@@ -412,11 +409,8 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 		wh.minor = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.minor = 0;
-		while (isdigit(*b)) {
-			wh.minor = wh.minor*10+(*b-'0');
-			b++;
-		}
+		wh.minor = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
 		return -EINVAL;
 	}
-- 
cgit v1.2.3-70-g09d2


From 0d094efeb1e98010c6b99923f1eb7e17bf1e3a74 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 25 Jul 2008 19:45:49 -0700
Subject: tracehook: tracehook_tracer_task

This adds the tracehook_tracer_task() hook to consolidate all forms of
"Who is using ptrace on me?" logic.  This is used for "TracerPid:" in
/proc and for permission checks.  We also clean up the selinux code the
called an identical accessor.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c           |  9 +++++++--
 fs/proc/base.c            | 13 +++++++++----
 include/linux/tracehook.h | 18 ++++++++++++++++++
 security/selinux/hooks.c  | 22 +++-------------------
 4 files changed, 37 insertions(+), 25 deletions(-)

(limited to 'security')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e035..0d6eb33597c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
 #include <linux/delayacct.h>
 #include <linux/seq_file.h>
 #include <linux/pid_namespace.h>
+#include <linux/tracehook.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
-	tpid = pid_alive(p) && p->ptrace ?
-		task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
+	tpid = 0;
+	if (pid_alive(p)) {
+		struct task_struct *tracer = tracehook_tracer_task(p);
+		if (tracer)
+			tpid = task_pid_nr_ns(tracer, ns);
+	}
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a891fe4cb43..4b74dba69a6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -69,6 +69,7 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
@@ -231,10 +232,14 @@ static int check_mem_permission(struct task_struct *task)
 	 * If current is actively ptrace'ing, and would also be
 	 * permitted to freshly attach with ptrace now, permit it.
 	 */
-	if (task->parent == current && (task->ptrace & PT_PTRACED) &&
-	    task_is_stopped_or_traced(task) &&
-	    ptrace_may_access(task, PTRACE_MODE_ATTACH))
-		return 0;
+	if (task_is_stopped_or_traced(task)) {
+		int match;
+		rcu_read_lock();
+		match = (tracehook_tracer_task(task) == current);
+		rcu_read_unlock();
+		if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
+			return 0;
+	}
 
 	/*
 	 * Noone else is allowed.
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 9a5b3be2503..6468ca0fe69 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -72,6 +72,24 @@ static inline int tracehook_unsafe_exec(struct task_struct *task)
 	return unsafe;
 }
 
+/**
+ * tracehook_tracer_task - return the task that is tracing the given task
+ * @tsk:		task to consider
+ *
+ * Returns NULL if noone is tracing @task, or the &struct task_struct
+ * pointer to its tracer.
+ *
+ * Must called under rcu_read_lock().  The pointer returned might be kept
+ * live only by RCU.  During exec, this may be called with task_lock()
+ * held on @task, still held from when tracehook_unsafe_exec() was called.
+ */
+static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk)
+{
+	if (task_ptrace(tsk) & PT_PTRACED)
+		return rcu_dereference(tsk->parent);
+	return NULL;
+}
+
 /**
  * tracehook_report_exec - a successful exec was completed
  * @fmt:		&struct linux_binfmt that performed the exec
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 63f131fc42e..3481cde5bf1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -25,7 +25,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/security.h>
@@ -1971,22 +1971,6 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
 
-/**
- * task_tracer_task - return the task that is tracing the given task
- * @task:		task to consider
- *
- * Returns NULL if noone is tracing @task, or the &struct task_struct
- * pointer to its tracer.
- *
- * Must be called under rcu_read_lock().
- */
-static struct task_struct *task_tracer_task(struct task_struct *task)
-{
-	if (task->ptrace & PT_PTRACED)
-		return rcu_dereference(task->parent);
-	return NULL;
-}
-
 /* binprm security operations */
 
 static int selinux_bprm_alloc_security(struct linux_binprm *bprm)
@@ -2238,7 +2222,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 			u32 ptsid = 0;
 
 			rcu_read_lock();
-			tracer = task_tracer_task(current);
+			tracer = tracehook_tracer_task(current);
 			if (likely(tracer != NULL)) {
 				sec = tracer->security;
 				ptsid = sec->sid;
@@ -5247,7 +5231,7 @@ static int selinux_setprocattr(struct task_struct *p,
 		   Otherwise, leave SID unchanged and fail. */
 		task_lock(p);
 		rcu_read_lock();
-		tracer = task_tracer_task(p);
+		tracer = tracehook_tracer_task(p);
 		if (tracer != NULL) {
 			struct task_security_struct *ptsec = tracer->security;
 			u32 ptsid = ptsec->sid;
-- 
cgit v1.2.3-70-g09d2


From 6c5a9d2e1599a099b0e47235a1c1502162b14310 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 17:48:15 -0700
Subject: selinux: use nf_register_hooks()

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/hooks.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 63f131fc42e..df0515dd4d1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5670,27 +5670,20 @@ static struct nf_hook_ops selinux_ipv6_ops[] = {
 static int __init selinux_nf_ip_init(void)
 {
 	int err = 0;
-	u32 iter;
 
 	if (!selinux_enabled)
 		goto out;
 
 	printk(KERN_DEBUG "SELinux:  Registering netfilter hooks\n");
 
-	for (iter = 0; iter < ARRAY_SIZE(selinux_ipv4_ops); iter++) {
-		err = nf_register_hook(&selinux_ipv4_ops[iter]);
-		if (err)
-			panic("SELinux: nf_register_hook for IPv4: error %d\n",
-			      err);
-	}
+	err = nf_register_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops));
+	if (err)
+		panic("SELinux: nf_register_hooks for IPv4: error %d\n", err);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	for (iter = 0; iter < ARRAY_SIZE(selinux_ipv6_ops); iter++) {
-		err = nf_register_hook(&selinux_ipv6_ops[iter]);
-		if (err)
-			panic("SELinux: nf_register_hook for IPv6: error %d\n",
-			      err);
-	}
+	err = nf_register_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops));
+	if (err)
+		panic("SELinux: nf_register_hooks for IPv6: error %d\n", err);
 #endif	/* IPV6 */
 
 out:
@@ -5702,15 +5695,11 @@ __initcall(selinux_nf_ip_init);
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 static void selinux_nf_ip_exit(void)
 {
-	u32 iter;
-
 	printk(KERN_DEBUG "SELinux:  Unregistering netfilter hooks\n");
 
-	for (iter = 0; iter < ARRAY_SIZE(selinux_ipv4_ops); iter++)
-		nf_unregister_hook(&selinux_ipv4_ops[iter]);
+	nf_unregister_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	for (iter = 0; iter < ARRAY_SIZE(selinux_ipv6_ops); iter++)
-		nf_unregister_hook(&selinux_ipv6_ops[iter]);
+	nf_unregister_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops));
 #endif	/* IPV6 */
 }
 #endif
-- 
cgit v1.2.3-70-g09d2


From b77b0646ef4efe31a7449bb3d9360fd00f95433d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Jul 2008 09:37:02 -0400
Subject: [PATCH] pass MAY_OPEN to vfs_permission() explicitly

... and get rid of the last "let's deduce mask from nameidata->flags"
bit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c                  |  4 ++--
 fs/namei.c                 | 13 ++++---------
 include/linux/security.h   |  7 +++----
 security/capability.c      |  3 +--
 security/security.c        |  4 ++--
 security/selinux/hooks.c   |  5 ++---
 security/smack/smack_lsm.c |  3 +--
 7 files changed, 15 insertions(+), 24 deletions(-)

(limited to 'security')

diff --git a/fs/exec.c b/fs/exec.c
index b8792a13153..0ba5d355c5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -118,7 +118,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
+	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
 
@@ -666,7 +666,7 @@ struct file *open_exec(const char *name)
 		struct inode *inode = nd.path.dentry->d_inode;
 		file = ERR_PTR(-EACCES);
 		if (S_ISREG(inode->i_mode)) {
-			int err = vfs_permission(&nd, MAY_EXEC);
+			int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd,
diff --git a/fs/namei.c b/fs/namei.c
index 33dcaf025c4..6b0e8e5e079 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -263,12 +263,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	if (inode->i_op && inode->i_op->permission) {
-		int extra = 0;
-		if (nd) {
-			if (nd->flags & LOOKUP_OPEN)
-				extra |= MAY_OPEN;
-		}
-		retval = inode->i_op->permission(inode, mask | extra);
+		retval = inode->i_op->permission(inode, mask);
 		if (!retval) {
 			/*
 			 * Exec permission on a regular file is denied if none
@@ -292,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 		return retval;
 
 	return security_inode_permission(inode,
-			mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd);
+			mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
 }
 
 /**
@@ -492,7 +487,7 @@ static int exec_permission_lite(struct inode *inode,
 
 	return -EACCES;
 ok:
-	return security_inode_permission(inode, MAY_EXEC, nd);
+	return security_inode_permission(inode, MAY_EXEC);
 }
 
 /*
@@ -1692,7 +1687,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	int will_write;
 	int flag = open_to_namei_flags(open_flag);
 
-	acc_mode = ACC_MODE(flag);
+	acc_mode = MAY_OPEN | ACC_MODE(flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
diff --git a/include/linux/security.h b/include/linux/security.h
index f0e9adb22ac..fd96e7f8a6f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1362,7 +1362,7 @@ struct security_operations {
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
 	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
-	int (*inode_permission) (struct inode *inode, int mask, struct nameidata *nd);
+	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
 	void (*inode_delete) (struct inode *inode);
@@ -1628,7 +1628,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry);
 int security_inode_readlink(struct dentry *dentry);
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
-int security_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
 void security_inode_delete(struct inode *inode);
@@ -2021,8 +2021,7 @@ static inline int security_inode_follow_link(struct dentry *dentry,
 	return 0;
 }
 
-static inline int security_inode_permission(struct inode *inode, int mask,
-					     struct nameidata *nd)
+static inline int security_inode_permission(struct inode *inode, int mask)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 5b01c0b0242..63d10da515a 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -211,8 +211,7 @@ static int cap_inode_follow_link(struct dentry *dentry,
 	return 0;
 }
 
-static int cap_inode_permission(struct inode *inode, int mask,
-				struct nameidata *nd)
+static int cap_inode_permission(struct inode *inode, int mask)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 59f23b5918b..78ed3ffde24 100644
--- a/security/security.c
+++ b/security/security.c
@@ -429,11 +429,11 @@ int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return security_ops->inode_follow_link(dentry, nd);
 }
 
-int security_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
+int security_inode_permission(struct inode *inode, int mask)
 {
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
-	return security_ops->inode_permission(inode, mask, nd);
+	return security_ops->inode_permission(inode, mask);
 }
 
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3481cde5bf1..5ba13908b5b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2624,12 +2624,11 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
 	return dentry_has_perm(current, NULL, dentry, FILE__READ);
 }
 
-static int selinux_inode_permission(struct inode *inode, int mask,
-				    struct nameidata *nd)
+static int selinux_inode_permission(struct inode *inode, int mask)
 {
 	int rc;
 
-	rc = secondary_ops->inode_permission(inode, mask, nd);
+	rc = secondary_ops->inode_permission(inode, mask);
 	if (rc)
 		return rc;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ee5a51cbc5e..1b40e558f98 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -522,8 +522,7 @@ static int smack_inode_rename(struct inode *old_inode,
  *
  * Returns 0 if access is permitted, -EACCES otherwise
  */
-static int smack_inode_permission(struct inode *inode, int mask,
-				  struct nameidata *nd)
+static int smack_inode_permission(struct inode *inode, int mask)
 {
 	/*
 	 * No permission to check. Existence test. Yup, it's there.
-- 
cgit v1.2.3-70-g09d2


From b1da47e29e467f1ec36dc78d009bfb109fd533c7 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 15:01:28 +0200
Subject: [patch 3/4] fat: dont call notify_change

The FAT_IOCTL_SET_ATTRIBUTES ioctl() calls notify_change() to change
the file mode before changing the inode attributes.  Replace with
explicit calls to security_inode_setattr(), fat_setattr() and
fsnotify_change().

This is equivalent to the original.  The reason it is needed, is that
later in the series we move the immutable check into notify_change().
That would break the FAT_IOCTL_SET_ATTRIBUTES ioctl, as it needs to
perform the mode change regardless of the immutability of the file.

[Fix error if fat is built as a module.  Thanks to OGAWA Hirofumi for
noticing.]

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fat/file.c       | 15 ++++++++++++++-
 security/security.c |  1 +
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'security')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e..8707a8cfa02 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
+#include <linux/fsnotify.h>
+#include <linux/security.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		      unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 
 		/* Equivalent to a chmod() */
 		ia.ia_valid = ATTR_MODE | ATTR_CTIME;
+		ia.ia_ctime = current_fs_time(inode->i_sb);
 		if (is_dir) {
 			ia.ia_mode = MSDOS_MKMODE(attr,
 				S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 			}
 		}
 
+		/*
+		 * The security check is questionable...  We single
+		 * out the RO attribute for checking by the security
+		 * module, just because it maps to a file mode.
+		 */
+		err = security_inode_setattr(filp->f_path.dentry, &ia);
+		if (err)
+			goto up;
+
 		/* This MUST be done before doing anything irreversible... */
-		err = notify_change(filp->f_path.dentry, &ia);
+		err = fat_setattr(filp->f_path.dentry, &ia);
 		if (err)
 			goto up;
 
+		fsnotify_change(filp->f_path.dentry, ia.ia_valid);
 		if (sbi->options.sys_immutable) {
 			if (attr & ATTR_SYS)
 				inode->i_flags |= S_IMMUTABLE;
diff --git a/security/security.c b/security/security.c
index 78ed3ffde24..ff706872775 100644
--- a/security/security.c
+++ b/security/security.c
@@ -442,6 +442,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 		return 0;
 	return security_ops->inode_setattr(dentry, attr);
 }
+EXPORT_SYMBOL_GPL(security_inode_setattr);
 
 int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 {
-- 
cgit v1.2.3-70-g09d2