From 3151527ee007b73a0ebd296010f1c0454a919c7d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 15 Mar 2013 01:45:51 -0700
Subject: userns:  Don't allow creation if the user is chrooted

Guarantee that the policy of which files may be access that is
established by setting the root directory will not be violated
by user namespaces by verifying that the root directory points
to the root of the mount namespace at the time of user namespace
creation.

Changing the root is a privileged operation, and as a matter of policy
it serves to limit unprivileged processes to files below the current
root directory.

For reasons of simplicity and comprehensibility the privilege to
change the root directory is gated solely on the CAP_SYS_CHROOT
capability in the user namespace.  Therefore when creating a user
namespace we must ensure that the policy of which files may be access
can not be violated by changing the root directory.

Anyone who runs a processes in a chroot and would like to use user
namespace can setup the same view of filesystems with a mount
namespace instead.  With this result that this is not a practical
limitation for using user namespaces.

Cc: stable@vger.kernel.org
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/user_namespace.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b14f4d34204..0f1e4288457 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -61,6 +61,15 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	/*
+	 * Verify that we can not violate the policy of which files
+	 * may be accessed that is specified by the root directory,
+	 * by verifing that the root directory is at the root of the
+	 * mount namespace which allows all files to be accessed.
+	 */
+	if (current_chrooted())
+		return -EPERM;
+
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
 	 * created a user_namespace.
-- 
cgit v1.2.3-70-g09d2


From 87a8ebd637dafc255070f503909a053cf0d98d3f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 24 Mar 2013 14:28:27 -0700
Subject: userns: Restrict when proc and sysfs can be mounted

Only allow unprivileged mounts of proc and sysfs if they are already
mounted when the user namespace is created.

proc and sysfs are interesting because they have content that is
per namespace, and so fresh mounts are needed when new namespaces
are created while at the same time proc and sysfs have content that
is shared between every instance.

Respect the policy of who may see the shared content of proc and sysfs
by only allowing new mounts if there was an existing mount at the time
the user namespace was created.

In practice there are only two interesting cases: proc and sysfs are
mounted at their usual places, proc and sysfs are not mounted at all
(some form of mount namespace jail).

Cc: stable@vger.kernel.org
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c                 | 21 +++++++++++++++++++++
 fs/proc/root.c                 |  4 ++++
 fs/sysfs/mount.c               |  4 ++++
 include/linux/user_namespace.h |  4 ++++
 kernel/user.c                  |  2 ++
 kernel/user_namespace.c        |  2 ++
 6 files changed, 37 insertions(+)

(limited to 'kernel/user_namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 968d4c5eae0..d581e45c0a9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2763,6 +2763,27 @@ bool current_chrooted(void)
 	return chrooted;
 }
 
+void update_mnt_policy(struct user_namespace *userns)
+{
+	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+	struct mount *mnt;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		switch (mnt->mnt.mnt_sb->s_magic) {
+		case SYSFS_MAGIC:
+			userns->may_mount_sysfs = true;
+			break;
+		case PROC_SUPER_MAGIC:
+			userns->may_mount_proc = true;
+			break;
+		}
+		if (userns->may_mount_sysfs && userns->may_mount_proc)
+			break;
+	}
+	up_read(&namespace_sem);
+}
+
 static void *mntns_get(struct task_struct *task)
 {
 	struct mnt_namespace *ns = NULL;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26ba..9c7fab1d23f 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/user_namespace.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
 #include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 	} else {
 		ns = task_active_pid_ns(current);
 		options = data;
+
+		if (!current_user_ns()->may_mount_proc)
+			return ERR_PTR(-EPERM);
 	}
 
 	sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec73..afd83273e6c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 
 #include "sysfs.h"
 
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	struct super_block *sb;
 	int error;
 
+	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+		return ERR_PTR(-EPERM);
+
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return ERR_PTR(-ENOMEM);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4ce00932493..b6b215f13b4 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,6 +26,8 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
+	bool			may_mount_sysfs;
+	bool			may_mount_proc;
 };
 
 extern struct user_namespace init_user_ns;
@@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
+void update_mnt_policy(struct user_namespace *userns);
+
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user.c b/kernel/user.c
index e81978e8c03..8e635a18ab5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = {
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
+	.may_mount_sysfs = true,
+	.may_mount_proc = true,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0f1e4288457..a54f26f82eb 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -96,6 +96,8 @@ int create_user_ns(struct cred *new)
 
 	set_cred_user_ns(new, ns);
 
+	update_mnt_policy(ns);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 6708075f104c3c9b04b23336bb0366ca30c3931b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 14 Apr 2013 13:47:02 -0700
Subject: userns: Don't let unprivileged users trick privileged users into
 setting the id_map

When we require privilege for setting /proc/<pid>/uid_map or
/proc/<pid>/gid_map no longer allow an unprivileged user to
open the file and pass it to a privileged program to write
to the file.

Instead when privilege is required require both the opener and the
writer to have the necessary capabilities.

I have tested this code and verified that setting /proc/<pid>/uid_map
fails when an unprivileged user opens the file and a privielged user
attempts to set the mapping, that unprivileged users can still map
their own id, and that a privileged users can still setup an arbitrary
mapping.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/user_namespace.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a54f26f82eb..e2d4ace4481 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -25,7 +25,8 @@
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file,
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
@@ -700,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 
 	ret = -EPERM;
 	/* Validate the user is allowed to use user id's mapped to. */
-	if (!new_idmap_permitted(ns, cap_setid, &new_map))
+	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 		goto out;
 
 	/* Map the lower ids from the parent user namespace to the
@@ -787,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
 			 &ns->projid_map, &ns->parent->projid_map);
 }
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file, 
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
 	/* Allow mapping to your own filesystem ids */
@@ -811,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 
 	/* Allow the specified ids if we have the appropriate capability
 	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
+	 * And the opener of the id file also had the approprpiate capability.
 	 */
-	if (ns_capable(ns->parent, cap_setid))
+	if (ns_capable(ns->parent, cap_setid) &&
+	    file_ns_capable(file, ns->parent, cap_setid))
 		return true;
 
 	return false;
-- 
cgit v1.2.3-70-g09d2


From e3211c120a85b792978bcb4be7b2886df18d27f0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Sun, 14 Apr 2013 16:28:19 -0700
Subject: userns: Check uid_map's opener's fsuid, not the current fsuid

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/user_namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e2d4ace4481..5c16f3aa757 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -797,12 +797,12 @@ static bool new_idmap_permitted(const struct file *file,
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
 			kuid_t uid = make_kuid(ns->parent, id);
-			if (uid_eq(uid, current_fsuid()))
+			if (uid_eq(uid, file->f_cred->fsuid))
 				return true;
 		}
 		else if (cap_setid == CAP_SETGID) {
 			kgid_t gid = make_kgid(ns->parent, id);
-			if (gid_eq(gid, current_fsgid()))
+			if (gid_eq(gid, file->f_cred->fsgid))
 				return true;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 41c21e351e79004dbb4efa4bc14a53a7e0af38c5 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Sun, 14 Apr 2013 11:44:04 -0700
Subject: userns: Changing any namespace id mappings should require privileges

Changing uid/gid/projid mappings doesn't change your id within the
namespace; it reconfigures the namespace.  Unprivileged programs should
*not* be able to write these files.  (We're also checking the privileges
on the wrong task.)

Given the write-once nature of these files and the other security
checks, this is likely impossible to usefully exploit.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/user_namespace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 5c16f3aa757..e134d8f365d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -613,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (map->nr_extents != 0)
 		goto out;
 
-	/* Require the appropriate privilege CAP_SETUID or CAP_SETGID
-	 * over the user namespace in order to set the id mapping.
+	/*
+	 * Adjusting namespace settings requires capabilities on the target.
 	 */
-	if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
+	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 		goto out;
 
 	/* Get a buffer */
-- 
cgit v1.2.3-70-g09d2