diff options
Diffstat (limited to 'kernel/user_namespace.c')
| -rw-r--r-- | kernel/user_namespace.c | 291 |
1 files changed, 236 insertions, 55 deletions
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 456a6b9fba3..fcc02560fd6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/proc_ns.h> #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> @@ -20,12 +21,32 @@ #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/projid.h> +#include <linux/fs_struct.h> static struct kmem_cache *user_ns_cachep __read_mostly; -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); +static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + /* Start with the same capabilities as init but useless for doing + * anything as the capabilities are bound to the new user namespace. + */ + cred->securebits = SECUREBITS_DEFAULT; + cred->cap_inheritable = CAP_EMPTY_SET; + cred->cap_permitted = CAP_FULL_SET; + cred->cap_effective = CAP_FULL_SET; + cred->cap_bset = CAP_FULL_SET; +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + cred->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + cred->user_ns = user_ns; +} + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -39,6 +60,19 @@ int create_user_ns(struct cred *new) struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; + int ret; + + if (parent_ns->level > 32) + return -EUSERS; + + /* + * Verify that we can not violate the policy of which files + * may be accessed that is specified by the root directory, + * by verifing that the root directory is at the root of the + * mount namespace which allows all files to be accessed. + */ + if (current_chrooted()) + return -EPERM; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who @@ -52,40 +86,60 @@ int create_user_ns(struct cred *new) if (!ns) return -ENOMEM; - kref_init(&ns->kref); + ret = proc_alloc_inum(&ns->proc_inum); + if (ret) { + kmem_cache_free(user_ns_cachep, ns); + return ret; + } + + atomic_set(&ns->count, 1); + /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; + ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; - /* Start with the same capabilities as init but useless for doing - * anything as the capabilities are bound to the new user namespace. - */ - new->securebits = SECUREBITS_DEFAULT; - new->cap_inheritable = CAP_EMPTY_SET; - new->cap_permitted = CAP_FULL_SET; - new->cap_effective = CAP_FULL_SET; - new->cap_bset = CAP_FULL_SET; -#ifdef CONFIG_KEYS - key_put(new->request_key_auth); - new->request_key_auth = NULL; + set_cred_user_ns(new, ns); + +#ifdef CONFIG_PERSISTENT_KEYRINGS + init_rwsem(&ns->persistent_keyring_register_sem); #endif - /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + return 0; +} - /* Leave the new->user_ns reference with the new user namespace. */ - /* Leave the reference to our user_ns with the new cred. */ - new->user_ns = ns; +int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) +{ + struct cred *cred; + int err = -ENOMEM; + + if (!(unshare_flags & CLONE_NEWUSER)) + return 0; + + cred = prepare_creds(); + if (cred) { + err = create_user_ns(cred); + if (err) + put_cred(cred); + else + *new_cred = cred; + } - return 0; + return err; } -void free_user_ns(struct kref *kref) +void free_user_ns(struct user_namespace *ns) { - struct user_namespace *parent, *ns = - container_of(kref, struct user_namespace, kref); + struct user_namespace *parent; - parent = ns->parent; - kmem_cache_free(user_ns_cachep, ns); - put_user_ns(parent); + do { + parent = ns->parent; +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif + proc_free_inum(ns->proc_inum); + kmem_cache_free(user_ns_cachep, ns); + ns = parent; + } while (atomic_dec_and_test(&parent->count)); } EXPORT_SYMBOL(free_user_ns); @@ -98,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -122,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -145,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -171,7 +225,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) * * When there is no mapping defined for the user-namespace uid * pair INVALID_UID is returned. Callers are expected to test - * for and handle handle INVALID_UID being returned. INVALID_UID + * for and handle INVALID_UID being returned. INVALID_UID * may be tested for using uid_valid(). */ kuid_t make_kuid(struct user_namespace *ns, uid_t uid) @@ -232,7 +286,7 @@ EXPORT_SYMBOL(from_kuid_munged); /** * make_kgid - Map a user-namespace gid pair into a kgid. * @ns: User namespace that the gid is in - * @uid: group identifier + * @gid: group identifier * * Maps a user-namespace gid pair into a kernel internal kgid, * and returns that kgid. @@ -372,7 +426,7 @@ static int uid_m_show(struct seq_file *seq, void *v) struct user_namespace *lower_ns; uid_t lower; - lower_ns = current_user_ns(); + lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; @@ -393,7 +447,7 @@ static int gid_m_show(struct seq_file *seq, void *v) struct user_namespace *lower_ns; gid_t lower; - lower_ns = current_user_ns(); + lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; @@ -428,7 +482,8 @@ static int projid_m_show(struct seq_file *seq, void *v) return 0; } -static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) +static void *m_start(struct seq_file *seq, loff_t *ppos, + struct uid_gid_map *map) { struct uid_gid_extent *extent = NULL; loff_t pos = *ppos; @@ -492,6 +547,43 @@ struct seq_operations proc_projid_seq_operations = { .show = projid_m_show, }; +static bool mappings_overlap(struct uid_gid_map *new_map, + struct uid_gid_extent *extent) +{ + u32 upper_first, lower_first, upper_last, lower_last; + unsigned idx; + + upper_first = extent->first; + lower_first = extent->lower_first; + upper_last = upper_first + extent->count - 1; + lower_last = lower_first + extent->count - 1; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + u32 prev_upper_first, prev_lower_first; + u32 prev_upper_last, prev_lower_last; + struct uid_gid_extent *prev; + + prev = &new_map->extent[idx]; + + prev_upper_first = prev->first; + prev_lower_first = prev->lower_first; + prev_upper_last = prev_upper_first + prev->count - 1; + prev_lower_last = prev_lower_first + prev->count - 1; + + /* Does the upper range intersect a previous extent? */ + if ((prev_upper_first <= upper_last) && + (prev_upper_last >= upper_first)) + return true; + + /* Does the lower range intersect a previous extent? */ + if ((prev_lower_first <= lower_last) && + (prev_lower_last >= lower_first)) + return true; + } + return false; +} + + static DEFINE_MUTEX(id_map_mutex); static ssize_t map_write(struct file *file, const char __user *buf, @@ -504,7 +596,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct user_namespace *ns = seq->private; struct uid_gid_map new_map; unsigned idx; - struct uid_gid_extent *extent, *last = NULL; + struct uid_gid_extent *extent = NULL; unsigned long page = 0; char *kbuf, *pos, *next_line; ssize_t ret = -EINVAL; @@ -525,9 +617,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); @@ -536,10 +627,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (map->nr_extents != 0) goto out; - /* Require the appropriate privilege CAP_SETUID or CAP_SETGID - * over the user namespace in order to set the id mapping. + /* + * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; /* Get a buffer */ @@ -564,7 +655,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EINVAL; pos = kbuf; new_map.nr_extents = 0; - for (;pos; pos = next_line) { + for (; pos; pos = next_line) { extent = &new_map.extent[new_map.nr_extents]; /* Find the end of line and ensure I don't look past it */ @@ -598,23 +689,23 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* Verify we have been given valid starting values */ if ((extent->first == (u32) -1) || - (extent->lower_first == (u32) -1 )) + (extent->lower_first == (u32) -1)) goto out; - /* Verify count is not zero and does not cause the extent to wrap */ + /* Verify count is not zero and does not cause the + * extent to wrap + */ if ((extent->first + extent->count) <= extent->first) goto out; - if ((extent->lower_first + extent->count) <= extent->lower_first) + if ((extent->lower_first + extent->count) <= + extent->lower_first) goto out; - /* For now only accept extents that are strictly in order */ - if (last && - (((last->first + last->count) > extent->first) || - ((last->lower_first + last->count) > extent->lower_first))) + /* Do the ranges in extent overlap any previous extents? */ + if (mappings_overlap(&new_map, extent)) goto out; new_map.nr_extents++; - last = extent; /* Fail if the file contains too many extents */ if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && @@ -627,7 +718,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; /* Map the lower ids from the parent user namespace to the @@ -665,31 +756,42 @@ out: return ret; } -ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +ssize_t proc_uid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + return map_write(file, buf, size, ppos, CAP_SETUID, &ns->uid_map, &ns->parent->uid_map); } -ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +ssize_t proc_gid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + return map_write(file, buf, size, ppos, CAP_SETGID, &ns->gid_map, &ns->parent->gid_map); } -ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +ssize_t proc_projid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; @@ -706,25 +808,104 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t &ns->projid_map, &ns->parent->projid_map); } -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + /* Allow mapping to your own filesystem ids */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + u32 id = new_map->extent[0].lower_first; + if (cap_setid == CAP_SETUID) { + kuid_t uid = make_kuid(ns->parent, id); + if (uid_eq(uid, file->f_cred->fsuid)) + return true; + } else if (cap_setid == CAP_SETGID) { + kgid_t gid = make_kgid(ns->parent, id); + if (gid_eq(gid, file->f_cred->fsgid)) + return true; + } + } + /* Allow anyone to set a mapping that doesn't require privilege */ if (!cap_valid(cap_setid)) return true; /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + * And the opener of the id file also had the approprpiate capability. */ - if (ns_capable(ns->parent, cap_setid)) + if (ns_capable(ns->parent, cap_setid) && + file_ns_capable(file, ns->parent, cap_setid)) return true; return false; } +static void *userns_get(struct task_struct *task) +{ + struct user_namespace *user_ns; + + rcu_read_lock(); + user_ns = get_user_ns(__task_cred(task)->user_ns); + rcu_read_unlock(); + + return user_ns; +} + +static void userns_put(void *ns) +{ + put_user_ns(ns); +} + +static int userns_install(struct nsproxy *nsproxy, void *ns) +{ + struct user_namespace *user_ns = ns; + struct cred *cred; + + /* Don't allow gaining capabilities by reentering + * the same user namespace. + */ + if (user_ns == current_user_ns()) + return -EINVAL; + + /* Threaded processes may not enter a different user namespace */ + if (atomic_read(¤t->mm->mm_users) > 1) + return -EINVAL; + + if (current->fs->users != 1) + return -EINVAL; + + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cred = prepare_creds(); + if (!cred) + return -ENOMEM; + + put_user_ns(cred->user_ns); + set_cred_user_ns(cred, get_user_ns(user_ns)); + + return commit_creds(cred); +} + +static unsigned int userns_inum(void *ns) +{ + struct user_namespace *user_ns = ns; + return user_ns->proc_inum; +} + +const struct proc_ns_operations userns_operations = { + .name = "user", + .type = CLONE_NEWUSER, + .get = userns_get, + .put = userns_put, + .install = userns_install, + .inum = userns_inum, +}; + static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); return 0; } -module_init(user_namespaces_init); +subsys_initcall(user_namespaces_init); |
