diff options
Diffstat (limited to 'kernel/user_namespace.c')
| -rw-r--r-- | kernel/user_namespace.c | 415 |
1 files changed, 361 insertions, 54 deletions
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86602316422..fcc02560fd6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/proc_ns.h> #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> @@ -19,12 +20,33 @@ #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/ctype.h> +#include <linux/projid.h> +#include <linux/fs_struct.h> static struct kmem_cache *user_ns_cachep __read_mostly; -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); +static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + /* Start with the same capabilities as init but useless for doing + * anything as the capabilities are bound to the new user namespace. + */ + cred->securebits = SECUREBITS_DEFAULT; + cred->cap_inheritable = CAP_EMPTY_SET; + cred->cap_permitted = CAP_FULL_SET; + cred->cap_effective = CAP_FULL_SET; + cred->cap_bset = CAP_FULL_SET; +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + cred->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + cred->user_ns = user_ns; +} + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -38,6 +60,19 @@ int create_user_ns(struct cred *new) struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; + int ret; + + if (parent_ns->level > 32) + return -EUSERS; + + /* + * Verify that we can not violate the policy of which files + * may be accessed that is specified by the root directory, + * by verifing that the root directory is at the root of the + * mount namespace which allows all files to be accessed. + */ + if (current_chrooted()) + return -EPERM; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who @@ -51,40 +86,60 @@ int create_user_ns(struct cred *new) if (!ns) return -ENOMEM; - kref_init(&ns->kref); + ret = proc_alloc_inum(&ns->proc_inum); + if (ret) { + kmem_cache_free(user_ns_cachep, ns); + return ret; + } + + atomic_set(&ns->count, 1); + /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; + ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; - /* Start with the same capabilities as init but useless for doing - * anything as the capabilities are bound to the new user namespace. - */ - new->securebits = SECUREBITS_DEFAULT; - new->cap_inheritable = CAP_EMPTY_SET; - new->cap_permitted = CAP_FULL_SET; - new->cap_effective = CAP_FULL_SET; - new->cap_bset = CAP_FULL_SET; -#ifdef CONFIG_KEYS - key_put(new->request_key_auth); - new->request_key_auth = NULL; + set_cred_user_ns(new, ns); + +#ifdef CONFIG_PERSISTENT_KEYRINGS + init_rwsem(&ns->persistent_keyring_register_sem); #endif - /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + return 0; +} - /* Leave the new->user_ns reference with the new user namespace. */ - /* Leave the reference to our user_ns with the new cred. */ - new->user_ns = ns; +int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) +{ + struct cred *cred; + int err = -ENOMEM; + + if (!(unshare_flags & CLONE_NEWUSER)) + return 0; + + cred = prepare_creds(); + if (cred) { + err = create_user_ns(cred); + if (err) + put_cred(cred); + else + *new_cred = cred; + } - return 0; + return err; } -void free_user_ns(struct kref *kref) +void free_user_ns(struct user_namespace *ns) { - struct user_namespace *parent, *ns = - container_of(kref, struct user_namespace, kref); + struct user_namespace *parent; - parent = ns->parent; - kmem_cache_free(user_ns_cachep, ns); - put_user_ns(parent); + do { + parent = ns->parent; +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif + proc_free_inum(ns->proc_inum); + kmem_cache_free(user_ns_cachep, ns); + ns = parent; + } while (atomic_dec_and_test(&parent->count)); } EXPORT_SYMBOL(free_user_ns); @@ -97,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -121,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -144,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -170,7 +225,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) * * When there is no mapping defined for the user-namespace uid * pair INVALID_UID is returned. Callers are expected to test - * for and handle handle INVALID_UID being returned. INVALID_UID + * for and handle INVALID_UID being returned. INVALID_UID * may be tested for using uid_valid(). */ kuid_t make_kuid(struct user_namespace *ns, uid_t uid) @@ -231,7 +286,7 @@ EXPORT_SYMBOL(from_kuid_munged); /** * make_kgid - Map a user-namespace gid pair into a kgid. * @ns: User namespace that the gid is in - * @uid: group identifier + * @gid: group identifier * * Maps a user-namespace gid pair into a kernel internal kgid, * and returns that kgid. @@ -295,6 +350,75 @@ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) } EXPORT_SYMBOL(from_kgid_munged); +/** + * make_kprojid - Map a user-namespace projid pair into a kprojid. + * @ns: User namespace that the projid is in + * @projid: Project identifier + * + * Maps a user-namespace uid pair into a kernel internal kuid, + * and returns that kuid. + * + * When there is no mapping defined for the user-namespace projid + * pair INVALID_PROJID is returned. Callers are expected to test + * for and handle handle INVALID_PROJID being returned. INVALID_PROJID + * may be tested for using projid_valid(). + */ +kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) +{ + /* Map the uid to a global kernel uid */ + return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); +} +EXPORT_SYMBOL(make_kprojid); + +/** + * from_kprojid - Create a projid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal project identifier to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kprojid has no mapping in @targ (projid_t)-1 is returned. + */ +projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) +{ + /* Map the uid from a global kernel uid */ + return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); +} +EXPORT_SYMBOL(from_kprojid); + +/** + * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal projid to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kprojid from_kprojid_munged never fails and always + * returns a valid projid. This makes from_kprojid_munged + * appropriate for use in syscalls like stat and where + * failing the system call and failing to provide a valid projid are + * not an options. + * + * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. + */ +projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) +{ + projid_t projid; + projid = from_kprojid(targ, kprojid); + + if (projid == (projid_t) -1) + projid = OVERFLOW_PROJID; + return projid; +} +EXPORT_SYMBOL(from_kprojid_munged); + + static int uid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; @@ -302,7 +426,7 @@ static int uid_m_show(struct seq_file *seq, void *v) struct user_namespace *lower_ns; uid_t lower; - lower_ns = current_user_ns(); + lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; @@ -323,7 +447,7 @@ static int gid_m_show(struct seq_file *seq, void *v) struct user_namespace *lower_ns; gid_t lower; - lower_ns = current_user_ns(); + lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; @@ -337,7 +461,29 @@ static int gid_m_show(struct seq_file *seq, void *v) return 0; } -static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) +static int projid_m_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + struct uid_gid_extent *extent = v; + struct user_namespace *lower_ns; + projid_t lower; + + lower_ns = seq_user_ns(seq); + if ((lower_ns == ns) && lower_ns->parent) + lower_ns = lower_ns->parent; + + lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); + + seq_printf(seq, "%10u %10u %10u\n", + extent->first, + lower, + extent->count); + + return 0; +} + +static void *m_start(struct seq_file *seq, loff_t *ppos, + struct uid_gid_map *map) { struct uid_gid_extent *extent = NULL; loff_t pos = *ppos; @@ -362,6 +508,13 @@ static void *gid_m_start(struct seq_file *seq, loff_t *ppos) return m_start(seq, ppos, &ns->gid_map); } +static void *projid_m_start(struct seq_file *seq, loff_t *ppos) +{ + struct user_namespace *ns = seq->private; + + return m_start(seq, ppos, &ns->projid_map); +} + static void *m_next(struct seq_file *seq, void *v, loff_t *pos) { (*pos)++; @@ -387,6 +540,50 @@ struct seq_operations proc_gid_seq_operations = { .show = gid_m_show, }; +struct seq_operations proc_projid_seq_operations = { + .start = projid_m_start, + .stop = m_stop, + .next = m_next, + .show = projid_m_show, +}; + +static bool mappings_overlap(struct uid_gid_map *new_map, + struct uid_gid_extent *extent) +{ + u32 upper_first, lower_first, upper_last, lower_last; + unsigned idx; + + upper_first = extent->first; + lower_first = extent->lower_first; + upper_last = upper_first + extent->count - 1; + lower_last = lower_first + extent->count - 1; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + u32 prev_upper_first, prev_lower_first; + u32 prev_upper_last, prev_lower_last; + struct uid_gid_extent *prev; + + prev = &new_map->extent[idx]; + + prev_upper_first = prev->first; + prev_lower_first = prev->lower_first; + prev_upper_last = prev_upper_first + prev->count - 1; + prev_lower_last = prev_lower_first + prev->count - 1; + + /* Does the upper range intersect a previous extent? */ + if ((prev_upper_first <= upper_last) && + (prev_upper_last >= upper_first)) + return true; + + /* Does the lower range intersect a previous extent? */ + if ((prev_lower_first <= lower_last) && + (prev_lower_last >= lower_first)) + return true; + } + return false; +} + + static DEFINE_MUTEX(id_map_mutex); static ssize_t map_write(struct file *file, const char __user *buf, @@ -399,7 +596,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct user_namespace *ns = seq->private; struct uid_gid_map new_map; unsigned idx; - struct uid_gid_extent *extent, *last = NULL; + struct uid_gid_extent *extent = NULL; unsigned long page = 0; char *kbuf, *pos, *next_line; ssize_t ret = -EINVAL; @@ -420,9 +617,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); @@ -431,10 +627,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (map->nr_extents != 0) goto out; - /* Require the appropriate privilege CAP_SETUID or CAP_SETGID - * over the user namespace in order to set the id mapping. + /* + * Adjusting namespace settings requires capabilities on the target. */ - if (!ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; /* Get a buffer */ @@ -459,7 +655,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EINVAL; pos = kbuf; new_map.nr_extents = 0; - for (;pos; pos = next_line) { + for (; pos; pos = next_line) { extent = &new_map.extent[new_map.nr_extents]; /* Find the end of line and ensure I don't look past it */ @@ -493,23 +689,23 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* Verify we have been given valid starting values */ if ((extent->first == (u32) -1) || - (extent->lower_first == (u32) -1 )) + (extent->lower_first == (u32) -1)) goto out; - /* Verify count is not zero and does not cause the extent to wrap */ + /* Verify count is not zero and does not cause the + * extent to wrap + */ if ((extent->first + extent->count) <= extent->first) goto out; - if ((extent->lower_first + extent->count) <= extent->lower_first) + if ((extent->lower_first + extent->count) <= + extent->lower_first) goto out; - /* For now only accept extents that are strictly in order */ - if (last && - (((last->first + last->count) > extent->first) || - ((last->lower_first + last->count) > extent->lower_first))) + /* Do the ranges in extent overlap any previous extents? */ + if (mappings_overlap(&new_map, extent)) goto out; new_map.nr_extents++; - last = extent; /* Fail if the file contains too many extents */ if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && @@ -522,7 +718,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; /* Map the lower ids from the parent user namespace to the @@ -560,45 +756,156 @@ out: return ret; } -ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +ssize_t proc_uid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + return map_write(file, buf, size, ppos, CAP_SETUID, &ns->uid_map, &ns->parent->uid_map); } -ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +ssize_t proc_gid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + return map_write(file, buf, size, ppos, CAP_SETGID, &ns->gid_map, &ns->parent->gid_map); } -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +ssize_t proc_projid_map_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); + + if (!ns->parent) + return -EPERM; + + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + + /* Anyone can set any valid project id no capability needed */ + return map_write(file, buf, size, ppos, -1, + &ns->projid_map, &ns->parent->projid_map); +} + +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + /* Allow mapping to your own filesystem ids */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + u32 id = new_map->extent[0].lower_first; + if (cap_setid == CAP_SETUID) { + kuid_t uid = make_kuid(ns->parent, id); + if (uid_eq(uid, file->f_cred->fsuid)) + return true; + } else if (cap_setid == CAP_SETGID) { + kgid_t gid = make_kgid(ns->parent, id); + if (gid_eq(gid, file->f_cred->fsgid)) + return true; + } + } + + /* Allow anyone to set a mapping that doesn't require privilege */ + if (!cap_valid(cap_setid)) + return true; + /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + * And the opener of the id file also had the approprpiate capability. */ - if (ns_capable(ns->parent, cap_setid)) + if (ns_capable(ns->parent, cap_setid) && + file_ns_capable(file, ns->parent, cap_setid)) return true; return false; } +static void *userns_get(struct task_struct *task) +{ + struct user_namespace *user_ns; + + rcu_read_lock(); + user_ns = get_user_ns(__task_cred(task)->user_ns); + rcu_read_unlock(); + + return user_ns; +} + +static void userns_put(void *ns) +{ + put_user_ns(ns); +} + +static int userns_install(struct nsproxy *nsproxy, void *ns) +{ + struct user_namespace *user_ns = ns; + struct cred *cred; + + /* Don't allow gaining capabilities by reentering + * the same user namespace. + */ + if (user_ns == current_user_ns()) + return -EINVAL; + + /* Threaded processes may not enter a different user namespace */ + if (atomic_read(¤t->mm->mm_users) > 1) + return -EINVAL; + + if (current->fs->users != 1) + return -EINVAL; + + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cred = prepare_creds(); + if (!cred) + return -ENOMEM; + + put_user_ns(cred->user_ns); + set_cred_user_ns(cred, get_user_ns(user_ns)); + + return commit_creds(cred); +} + +static unsigned int userns_inum(void *ns) +{ + struct user_namespace *user_ns = ns; + return user_ns->proc_inum; +} + +const struct proc_ns_operations userns_operations = { + .name = "user", + .type = CLONE_NEWUSER, + .get = userns_get, + .put = userns_put, + .install = userns_install, + .inum = userns_inum, +}; + static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); return 0; } -module_init(user_namespaces_init); +subsys_initcall(user_namespaces_init); |
