diff options
Diffstat (limited to 'fs/pnode.c')
| -rw-r--r-- | fs/pnode.c | 318 |
1 files changed, 182 insertions, 136 deletions
diff --git a/fs/pnode.c b/fs/pnode.c index 8d5f392ec3d..302bf22c4a3 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,49 +9,35 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/nsproxy.h> #include "internal.h" #include "pnode.h" /* return the next shared peer mount of @p */ -static inline struct vfsmount *next_peer(struct vfsmount *p) +static inline struct mount *next_peer(struct mount *p) { - return list_entry(p->mnt_share.next, struct vfsmount, mnt_share); + return list_entry(p->mnt_share.next, struct mount, mnt_share); } -static inline struct vfsmount *first_slave(struct vfsmount *p) +static inline struct mount *first_slave(struct mount *p) { - return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave); + return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); } -static inline struct vfsmount *next_slave(struct vfsmount *p) +static inline struct mount *next_slave(struct mount *p) { - return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); + return list_entry(p->mnt_slave.next, struct mount, mnt_slave); } -/* - * Return true if path is reachable from root - * - * namespace_sem is held, and mnt is attached - */ -static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry, - const struct path *root) -{ - while (mnt != root->mnt && mnt->mnt_parent != mnt) { - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; - } - return mnt == root->mnt && is_subdir(dentry, root->dentry); -} - -static struct vfsmount *get_peer_under_root(struct vfsmount *mnt, - struct mnt_namespace *ns, - const struct path *root) +static struct mount *get_peer_under_root(struct mount *mnt, + struct mnt_namespace *ns, + const struct path *root) { - struct vfsmount *m = mnt; + struct mount *m = mnt; do { /* Check the namespace first for optimization */ - if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root)) + if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root)) return m; m = next_peer(m); @@ -66,12 +52,12 @@ static struct vfsmount *get_peer_under_root(struct vfsmount *mnt, * * Caller must hold namespace_sem */ -int get_dominating_id(struct vfsmount *mnt, const struct path *root) +int get_dominating_id(struct mount *mnt, const struct path *root) { - struct vfsmount *m; + struct mount *m; for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { - struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root); + struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root); if (d) return d->mnt_group_id; } @@ -79,25 +65,26 @@ int get_dominating_id(struct vfsmount *mnt, const struct path *root) return 0; } -static int do_make_slave(struct vfsmount *mnt) +static int do_make_slave(struct mount *mnt) { - struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; - struct vfsmount *slave_mnt; + struct mount *peer_mnt = mnt, *master = mnt->mnt_master; + struct mount *slave_mnt; /* * slave 'mnt' to a peer mount that has the - * same root dentry. If none is available than + * same root dentry. If none is available then * slave it to anything that is available. */ while ((peer_mnt = next_peer(peer_mnt)) != mnt && - peer_mnt->mnt_root != mnt->mnt_root) ; + peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ; if (peer_mnt == mnt) { peer_mnt = next_peer(mnt); if (peer_mnt == mnt) peer_mnt = NULL; } - if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share)) + if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) && + list_empty(&mnt->mnt_share)) mnt_release_group_id(mnt); list_del_init(&mnt->mnt_share); @@ -116,7 +103,7 @@ static int do_make_slave(struct vfsmount *mnt) struct list_head *p = &mnt->mnt_slave_list; while (!list_empty(p)) { slave_mnt = list_first_entry(p, - struct vfsmount, mnt_slave); + struct mount, mnt_slave); list_del_init(&slave_mnt->mnt_slave); slave_mnt->mnt_master = NULL; } @@ -126,7 +113,10 @@ static int do_make_slave(struct vfsmount *mnt) return 0; } -void change_mnt_propagation(struct vfsmount *mnt, int type) +/* + * vfsmount lock must be held for write + */ +void change_mnt_propagation(struct mount *mnt, int type) { if (type == MS_SHARED) { set_mnt_shared(mnt); @@ -137,9 +127,9 @@ void change_mnt_propagation(struct vfsmount *mnt, int type) list_del_init(&mnt->mnt_slave); mnt->mnt_master = NULL; if (type == MS_UNBINDABLE) - mnt->mnt_flags |= MNT_UNBINDABLE; + mnt->mnt.mnt_flags |= MNT_UNBINDABLE; else - mnt->mnt_flags &= ~MNT_UNBINDABLE; + mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE; } } @@ -147,21 +137,25 @@ void change_mnt_propagation(struct vfsmount *mnt, int type) * get the next mount in the propagation tree. * @m: the mount seen last * @origin: the original mount from where the tree walk initiated + * + * Note that peer groups form contiguous segments of slave lists. + * We rely on that in get_source() to be able to find out if + * vfsmount found while iterating with propagation_next() is + * a peer of one we'd found earlier. */ -static struct vfsmount *propagation_next(struct vfsmount *m, - struct vfsmount *origin) +static struct mount *propagation_next(struct mount *m, + struct mount *origin) { /* are there any slaves of this mount? */ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { - struct vfsmount *next; - struct vfsmount *master = m->mnt_master; + struct mount *master = m->mnt_master; if (master == origin->mnt_master) { - next = next_peer(m); - return ((next == origin) ? NULL : next); + struct mount *next = next_peer(m); + return (next == origin) ? NULL : next; } else if (m->mnt_slave.next != &master->mnt_slave_list) return next_slave(m); @@ -170,45 +164,94 @@ static struct vfsmount *propagation_next(struct vfsmount *m, } } -/* - * return the source mount to be used for cloning - * - * @dest the current destination mount - * @last_dest the last seen destination mount - * @last_src the last seen source mount - * @type return CL_SLAVE if the new mount has to be - * cloned as a slave. - */ -static struct vfsmount *get_source(struct vfsmount *dest, - struct vfsmount *last_dest, - struct vfsmount *last_src, - int *type) +static struct mount *next_group(struct mount *m, struct mount *origin) { - struct vfsmount *p_last_src = NULL; - struct vfsmount *p_last_dest = NULL; - *type = CL_PROPAGATION; - - if (IS_MNT_SHARED(dest)) - *type |= CL_MAKE_SHARED; - - while (last_dest != dest->mnt_master) { - p_last_dest = last_dest; - p_last_src = last_src; - last_dest = last_dest->mnt_master; - last_src = last_src->mnt_master; + while (1) { + while (1) { + struct mount *next; + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) + return first_slave(m); + next = next_peer(m); + if (m->mnt_group_id == origin->mnt_group_id) { + if (next == origin) + return NULL; + } else if (m->mnt_slave.next != &next->mnt_slave) + break; + m = next; + } + /* m is the last peer */ + while (1) { + struct mount *master = m->mnt_master; + if (m->mnt_slave.next != &master->mnt_slave_list) + return next_slave(m); + m = next_peer(master); + if (master->mnt_group_id == origin->mnt_group_id) + break; + if (master->mnt_slave.next == &m->mnt_slave) + break; + m = master; + } + if (m == origin) + return NULL; } +} - if (p_last_dest) { - do { - p_last_dest = next_peer(p_last_dest); - } while (IS_MNT_NEW(p_last_dest)); - } +/* all accesses are serialized by namespace_sem */ +static struct user_namespace *user_ns; +static struct mount *last_dest, *last_source, *dest_master; +static struct mountpoint *mp; +static struct hlist_head *list; - if (dest != p_last_dest) { - *type |= CL_SLAVE; - return last_src; - } else - return p_last_src; +static int propagate_one(struct mount *m) +{ + struct mount *child; + int type; + /* skip ones added by this propagate_mnt() */ + if (IS_MNT_NEW(m)) + return 0; + /* skip if mountpoint isn't covered by it */ + if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) + return 0; + if (m->mnt_group_id == last_dest->mnt_group_id) { + type = CL_MAKE_SHARED; + } else { + struct mount *n, *p; + for (n = m; ; n = p) { + p = n->mnt_master; + if (p == dest_master || IS_MNT_MARKED(p)) { + while (last_dest->mnt_master != p) { + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } + if (n->mnt_group_id != last_dest->mnt_group_id) { + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } + break; + } + } + type = CL_SLAVE; + /* beginning of peer group among the slaves? */ + if (IS_MNT_SHARED(m)) + type |= CL_MAKE_SHARED; + } + + /* Notice when we are propagating across user namespaces */ + if (m->mnt_ns->user_ns != user_ns) + type |= CL_UNPRIVILEGED; + child = copy_tree(last_source, last_source->mnt.mnt_root, type); + if (IS_ERR(child)) + return PTR_ERR(child); + mnt_set_mountpoint(m, mp, child); + last_dest = m; + last_source = child; + if (m->mnt_master != dest_master) { + read_seqlock_excl(&mount_lock); + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); + } + hlist_add_head(&child->mnt_hash, list); + return 0; } /* @@ -224,63 +267,60 @@ static struct vfsmount *get_source(struct vfsmount *dest, * @source_mnt: source mount. * @tree_list : list of heads of trees to be attached. */ -int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, - struct vfsmount *source_mnt, struct list_head *tree_list) +int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, + struct mount *source_mnt, struct hlist_head *tree_list) { - struct vfsmount *m, *child; + struct mount *m, *n; int ret = 0; - struct vfsmount *prev_dest_mnt = dest_mnt; - struct vfsmount *prev_src_mnt = source_mnt; - LIST_HEAD(tmp_list); - LIST_HEAD(umount_list); - - for (m = propagation_next(dest_mnt, dest_mnt); m; - m = propagation_next(m, dest_mnt)) { - int type; - struct vfsmount *source; - - if (IS_MNT_NEW(m)) - continue; - - source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - if (!(child = copy_tree(source, source->mnt_root, type))) { - ret = -ENOMEM; - list_splice(tree_list, tmp_list.prev); + /* + * we don't want to bother passing tons of arguments to + * propagate_one(); everything is serialized by namespace_sem, + * so globals will do just fine. + */ + user_ns = current->nsproxy->mnt_ns->user_ns; + last_dest = dest_mnt; + last_source = source_mnt; + mp = dest_mp; + list = tree_list; + dest_master = dest_mnt->mnt_master; + + /* all peers of dest_mnt, except dest_mnt itself */ + for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { + ret = propagate_one(n); + if (ret) goto out; - } + } - if (is_subdir(dest_dentry, m->mnt_root)) { - mnt_set_mountpoint(m, dest_dentry, child); - list_add_tail(&child->mnt_hash, tree_list); - } else { - /* - * This can happen if the parent mount was bind mounted - * on some subdirectory of a shared/slave mount. - */ - list_add_tail(&child->mnt_hash, &tmp_list); - } - prev_dest_mnt = m; - prev_src_mnt = child; + /* all slave groups */ + for (m = next_group(dest_mnt, dest_mnt); m; + m = next_group(m, dest_mnt)) { + /* everything in that slave group */ + n = m; + do { + ret = propagate_one(n); + if (ret) + goto out; + n = next_peer(n); + } while (n != m); } out: - spin_lock(&vfsmount_lock); - while (!list_empty(&tmp_list)) { - child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); - umount_tree(child, 0, &umount_list); + read_seqlock_excl(&mount_lock); + hlist_for_each_entry(n, tree_list, mnt_hash) { + m = n->mnt_parent; + if (m->mnt_master != dest_mnt->mnt_master) + CLEAR_MNT_MARK(m->mnt_master); } - spin_unlock(&vfsmount_lock); - release_mounts(&umount_list); + read_sequnlock_excl(&mount_lock); return ret; } /* * return true if the refcount is greater than count */ -static inline int do_refcount_check(struct vfsmount *mnt, int count) +static inline int do_refcount_check(struct mount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; - return (mycount > count); + return mnt_get_count(mnt) > count; } /* @@ -290,11 +330,13 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. + * + * vfsmount lock must be held for write */ -int propagate_mount_busy(struct vfsmount *mnt, int refcnt) +int propagate_mount_busy(struct mount *mnt, int refcnt) { - struct vfsmount *m, *child; - struct vfsmount *parent = mnt->mnt_parent; + struct mount *m, *child; + struct mount *parent = mnt->mnt_parent; int ret = 0; if (mnt == parent) @@ -310,7 +352,7 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt(m, mnt->mnt_mountpoint, 0); + child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); if (child && list_empty(&child->mnt_mounts) && (ret = do_refcount_check(child, 1))) break; @@ -322,24 +364,26 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ -static void __propagate_umount(struct vfsmount *mnt) +static void __propagate_umount(struct mount *mnt) { - struct vfsmount *parent = mnt->mnt_parent; - struct vfsmount *m; + struct mount *parent = mnt->mnt_parent; + struct mount *m; BUG_ON(parent == mnt); for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct vfsmount *child = __lookup_mnt(m, - mnt->mnt_mountpoint, 0); + struct mount *child = __lookup_mnt_last(&m->mnt, + mnt->mnt_mountpoint); /* * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) - list_move_tail(&child->mnt_hash, &mnt->mnt_hash); + if (child && list_empty(&child->mnt_mounts)) { + hlist_del_init_rcu(&child->mnt_hash); + hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + } } } @@ -347,12 +391,14 @@ static void __propagate_umount(struct vfsmount *mnt) * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. + * + * vfsmount lock must be held for write */ -int propagate_umount(struct list_head *list) +int propagate_umount(struct hlist_head *list) { - struct vfsmount *mnt; + struct mount *mnt; - list_for_each_entry(mnt, list, mnt_hash) + hlist_for_each_entry(mnt, list, mnt_hash) __propagate_umount(mnt); return 0; } |
