From 586ce098a23b6ab7383df853a84ae3d48dc889aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Mar 2011 01:50:58 -0500 Subject: compat breakage in preadv() and pwritev() Fix for a dumb preadv()/pwritev() compat bug - unlike the native variants, compat_... ones forget to check FMODE_P{READ,WRITE}, so e.g. on pipe the native preadv() will fail with -ESPIPE and compat one will act as readv() and succeed. Not critical, but it's a clear bug with trivial fix. Signed-off-by: Al Viro --- fs/compat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index f6fd0a00e6c..691c3fd8ce1 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1228,7 +1228,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &pos); + ret = -ESPIPE; + if (file->f_mode & FMODE_PREAD) + ret = compat_readv(file, vec, vlen, &pos); fput_light(file, fput_needed); return ret; } @@ -1285,7 +1287,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &pos); + ret = -ESPIPE; + if (file->f_mode & FMODE_PWRITE) + ret = compat_writev(file, vec, vlen, &pos); fput_light(file, fput_needed); return ret; } -- cgit v1.2.3-18-g5258 From 15a9155fe3e8215c02b80df51ec2cac7c0d726ad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Feb 2011 15:08:54 -0500 Subject: fix race in audit_get_nd() don't rely on pathname resolution ending up twice at the same point... Signed-off-by: Al Viro --- kernel/audit_watch.c | 85 ++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 53 deletions(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index d2e3c786646..20b9fe6907d 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) } /* Initialize a parent watch entry. */ -static struct audit_parent *audit_init_parent(struct nameidata *ndp) +static struct audit_parent *audit_init_parent(struct path *path) { - struct inode *inode = ndp->path.dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct audit_parent *parent; int ret; @@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent) } /* Get path information necessary for adding watches. */ -static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) +static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct nameidata *ndparent, *ndwatch; + struct nameidata nd; + struct dentry *d; int err; - ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); - if (unlikely(!ndparent)) - return -ENOMEM; + err = path_lookup(watch->path, LOOKUP_PARENT, &nd); + if (err) + return err; - ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); - if (unlikely(!ndwatch)) { - kfree(ndparent); - return -ENOMEM; + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return -EINVAL; } - err = path_lookup(path, LOOKUP_PARENT, ndparent); - if (err) { - kfree(ndparent); - kfree(ndwatch); - return err; + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return PTR_ERR(d); } - - err = path_lookup(path, 0, ndwatch); - if (err) { - kfree(ndwatch); - ndwatch = NULL; + if (d->d_inode) { + /* update watch filter fields */ + watch->dev = d->d_inode->i_sb->s_dev; + watch->ino = d->d_inode->i_ino; } + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - *ndp = ndparent; - *ndw = ndwatch; - + *parent = nd.path; + dput(d); return 0; } -/* Release resources used for watch path information. */ -static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) -{ - if (ndp) { - path_put(&ndp->path); - kfree(ndp); - } - if (ndw) { - path_put(&ndw->path); - kfree(ndw); - } -} - /* Associate the given rule with an existing parent. * Caller must hold audit_filter_mutex. */ static void audit_add_to_parent(struct audit_krule *krule, @@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) { struct audit_watch *watch = krule->watch; struct audit_parent *parent; - struct nameidata *ndp = NULL, *ndw = NULL; + struct path parent_path; int h, ret = 0; mutex_unlock(&audit_filter_mutex); /* Avoid calling path_lookup under audit_filter_mutex. */ - ret = audit_get_nd(watch->path, &ndp, &ndw); - if (ret) { - /* caller expects mutex locked */ - mutex_lock(&audit_filter_mutex); - goto error; - } + ret = audit_get_nd(watch, &parent_path); + /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - /* update watch filter fields */ - if (ndw) { - watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; - watch->ino = ndw->path.dentry->d_inode->i_ino; - } + if (ret) + return ret; /* either find an old parent or attach a new one */ - parent = audit_find_parent(ndp->path.dentry->d_inode); + parent = audit_find_parent(parent_path.dentry->d_inode); if (!parent) { - parent = audit_init_parent(ndp); + parent = audit_init_parent(&parent_path); if (IS_ERR(parent)) { ret = PTR_ERR(parent); goto error; @@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) h = audit_hash_ino((u32)watch->ino); *list = &audit_inode_hash[h]; error: - audit_put_nd(ndp, ndw); /* NULL args OK */ + path_put(&parent_path); return ret; - } void audit_remove_watch_rule(struct audit_krule *krule) -- cgit v1.2.3-18-g5258 From c9c6cac0c2bdbda42e7b804838648d0bc60ddb13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Feb 2011 15:15:47 -0500 Subject: kill path_lookup() all remaining callers pass LOOKUP_PARENT to it, so flags argument can die; renamed to kern_path_parent() Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- fs/namei.c | 7 +++---- fs/ocfs2/refcounttree.c | 2 +- include/linux/namei.h | 2 +- kernel/audit_watch.c | 2 +- net/unix/af_unix.c | 2 +- 6 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 187a7d32f86..a3d2ce54ea2 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, if (!IS_ERR(tmp)) { struct nameidata nd; - ret = path_lookup(tmp, LOOKUP_PARENT, &nd); + ret = kern_path_parent(tmp, &nd); if (!ret) { nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; ret = spufs_create(&nd, flags, mode, neighbor); diff --git a/fs/namei.c b/fs/namei.c index a4689eb2df2..1d6bc815155 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1747,10 +1747,9 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int path_lookup(const char *name, unsigned int flags, - struct nameidata *nd) +int kern_path_parent(const char *name, struct nameidata *nd) { - return do_path_lookup(AT_FDCWD, name, flags, nd); + return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); } int kern_path(const char *name, unsigned int flags, struct path *path) @@ -3586,7 +3585,7 @@ EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(__page_symlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); -EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(kern_path_parent); EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19ebc5aad39..29623da133c 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path, if (IS_ERR(s)) return PTR_ERR(s); - error = path_lookup(s, LOOKUP_PARENT, nd); + error = kern_path_parent(s, nd); if (error) putname(s); else diff --git a/include/linux/namei.h b/include/linux/namei.h index f276d4fa01f..58ce3433d4e 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -72,7 +72,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *); extern int kern_path(const char *, unsigned, struct path *); -extern int path_lookup(const char *, unsigned, struct nameidata *); +extern int kern_path_parent(const char *, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 20b9fe6907d..e683869365d 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -359,7 +359,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d; int err; - err = path_lookup(watch->path, LOOKUP_PARENT, &nd); + err = kern_path_parent(watch->path, &nd); if (err) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d28620..d8c04a602cf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; -- cgit v1.2.3-18-g5258 From 52094c8a0610cf57920ad4c6c57470ae2ccbbd25 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Feb 2011 21:34:47 -0500 Subject: take RCU-dependent stuff around exec_permission() into a new helper Signed-off-by: Al Viro --- fs/namei.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 1d6bc815155..8c704465f6c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1322,6 +1322,18 @@ fail: return PTR_ERR(dentry); } +static inline int may_lookup(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) { + int err = exec_permission(nd->inode, IPERM_FLAG_RCU); + if (err != -ECHILD) + return err; + if (nameidata_drop_rcu(nd)) + return -ECHILD; + } + return exec_permission(nd->inode, 0); +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1352,17 +1364,8 @@ static int link_path_walk(const char *name, struct nameidata *nd) unsigned int c; nd->flags |= LOOKUP_CONTINUE; - if (nd->flags & LOOKUP_RCU) { - err = exec_permission(nd->inode, IPERM_FLAG_RCU); - if (err == -ECHILD) { - if (nameidata_drop_rcu(nd)) - return -ECHILD; - goto exec_again; - } - } else { -exec_again: - err = exec_permission(nd->inode, 0); - } + + err = may_lookup(nd); if (err) break; -- cgit v1.2.3-18-g5258 From ee0827cd6b42b0385dc1a116cd853ac1b739f711 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Feb 2011 23:38:09 -0500 Subject: sanitize path_walk() mess New helper: path_lookupat(). Basically, what do_path_lookup() boils to modulo -ECHILD/-ESTALE handler. path_walk* family is gone; vfs_path_lookup() is using link_path_walk() directly, do_path_lookup() and do_filp_open() are using path_lookupat(). Signed-off-by: Al Viro --- fs/namei.c | 148 +++++++++++++++++++++++-------------------------------------- 1 file changed, 56 insertions(+), 92 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 8c704465f6c..f5de5bb1a61 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1520,59 +1520,6 @@ return_err: return err; } -static inline int path_walk_rcu(const char *name, struct nameidata *nd) -{ - current->total_link_count = 0; - - return link_path_walk(name, nd); -} - -static inline int path_walk_simple(const char *name, struct nameidata *nd) -{ - current->total_link_count = 0; - - return link_path_walk(name, nd); -} - -static int path_walk(const char *name, struct nameidata *nd) -{ - struct path save = nd->path; - int result; - - current->total_link_count = 0; - - /* make sure the stuff we saved doesn't go away */ - path_get(&save); - - result = link_path_walk(name, nd); - if (result == -ESTALE) { - /* nd->path had been dropped */ - current->total_link_count = 0; - nd->path = save; - nd->inode = save.dentry->d_inode; - path_get(&nd->path); - nd->flags |= LOOKUP_REVAL; - result = link_path_walk(name, nd); - } - - path_put(&save); - - return result; -} - -static void path_finish_rcu(struct nameidata *nd) -{ - if (nd->flags & LOOKUP_RCU) { - /* RCU dangling. Cancel it. */ - nd->flags &= ~LOOKUP_RCU; - nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - } - if (nd->file) - fput(nd->file); -} - static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1697,7 +1644,7 @@ out_fail: } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int do_path_lookup(int dfd, const char *name, +static int path_lookupat(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval; @@ -1716,29 +1663,45 @@ static int do_path_lookup(int dfd, const char *name, * be handled by restarting a traditional ref-walk (which will always * be able to complete). */ - retval = path_init_rcu(dfd, name, flags, nd); + if (flags & LOOKUP_RCU) + retval = path_init_rcu(dfd, name, flags, nd); + else + retval = path_init(dfd, name, flags, nd); + if (unlikely(retval)) return retval; - retval = path_walk_rcu(name, nd); - path_finish_rcu(nd); + + current->total_link_count = 0; + retval = link_path_walk(name, nd); + + if (nd->flags & LOOKUP_RCU) { + /* RCU dangling. Cancel it. */ + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } + + if (nd->file) { + fput(nd->file); + nd->file = NULL; + } + if (nd->root.mnt) { path_put(&nd->root); nd->root.mnt = NULL; } + return retval; +} - if (unlikely(retval == -ECHILD || retval == -ESTALE)) { - /* slower, locked walk */ - if (retval == -ESTALE) - flags |= LOOKUP_REVAL; - retval = path_init(dfd, name, flags, nd); - if (unlikely(retval)) - return retval; - retval = path_walk(name, nd); - if (nd->root.mnt) { - path_put(&nd->root); - nd->root.mnt = NULL; - } - } +static int do_path_lookup(int dfd, const char *name, + unsigned int flags, struct nameidata *nd) +{ + int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); + if (unlikely(retval == -ECHILD)) + retval = path_lookupat(dfd, name, flags, nd); + if (unlikely(retval == -ESTALE)) + retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) { if (unlikely(!audit_dummy_context())) { @@ -1746,7 +1709,6 @@ static int do_path_lookup(int dfd, const char *name, audit_inode(name, nd->path.dentry); } } - return retval; } @@ -1776,7 +1738,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct nameidata *nd) { - int retval; + int result; /* same as do_path_lookup */ nd->last_type = LAST_ROOT; @@ -1790,15 +1752,27 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, path_get(&nd->root); nd->inode = nd->path.dentry->d_inode; - retval = path_walk(name, nd); - if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && + current->total_link_count = 0; + + result = link_path_walk(name, nd); + if (result == -ESTALE) { + /* nd->path had been dropped */ + current->total_link_count = 0; + nd->path.dentry = dentry; + nd->path.mnt = mnt; + nd->inode = dentry->d_inode; + path_get(&nd->path); + nd->flags |= LOOKUP_REVAL; + result = link_path_walk(name, nd); + } + if (unlikely(!result && !audit_dummy_context() && nd->path.dentry && nd->inode)) audit_inode(name, nd->path.dentry); path_put(&nd->root); nd->root.mnt = NULL; - return retval; + return result; } static struct dentry *__lookup_hash(struct qstr *name, @@ -2483,24 +2457,14 @@ out_filp2: creat: /* OK, have to create the file. Find the parent. */ - error = path_init_rcu(dfd, pathname, - LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); - if (error) - goto out_filp; - error = path_walk_rcu(pathname, &nd); - path_finish_rcu(&nd); - if (unlikely(error == -ECHILD || error == -ESTALE)) { - /* slower, locked walk */ - if (error == -ESTALE) { + error = path_lookupat(dfd, pathname, LOOKUP_PARENT | LOOKUP_RCU, &nd); + if (unlikely(error == -ECHILD)) + error = path_lookupat(dfd, pathname, LOOKUP_PARENT, &nd); + if (unlikely(error == -ESTALE)) { reval: - flags |= LOOKUP_REVAL; - } - error = path_init(dfd, pathname, - LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); - if (error) - goto out_filp; - - error = path_walk_simple(pathname, &nd); + flags |= LOOKUP_REVAL; + error = path_lookupat(dfd, pathname, + LOOKUP_PARENT | LOOKUP_REVAL, &nd); } if (unlikely(error)) goto out_filp; -- cgit v1.2.3-18-g5258 From e41f7d4ee5bdb00da7d327a00b0ab9c4a2e9eaa3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 14:02:58 -0500 Subject: merge path_init and path_init_rcu Actual dependency on whether we want RCU or not is in 3 small areas (as it ought to be) and everything around those is the same in both versions. Since each function has only one caller and those callers are on two sides of if (flags & LOOKUP_RCU), it's easier and cleaner to merge them and pull the checks inside. Signed-off-by: Al Viro --- fs/namei.c | 118 ++++++++++++++++++------------------------------------------- 1 file changed, 35 insertions(+), 83 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f5de5bb1a61..b9e537980ef 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1520,45 +1520,44 @@ return_err: return err; } -static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) +static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; int fput_needed; struct file *file; nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags | LOOKUP_RCU; + nd->flags = flags; nd->depth = 0; nd->root.mnt = NULL; nd->file = NULL; if (*name=='/') { - struct fs_struct *fs = current->fs; - unsigned seq; - - br_read_lock(vfsmount_lock); - rcu_read_lock(); - - do { - seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->path = nd->root; - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); - + if (flags & LOOKUP_RCU) { + br_read_lock(vfsmount_lock); + rcu_read_lock(); + set_root_rcu(nd); + } else { + set_root(nd); + path_get(&nd->root); + } + nd->path = nd->root; } else if (dfd == AT_FDCWD) { - struct fs_struct *fs = current->fs; - unsigned seq; - - br_read_lock(vfsmount_lock); - rcu_read_lock(); + if (flags & LOOKUP_RCU) { + struct fs_struct *fs = current->fs; + unsigned seq; - do { - seq = read_seqcount_begin(&fs->seq); - nd->path = fs->pwd; - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); + br_read_lock(vfsmount_lock); + rcu_read_lock(); + do { + seq = read_seqcount_begin(&fs->seq); + nd->path = fs->pwd; + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + } else { + get_fs_pwd(current->fs, &nd->path); + } } else { struct dentry *dentry; @@ -1578,62 +1577,18 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n goto fput_fail; nd->path = file->f_path; - if (fput_needed) - nd->file = file; - - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); - rcu_read_lock(); + if (flags & LOOKUP_RCU) { + if (fput_needed) + nd->file = file; + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + br_read_lock(vfsmount_lock); + rcu_read_lock(); + } else { + path_get(&file->f_path); + fput_light(file, fput_needed); + } } - nd->inode = nd->path.dentry->d_inode; - return 0; -fput_fail: - fput_light(file, fput_needed); -out_fail: - return retval; -} - -static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) -{ - int retval = 0; - int fput_needed; - struct file *file; - - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; - nd->depth = 0; - nd->root.mnt = NULL; - - if (*name=='/') { - set_root(nd); - nd->path = nd->root; - path_get(&nd->root); - } else if (dfd == AT_FDCWD) { - get_fs_pwd(current->fs, &nd->path); - } else { - struct dentry *dentry; - - file = fget_light(dfd, &fput_needed); - retval = -EBADF; - if (!file) - goto out_fail; - - dentry = file->f_path.dentry; - - retval = -ENOTDIR; - if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_fail; - - retval = file_permission(file, MAY_EXEC); - if (retval) - goto fput_fail; - - nd->path = file->f_path; - path_get(&file->f_path); - - fput_light(file, fput_needed); - } nd->inode = nd->path.dentry->d_inode; return 0; @@ -1663,10 +1618,7 @@ static int path_lookupat(int dfd, const char *name, * be handled by restarting a traditional ref-walk (which will always * be able to complete). */ - if (flags & LOOKUP_RCU) - retval = path_init_rcu(dfd, name, flags, nd); - else - retval = path_init(dfd, name, flags, nd); + retval = path_init(dfd, name, flags, nd); if (unlikely(retval)) return retval; -- cgit v1.2.3-18-g5258 From fe479a580dc9c737c4eb49ff7fdb31d41d2c7003 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 15:10:03 -0500 Subject: merge component type recognition no need to do it in three places... Signed-off-by: Al Viro --- fs/namei.c | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index b9e537980ef..4521b5ff7c9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1362,6 +1362,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) unsigned long hash; struct qstr this; unsigned int c; + int type; nd->flags |= LOOKUP_CONTINUE; @@ -1381,6 +1382,16 @@ static int link_path_walk(const char *name, struct nameidata *nd) this.len = name - (const char *) this.name; this.hash = end_name_hash(hash); + type = LAST_NORM; + if (this.name[0] == '.') switch (this.len) { + case 2: + if (this.name[1] == '.') + type = LAST_DOTDOT; + break; + case 1: + type = LAST_DOT; + } + /* remove trailing slashes? */ if (!c) goto last_component; @@ -1393,21 +1404,17 @@ static int link_path_walk(const char *name, struct nameidata *nd) * to be able to know about the current root directory and * parent relationships. */ - if (this.name[0] == '.') switch (this.len) { - default: - break; - case 2: - if (this.name[1] != '.') - break; + if (unlikely(type != LAST_NORM)) { + if (type == LAST_DOTDOT) { if (nd->flags & LOOKUP_RCU) { if (follow_dotdot_rcu(nd)) return -ECHILD; } else follow_dotdot(nd); - /* fallthrough */ - case 1: - continue; + } + continue; } + /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next, &inode); if (err) @@ -1441,20 +1448,15 @@ last_component: nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; - if (this.name[0] == '.') switch (this.len) { - default: - break; - case 2: - if (this.name[1] != '.') - break; + if (unlikely(type != LAST_NORM)) { + if (type == LAST_DOTDOT) { if (nd->flags & LOOKUP_RCU) { if (follow_dotdot_rcu(nd)) return -ECHILD; } else follow_dotdot(nd); - /* fallthrough */ - case 1: - goto return_reval; + } + goto return_reval; } err = do_lookup(nd, &this, &next, &inode); if (err) @@ -1480,14 +1482,8 @@ last_component: goto return_base; lookup_parent: nd->last = this; - nd->last_type = LAST_NORM; - if (this.name[0] != '.') - goto return_base; - if (this.len == 1) - nd->last_type = LAST_DOT; - else if (this.len == 2 && this.name[1] == '.') - nd->last_type = LAST_DOTDOT; - else + nd->last_type = type; + if (type == LAST_NORM) goto return_base; return_reval: /* -- cgit v1.2.3-18-g5258 From 16c2cd7179881d5dd87779512ca5a0d657c64f62 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 15:50:10 -0500 Subject: untangle the "need_reval_dot" mess instead of ad-hackery around need_reval_dot(), do the following: set a flag (LOOKUP_JUMPED) in the beginning of path, on absolute symlink traversal, on ".." and on procfs-style symlinks. Clear on normal components, leave unchanged on ".". Non-nested callers of link_path_walk() call handle_reval_path(), which checks that flag is set and that fs does want the final revalidate thing, then does ->d_revalidate(). In link_path_walk() all the return_reval stuff is gone. Signed-off-by: Al Viro --- fs/namei.c | 107 +++++++++++++++++++++----------------------------- include/linux/namei.h | 2 + 2 files changed, 46 insertions(+), 63 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 4521b5ff7c9..450b686e968 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -613,19 +613,8 @@ do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) return dentry; } -static inline int need_reval_dot(struct dentry *dentry) -{ - if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) - return 0; - - if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) - return 0; - - return 1; -} - /* - * force_reval_path - force revalidation of a dentry + * handle_reval_path - force revalidation of a dentry * * In some situations the path walking code will trust dentries without * revalidating them. This causes problems for filesystems that depend on @@ -639,27 +628,28 @@ static inline int need_reval_dot(struct dentry *dentry) * invalidate the dentry. It's up to the caller to handle putting references * to the path if necessary. */ -static int -force_reval_path(struct path *path, struct nameidata *nd) +static inline int handle_reval_path(struct nameidata *nd) { + struct dentry *dentry = nd->path.dentry; int status; - struct dentry *dentry = path->dentry; - /* - * only check on filesystems where it's possible for the dentry to - * become stale. - */ - if (!need_reval_dot(dentry)) + if (likely(!(nd->flags & LOOKUP_JUMPED))) + return 0; + + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) return 0; + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + /* Note: we do not d_invalidate() */ status = d_revalidate(dentry, nd); if (status > 0) return 0; - if (!status) { - d_invalidate(dentry); + if (!status) status = -ESTALE; - } + return status; } @@ -728,6 +718,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); + nd->flags |= LOOKUP_JUMPED; } nd->inode = nd->path.dentry->d_inode; @@ -779,11 +770,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) error = 0; if (s) error = __vfs_follow_link(nd, s); - else if (nd->last_type == LAST_BIND) { - error = force_reval_path(&nd->path, nd); - if (error) - path_put(&nd->path); - } + else if (nd->last_type == LAST_BIND) + nd->flags |= LOOKUP_JUMPED; } return error; } @@ -1351,7 +1339,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) while (*name=='/') name++; if (!*name) - goto return_reval; + goto return_base; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); @@ -1385,12 +1373,16 @@ static int link_path_walk(const char *name, struct nameidata *nd) type = LAST_NORM; if (this.name[0] == '.') switch (this.len) { case 2: - if (this.name[1] == '.') + if (this.name[1] == '.') { type = LAST_DOTDOT; + nd->flags |= LOOKUP_JUMPED; + } break; case 1: type = LAST_DOT; } + if (likely(type == LAST_NORM)) + nd->flags &= ~LOOKUP_JUMPED; /* remove trailing slashes? */ if (!c) @@ -1456,7 +1448,7 @@ last_component: } else follow_dotdot(nd); } - goto return_reval; + goto return_base; } err = do_lookup(nd, &this, &next, &inode); if (err) @@ -1483,24 +1475,6 @@ last_component: lookup_parent: nd->last = this; nd->last_type = type; - if (type == LAST_NORM) - goto return_base; -return_reval: - /* - * We bypassed the ordinary revalidation routines. - * We may need to check the cached dentry for staleness. - */ - if (need_reval_dot(nd->path.dentry)) { - if (nameidata_drop_rcu_last_maybe(nd)) - return -ECHILD; - /* Note: we do not d_invalidate() */ - err = d_revalidate(nd->path.dentry, nd); - if (!err) - err = -ESTALE; - if (err < 0) - break; - return 0; - } return_base: if (nameidata_drop_rcu_last_maybe(nd)) return -ECHILD; @@ -1523,7 +1497,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei struct file *file; nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; + nd->flags = flags | LOOKUP_JUMPED; nd->depth = 0; nd->root.mnt = NULL; nd->file = NULL; @@ -1630,6 +1604,9 @@ static int path_lookupat(int dfd, const char *name, br_read_unlock(vfsmount_lock); } + if (!retval) + retval = handle_reval_path(nd); + if (nd->file) { fput(nd->file); nd->file = NULL; @@ -1690,7 +1667,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, /* same as do_path_lookup */ nd->last_type = LAST_ROOT; - nd->flags = flags; + nd->flags = flags | LOOKUP_JUMPED; nd->depth = 0; nd->path.dentry = dentry; @@ -1703,6 +1680,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, current->total_link_count = 0; result = link_path_walk(name, nd); + if (!result) + result = handle_reval_path(nd); if (result == -ESTALE) { /* nd->path had been dropped */ current->total_link_count = 0; @@ -1710,8 +1689,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->path.mnt = mnt; nd->inode = dentry->d_inode; path_get(&nd->path); - nd->flags |= LOOKUP_REVAL; + nd->flags = flags | LOOKUP_JUMPED | LOOKUP_REVAL; + result = link_path_walk(name, nd); + if (!result) + result = handle_reval_path(nd); } if (unlikely(!result && !audit_dummy_context() && nd->path.dentry && nd->inode)) @@ -2198,30 +2180,29 @@ static struct file *do_last(struct nameidata *nd, struct path *path, { struct dentry *dir = nd->path.dentry; struct file *filp; - int error = -EISDIR; + int error; switch (nd->last_type) { case LAST_DOTDOT: follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (need_reval_dot(dir)) { - int status = d_revalidate(nd->path.dentry, nd); - if (!status) - status = -ESTALE; - if (status < 0) { - error = status; - goto exit; - } - } /* fallthrough */ case LAST_ROOT: + error = handle_reval_path(nd); + if (error) + goto exit; + error = -EISDIR; goto exit; case LAST_BIND: + error = handle_reval_path(nd); + if (error) + goto exit; audit_inode(pathname, dir); goto ok; } + error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) goto exit; @@ -2422,7 +2403,7 @@ reval: /* * We have the parent and last component. */ - nd.flags = flags; + nd.flags = (nd.flags & ~LOOKUP_PARENT) | flags; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; diff --git a/include/linux/namei.h b/include/linux/namei.h index 58ce3433d4e..265378a707b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -63,6 +63,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_EXCL 0x0400 #define LOOKUP_RENAME_TARGET 0x0800 +#define LOOKUP_JUMPED 0x1000 + extern int user_path_at(int, const char __user *, unsigned, struct path *); #define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) -- cgit v1.2.3-18-g5258 From 086e183a641109033420e0b26ddecb6f4abb4c89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 20:56:27 -0500 Subject: pull dropping RCU on success of link_path_walk() into path_lookupat() Signed-off-by: Al Viro --- fs/namei.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 450b686e968..8f10a9ff9f6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -539,14 +539,6 @@ err_unlock: return -ECHILD; } -/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ -static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) -{ - if (likely(nd->flags & LOOKUP_RCU)) - return nameidata_drop_rcu_last(nd); - return 0; -} - /** * release_open_intent - free up open intent resources * @nd: pointer to nameidata @@ -1339,7 +1331,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) while (*name=='/') name++; if (!*name) - goto return_base; + return 0; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); @@ -1448,7 +1440,7 @@ last_component: } else follow_dotdot(nd); } - goto return_base; + return 0; } err = do_lookup(nd, &this, &next, &inode); if (err) @@ -1471,13 +1463,10 @@ last_component: if (!nd->inode->i_op->lookup) break; } - goto return_base; + return 0; lookup_parent: nd->last = this; nd->last_type = type; -return_base: - if (nameidata_drop_rcu_last_maybe(nd)) - return -ECHILD; return 0; out_dput: if (!(nd->flags & LOOKUP_RCU)) @@ -1598,10 +1587,15 @@ static int path_lookupat(int dfd, const char *name, if (nd->flags & LOOKUP_RCU) { /* RCU dangling. Cancel it. */ - nd->flags &= ~LOOKUP_RCU; - nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + if (!retval) { + if (nameidata_drop_rcu_last(nd)) + retval = -ECHILD; + } else { + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } } if (!retval) -- cgit v1.2.3-18-g5258 From 36f3b4f69070fee7c647bab5dc4408990bb3606c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 21:24:38 -0500 Subject: pull security_inode_follow_link() into __do_follow_link() Signed-off-by: Al Viro --- fs/namei.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 8f10a9ff9f6..f956567270b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -754,6 +754,13 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) if (link->mnt == nd->path.mnt) mntget(link->mnt); + error = security_inode_follow_link(link->dentry, nd); + if (error) { + *p = ERR_PTR(error); /* no ->put_link(), please */ + path_put(&nd->path); + return error; + } + nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); @@ -791,9 +798,6 @@ static inline int do_follow_link(struct inode *inode, struct path *path, struct goto loop; BUG_ON(nd->depth >= MAX_NESTED_LINKS); cond_resched(); - err = security_inode_follow_link(path->dentry, nd); - if (err) - goto loop; current->link_count++; current->total_link_count++; nd->depth++; @@ -2420,9 +2424,6 @@ reval: * just set LAST_BIND. */ nd.flags |= LOOKUP_PARENT; - error = security_inode_follow_link(link.dentry, &nd); - if (error) - goto exit_dput; error = __do_follow_link(&link, &nd, &cookie); if (unlikely(error)) { if (!IS_ERR(cookie) && linki->i_op->put_link) -- cgit v1.2.3-18-g5258 From f1afe9efc84476ca42fbb7301a441021063eead7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Feb 2011 22:27:28 -0500 Subject: clean up the failure exits after __do_follow_link() in do_filp_open() Signed-off-by: Al Viro --- fs/namei.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f956567270b..e0f59031be8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2426,15 +2426,12 @@ reval: nd.flags |= LOOKUP_PARENT; error = __do_follow_link(&link, &nd, &cookie); if (unlikely(error)) { - if (!IS_ERR(cookie) && linki->i_op->put_link) - linki->i_op->put_link(link.dentry, &nd, cookie); - /* nd.path had been dropped */ - nd.path = link; - goto out_path; + filp = ERR_PTR(error); + } else { + nd.flags &= ~LOOKUP_PARENT; + filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); } - nd.flags &= ~LOOKUP_PARENT; - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); - if (linki->i_op->put_link) + if (!IS_ERR(cookie) && linki->i_op->put_link) linki->i_op->put_link(link.dentry, &nd, cookie); path_put(&link); } -- cgit v1.2.3-18-g5258 From c3e380b0b3cfa613189fb91513efd88a65e1d9d8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Feb 2011 13:39:45 -0500 Subject: Collect "operation mode" arguments of do_last() into a structure No point messing with passing shitloads of "operation mode" arguments to do_open() one by one, especially since they are not going to change during do_filp_open(). Collect them into a struct, fill it and pass to do_last() by reference. Make sure that lookup intent flags are correctly set and removed - we want them for do_last(), but they make no sense for __do_follow_link(). Signed-off-by: Al Viro --- fs/namei.c | 57 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index e0f59031be8..5e4206f4537 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2169,17 +2169,26 @@ exit: return ERR_PTR(error); } +struct open_flags { + int open_flag; + int mode; + int acc_mode; + int intent; +}; + /* * Handle O_CREAT case for do_filp_open */ static struct file *do_last(struct nameidata *nd, struct path *path, - int open_flag, int acc_mode, - int mode, const char *pathname) + const struct open_flags *op, const char *pathname) { struct dentry *dir = nd->path.dentry; struct file *filp; int error; + nd->flags &= ~LOOKUP_PARENT; + nd->flags |= op->intent; + switch (nd->last_type) { case LAST_DOTDOT: follow_dotdot(nd); @@ -2233,7 +2242,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = mnt_want_write(nd->path.mnt); if (error) goto exit_mutex_unlock; - error = __open_namei_create(nd, path, open_flag, mode); + error = __open_namei_create(nd, path, op->open_flag, op->mode); if (error) { mnt_drop_write(nd->path.mnt); goto exit; @@ -2242,7 +2251,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, mnt_drop_write(nd->path.mnt); path_put(&nd->path); if (!IS_ERR(filp)) { - error = ima_file_check(filp, acc_mode); + error = ima_file_check(filp, op->acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -2258,7 +2267,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, audit_inode(pathname, path->dentry); error = -EEXIST; - if (open_flag & O_EXCL) + if (op->open_flag & O_EXCL) goto exit_dput; error = follow_managed(path, nd->flags); @@ -2278,7 +2287,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (S_ISDIR(nd->inode->i_mode)) goto exit; ok: - filp = finish_open(nd, open_flag, acc_mode); + filp = finish_open(nd, op->open_flag, op->acc_mode); return filp; exit_mutex_unlock: @@ -2304,7 +2313,8 @@ struct file *do_filp_open(int dfd, const char *pathname, struct path path; int count = 0; int flag = open_to_namei_flags(open_flag); - int flags; + int flags = 0; + struct open_flags op; if (!(open_flag & O_CREAT)) mode = 0; @@ -2321,6 +2331,8 @@ struct file *do_filp_open(int dfd, const char *pathname, if (open_flag & __O_SYNC) open_flag |= O_DSYNC; + op.open_flag = open_flag; + if (!acc_mode) acc_mode = MAY_OPEN | ACC_MODE(open_flag); @@ -2333,12 +2345,15 @@ struct file *do_filp_open(int dfd, const char *pathname, if (open_flag & O_APPEND) acc_mode |= MAY_APPEND; - flags = LOOKUP_OPEN; + op.acc_mode = acc_mode; + + op.intent = LOOKUP_OPEN; if (open_flag & O_CREAT) { - flags |= LOOKUP_CREATE; + op.intent |= LOOKUP_CREATE; if (open_flag & O_EXCL) - flags |= LOOKUP_EXCL; + op.intent |= LOOKUP_EXCL; } + if (open_flag & O_DIRECTORY) flags |= LOOKUP_DIRECTORY; if (!(open_flag & O_NOFOLLOW)) @@ -2357,7 +2372,7 @@ struct file *do_filp_open(int dfd, const char *pathname, goto creat; /* !O_CREAT, simple open */ - error = do_path_lookup(dfd, pathname, flags, &nd); + error = do_path_lookup(dfd, pathname, flags | op.intent, &nd); if (unlikely(error)) goto out_filp2; error = -ELOOP; @@ -2384,14 +2399,14 @@ out_filp2: creat: /* OK, have to create the file. Find the parent. */ - error = path_lookupat(dfd, pathname, LOOKUP_PARENT | LOOKUP_RCU, &nd); + error = path_lookupat(dfd, pathname, + LOOKUP_PARENT | LOOKUP_RCU | flags, &nd); if (unlikely(error == -ECHILD)) - error = path_lookupat(dfd, pathname, LOOKUP_PARENT, &nd); + error = path_lookupat(dfd, pathname, LOOKUP_PARENT | flags, &nd); if (unlikely(error == -ESTALE)) { reval: flags |= LOOKUP_REVAL; - error = path_lookupat(dfd, pathname, - LOOKUP_PARENT | LOOKUP_REVAL, &nd); + error = path_lookupat(dfd, pathname, LOOKUP_PARENT | flags, &nd); } if (unlikely(error)) goto out_filp; @@ -2401,8 +2416,7 @@ reval: /* * We have the parent and last component. */ - nd.flags = (nd.flags & ~LOOKUP_PARENT) | flags; - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); + filp = do_last(&nd, &path, &op, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; @@ -2424,13 +2438,12 @@ reval: * just set LAST_BIND. */ nd.flags |= LOOKUP_PARENT; + nd.flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = __do_follow_link(&link, &nd, &cookie); - if (unlikely(error)) { + if (unlikely(error)) filp = ERR_PTR(error); - } else { - nd.flags &= ~LOOKUP_PARENT; - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); - } + else + filp = do_last(&nd, &path, &op, pathname); if (!IS_ERR(cookie) && linki->i_op->put_link) linki->i_op->put_link(link.dentry, &nd, cookie); path_put(&link); -- cgit v1.2.3-18-g5258 From 47c805dc2d2dff686962f5f0baa6bac2d703ba19 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Feb 2011 17:44:09 -0500 Subject: switch do_filp_open() to struct open_flags take calculation of open_flags by open(2) arguments into new helper in fs/open.c, move filp_open() over there, have it and do_sys_open() use that helper, switch exec.c callers of do_filp_open() to explicit (and constant) struct open_flags. Signed-off-by: Al Viro --- fs/exec.c | 18 +++++++---- fs/internal.h | 8 +++++ fs/namei.c | 88 ++++++------------------------------------------------ fs/open.c | 73 +++++++++++++++++++++++++++++++++++++++++++- include/linux/fs.h | 2 -- 5 files changed, 101 insertions(+), 88 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 52a447d9b6a..ba99e1abb1a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) struct file *file; char *tmp = getname(library); int error = PTR_ERR(tmp); + static const struct open_flags uselib_flags = { + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, + .intent = LOOKUP_OPEN + }; if (IS_ERR(tmp)) goto out; - file = do_filp_open(AT_FDCWD, tmp, - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, - MAY_READ | MAY_EXEC | MAY_OPEN); + file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); putname(tmp); error = PTR_ERR(file); if (IS_ERR(file)) @@ -721,10 +724,13 @@ struct file *open_exec(const char *name) { struct file *file; int err; + static const struct open_flags open_exec_flags = { + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .acc_mode = MAY_EXEC | MAY_OPEN, + .intent = LOOKUP_OPEN + }; - file = do_filp_open(AT_FDCWD, name, - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, - MAY_EXEC | MAY_OPEN); + file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); if (IS_ERR(file)) goto out; diff --git a/fs/internal.h b/fs/internal.h index 9b976b57d7f..6fdbdf2c604 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -106,6 +106,14 @@ extern void put_super(struct super_block *sb); struct nameidata; extern struct file *nameidata_to_filp(struct nameidata *); extern void release_open_intent(struct nameidata *); +struct open_flags { + int open_flag; + int mode; + int acc_mode; + int intent; +}; +extern struct file *do_filp_open(int dfd, const char *pathname, + const struct open_flags *op, int lookup_flags); /* * inode.c diff --git a/fs/namei.c b/fs/namei.c index 5e4206f4537..9c7fa946abe 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2169,13 +2169,6 @@ exit: return ERR_PTR(error); } -struct open_flags { - int open_flag; - int mode; - int acc_mode; - int intent; -}; - /* * Handle O_CREAT case for do_filp_open */ @@ -2305,74 +2298,28 @@ exit: * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode, int acc_mode) + const struct open_flags *op, int flags) { struct file *filp; struct nameidata nd; int error; struct path path; int count = 0; - int flag = open_to_namei_flags(open_flag); - int flags = 0; - struct open_flags op; - - if (!(open_flag & O_CREAT)) - mode = 0; - - /* Must never be set by userspace */ - open_flag &= ~FMODE_NONOTIFY; - - /* - * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only - * check for O_DSYNC if the need any syncing at all we enforce it's - * always set instead of having to deal with possibly weird behaviour - * for malicious applications setting only __O_SYNC. - */ - if (open_flag & __O_SYNC) - open_flag |= O_DSYNC; - - op.open_flag = open_flag; - - if (!acc_mode) - acc_mode = MAY_OPEN | ACC_MODE(open_flag); - - /* O_TRUNC implies we need access checks for write permissions */ - if (open_flag & O_TRUNC) - acc_mode |= MAY_WRITE; - - /* Allow the LSM permission hook to distinguish append - access from general write access. */ - if (open_flag & O_APPEND) - acc_mode |= MAY_APPEND; - - op.acc_mode = acc_mode; - - op.intent = LOOKUP_OPEN; - if (open_flag & O_CREAT) { - op.intent |= LOOKUP_CREATE; - if (open_flag & O_EXCL) - op.intent |= LOOKUP_EXCL; - } - - if (open_flag & O_DIRECTORY) - flags |= LOOKUP_DIRECTORY; - if (!(open_flag & O_NOFOLLOW)) - flags |= LOOKUP_FOLLOW; filp = get_empty_filp(); if (!filp) return ERR_PTR(-ENFILE); - filp->f_flags = open_flag; + filp->f_flags = op->open_flag; nd.intent.open.file = filp; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = mode; + nd.intent.open.flags = open_to_namei_flags(op->open_flag); + nd.intent.open.create_mode = op->mode; - if (open_flag & O_CREAT) + if (op->open_flag & O_CREAT) goto creat; /* !O_CREAT, simple open */ - error = do_path_lookup(dfd, pathname, flags | op.intent, &nd); + error = do_path_lookup(dfd, pathname, flags | op->intent, &nd); if (unlikely(error)) goto out_filp2; error = -ELOOP; @@ -2386,7 +2333,7 @@ struct file *do_filp_open(int dfd, const char *pathname, goto out_path2; } audit_inode(pathname, nd.path.dentry); - filp = finish_open(&nd, open_flag, acc_mode); + filp = finish_open(&nd, op->open_flag, op->acc_mode); out2: release_open_intent(&nd); return filp; @@ -2416,7 +2363,7 @@ reval: /* * We have the parent and last component. */ - filp = do_last(&nd, &path, &op, pathname); + filp = do_last(&nd, &path, op, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; @@ -2443,7 +2390,7 @@ reval: if (unlikely(error)) filp = ERR_PTR(error); else - filp = do_last(&nd, &path, &op, pathname); + filp = do_last(&nd, &path, op, pathname); if (!IS_ERR(cookie) && linki->i_op->put_link) linki->i_op->put_link(link.dentry, &nd, cookie); path_put(&link); @@ -2465,23 +2412,6 @@ out_filp: goto out; } -/** - * filp_open - open file and return file pointer - * - * @filename: path to open - * @flags: open flags as per the open(2) second argument - * @mode: mode for the new file if O_CREAT is set, else ignored - * - * This is the helper to open a file from kernelspace if you really - * have to. But in generally you should not do this, so please move - * along, nothing to see here.. - */ -struct file *filp_open(const char *filename, int flags, int mode) -{ - return do_filp_open(AT_FDCWD, filename, flags, mode, 0); -} -EXPORT_SYMBOL(filp_open); - /** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info diff --git a/fs/open.c b/fs/open.c index b47aab39c05..d05e18c60ba 100644 --- a/fs/open.c +++ b/fs/open.c @@ -890,15 +890,86 @@ void fd_install(unsigned int fd, struct file *file) EXPORT_SYMBOL(fd_install); +static inline int build_open_flags(int flags, int mode, struct open_flags *op) +{ + int lookup_flags = 0; + int acc_mode; + + if (!(flags & O_CREAT)) + mode = 0; + op->mode = mode; + + /* Must never be set by userspace */ + flags &= ~FMODE_NONOTIFY; + + /* + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only + * check for O_DSYNC if the need any syncing at all we enforce it's + * always set instead of having to deal with possibly weird behaviour + * for malicious applications setting only __O_SYNC. + */ + if (flags & __O_SYNC) + flags |= O_DSYNC; + + op->open_flag = flags; + + acc_mode = MAY_OPEN | ACC_MODE(flags); + + /* O_TRUNC implies we need access checks for write permissions */ + if (flags & O_TRUNC) + acc_mode |= MAY_WRITE; + + /* Allow the LSM permission hook to distinguish append + access from general write access. */ + if (flags & O_APPEND) + acc_mode |= MAY_APPEND; + + op->acc_mode = acc_mode; + + op->intent = LOOKUP_OPEN; + if (flags & O_CREAT) { + op->intent |= LOOKUP_CREATE; + if (flags & O_EXCL) + op->intent |= LOOKUP_EXCL; + } + + if (flags & O_DIRECTORY) + lookup_flags |= LOOKUP_DIRECTORY; + if (!(flags & O_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + return lookup_flags; +} + +/** + * filp_open - open file and return file pointer + * + * @filename: path to open + * @flags: open flags as per the open(2) second argument + * @mode: mode for the new file if O_CREAT is set, else ignored + * + * This is the helper to open a file from kernelspace if you really + * have to. But in generally you should not do this, so please move + * along, nothing to see here.. + */ +struct file *filp_open(const char *filename, int flags, int mode) +{ + struct open_flags op; + int lookup = build_open_flags(flags, mode, &op); + return do_filp_open(AT_FDCWD, filename, &op, lookup); +} +EXPORT_SYMBOL(filp_open); + long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { + struct open_flags op; + int lookup = build_open_flags(flags, mode, &op); char *tmp = getname(filename); int fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); + struct file *f = do_filp_open(dfd, tmp, &op, lookup); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index e38b50a4b9d..9c75714f92c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2205,8 +2205,6 @@ extern struct file *create_read_pipe(struct file *f, int flags); extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); -extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); -- cgit v1.2.3-18-g5258 From 13aab428a73d3200b9283b61b7fdf5713181ac66 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Feb 2011 17:54:08 -0500 Subject: separate -ESTALE/-ECHILD retries in do_filp_open() from real work new helper: path_openat(). Does what do_filp_open() does, except that it tries only the walk mode (RCU/normal/force revalidation) it had been told to. Both create and non-create branches are using path_lookupat() now. Fixed the double audit_inode() in non-create branch. Signed-off-by: Al Viro --- fs/namei.c | 49 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 9c7fa946abe..01a17dd2f15 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2292,19 +2292,14 @@ exit: return ERR_PTR(error); } -/* - * Note that the low bits of the passed in "open_flag" - * are not the same as in the local variable "flag". See - * open_to_namei_flags() for more details. - */ -struct file *do_filp_open(int dfd, const char *pathname, +static struct file *path_openat(int dfd, const char *pathname, const struct open_flags *op, int flags) { struct file *filp; struct nameidata nd; - int error; struct path path; int count = 0; + int error; filp = get_empty_filp(); if (!filp) @@ -2319,42 +2314,27 @@ struct file *do_filp_open(int dfd, const char *pathname, goto creat; /* !O_CREAT, simple open */ - error = do_path_lookup(dfd, pathname, flags | op->intent, &nd); + error = path_lookupat(dfd, pathname, flags | op->intent, &nd); if (unlikely(error)) - goto out_filp2; + goto out_filp; error = -ELOOP; if (!(nd.flags & LOOKUP_FOLLOW)) { if (nd.inode->i_op->follow_link) - goto out_path2; + goto out_path; } error = -ENOTDIR; if (nd.flags & LOOKUP_DIRECTORY) { if (!nd.inode->i_op->lookup) - goto out_path2; + goto out_path; } audit_inode(pathname, nd.path.dentry); filp = finish_open(&nd, op->open_flag, op->acc_mode); -out2: release_open_intent(&nd); return filp; -out_path2: - path_put(&nd.path); -out_filp2: - filp = ERR_PTR(error); - goto out2; - creat: /* OK, have to create the file. Find the parent. */ - error = path_lookupat(dfd, pathname, - LOOKUP_PARENT | LOOKUP_RCU | flags, &nd); - if (unlikely(error == -ECHILD)) - error = path_lookupat(dfd, pathname, LOOKUP_PARENT | flags, &nd); - if (unlikely(error == -ESTALE)) { -reval: - flags |= LOOKUP_REVAL; - error = path_lookupat(dfd, pathname, LOOKUP_PARENT | flags, &nd); - } + error = path_lookupat(dfd, pathname, LOOKUP_PARENT | flags, &nd); if (unlikely(error)) goto out_filp; if (unlikely(!audit_dummy_context())) @@ -2398,8 +2378,6 @@ reval: out: if (nd.root.mnt) path_put(&nd.root); - if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) - goto reval; release_open_intent(&nd); return filp; @@ -2412,6 +2390,19 @@ out_filp: goto out; } +struct file *do_filp_open(int dfd, const char *pathname, + const struct open_flags *op, int flags) +{ + struct file *filp; + + filp = path_openat(dfd, pathname, op, flags | LOOKUP_RCU); + if (unlikely(filp == ERR_PTR(-ECHILD))) + filp = path_openat(dfd, pathname, op, flags); + if (unlikely(filp == ERR_PTR(-ESTALE))) + filp = path_openat(dfd, pathname, op, flags | LOOKUP_REVAL); + return filp; +} + /** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info -- cgit v1.2.3-18-g5258 From 7bc055d1d524f209bf49d8b9cb220712dd7df4ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Feb 2011 19:41:31 -0500 Subject: kill out_dput: in link_path_walk() Signed-off-by: Al Viro --- fs/namei.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 01a17dd2f15..fea36369dc8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1407,22 +1407,19 @@ static int link_path_walk(const char *name, struct nameidata *nd) err = do_lookup(nd, &this, &next, &inode); if (err) break; - err = -ENOENT; - if (!inode) - goto out_dput; - if (inode->i_op->follow_link) { + if (inode && inode->i_op->follow_link) { err = do_follow_link(inode, &next, nd); if (err) goto return_err; nd->inode = nd->path.dentry->d_inode; - err = -ENOENT; - if (!nd->inode) - break; } else { path_to_nameidata(&next, nd); nd->inode = inode; } + err = -ENOENT; + if (!nd->inode) + break; err = -ENOTDIR; if (!nd->inode->i_op->lookup) break; @@ -1472,10 +1469,6 @@ lookup_parent: nd->last = this; nd->last_type = type; return 0; -out_dput: - if (!(nd->flags & LOOKUP_RCU)) - path_put_conditional(&next, nd); - break; } if (!(nd->flags & LOOKUP_RCU)) path_put(&nd->path); -- cgit v1.2.3-18-g5258 From 9856fa1b281eccdc9f8d94d716e96818c675e78e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 14:22:06 -0500 Subject: pull handling of . and .. into inlined helper getting LOOKUP_RCU checks out of link_path_walk()... Signed-off-by: Al Viro --- fs/namei.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index fea36369dc8..d29f91e8ff3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1318,6 +1318,18 @@ static inline int may_lookup(struct nameidata *nd) return exec_permission(nd->inode, 0); } +static inline int handle_dots(struct nameidata *nd, int type) +{ + if (type == LAST_DOTDOT) { + if (nd->flags & LOOKUP_RCU) { + if (follow_dotdot_rcu(nd)) + return -ECHILD; + } else + follow_dotdot(nd); + } + return 0; +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1393,13 +1405,8 @@ static int link_path_walk(const char *name, struct nameidata *nd) * parent relationships. */ if (unlikely(type != LAST_NORM)) { - if (type == LAST_DOTDOT) { - if (nd->flags & LOOKUP_RCU) { - if (follow_dotdot_rcu(nd)) - return -ECHILD; - } else - follow_dotdot(nd); - } + if (handle_dots(nd, type)) + return -ECHILD; continue; } @@ -1434,13 +1441,8 @@ last_component: if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; if (unlikely(type != LAST_NORM)) { - if (type == LAST_DOTDOT) { - if (nd->flags & LOOKUP_RCU) { - if (follow_dotdot_rcu(nd)) - return -ECHILD; - } else - follow_dotdot(nd); - } + if (handle_dots(nd, type)) + return -ECHILD; return 0; } err = do_lookup(nd, &this, &next, &inode); -- cgit v1.2.3-18-g5258 From 4455ca6223cc59cbc0a75f4be8bce9e84cc0d6b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 14:28:10 -0500 Subject: clear RCU on all failure exits from link_path_walk() Signed-off-by: Al Viro --- fs/namei.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d29f91e8ff3..f09887a4583 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1405,8 +1405,9 @@ static int link_path_walk(const char *name, struct nameidata *nd) * parent relationships. */ if (unlikely(type != LAST_NORM)) { - if (handle_dots(nd, type)) - return -ECHILD; + err = handle_dots(nd, type); + if (err) + goto return_err; continue; } @@ -1441,8 +1442,9 @@ last_component: if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; if (unlikely(type != LAST_NORM)) { - if (handle_dots(nd, type)) - return -ECHILD; + err = handle_dots(nd, type); + if (err) + goto return_err; return 0; } err = do_lookup(nd, &this, &next, &inode); @@ -1475,6 +1477,12 @@ lookup_parent: if (!(nd->flags & LOOKUP_RCU)) path_put(&nd->path); return_err: + if (nd->flags & LOOKUP_RCU) { + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } return err; } @@ -1585,16 +1593,10 @@ static int path_lookupat(int dfd, const char *name, retval = link_path_walk(name, nd); if (nd->flags & LOOKUP_RCU) { - /* RCU dangling. Cancel it. */ - if (!retval) { - if (nameidata_drop_rcu_last(nd)) - retval = -ECHILD; - } else { - nd->flags &= ~LOOKUP_RCU; - nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - } + /* went all way through without dropping RCU */ + BUG_ON(retval); + if (nameidata_drop_rcu_last(nd)) + retval = -ECHILD; } if (!retval) -- cgit v1.2.3-18-g5258 From ef7562d5283a91da3ba5c14de3221f47b7f08823 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 14:35:59 -0500 Subject: make handle_dots() leave RCU mode on error Signed-off-by: Al Viro --- fs/namei.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f09887a4583..ea14bfb0478 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1052,7 +1052,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) - return -ECHILD; + goto failed; inode = parent->d_inode; nd->path.dentry = parent; nd->seq = seq; @@ -1065,8 +1065,14 @@ static int follow_dotdot_rcu(struct nameidata *nd) } __follow_mount_rcu(nd, &nd->path, &inode, true); nd->inode = inode; - return 0; + +failed: + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; } /* @@ -1405,9 +1411,8 @@ static int link_path_walk(const char *name, struct nameidata *nd) * parent relationships. */ if (unlikely(type != LAST_NORM)) { - err = handle_dots(nd, type); - if (err) - goto return_err; + if (handle_dots(nd, type)) + return -ECHILD; continue; } @@ -1441,12 +1446,8 @@ last_component: nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; - if (unlikely(type != LAST_NORM)) { - err = handle_dots(nd, type); - if (err) - goto return_err; - return 0; - } + if (unlikely(type != LAST_NORM)) + return handle_dots(nd, type); err = do_lookup(nd, &this, &next, &inode); if (err) break; -- cgit v1.2.3-18-g5258 From a7472baba22dd5d68580f528374f93421b33667e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 14:39:30 -0500 Subject: make nameidata_dentry_drop_rcu_maybe() always leave RCU mode Now we have do_follow_link() guaranteed to leave without dangling RCU and the next step will get LOOKUP_RCU logics completely out of link_path_walk(). Signed-off-by: Al Viro --- fs/namei.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ea14bfb0478..53bba7c1a52 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -498,8 +498,15 @@ err_root: /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) { - if (nd->flags & LOOKUP_RCU) - return nameidata_dentry_drop_rcu(nd, dentry); + if (nd->flags & LOOKUP_RCU) { + if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; + } + } return 0; } @@ -1424,7 +1431,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (inode && inode->i_op->follow_link) { err = do_follow_link(inode, &next, nd); if (err) - goto return_err; + re