diff options
Diffstat (limited to 'fs/exportfs')
| -rw-r--r-- | fs/exportfs/expfs.c | 371 |
1 files changed, 213 insertions, 158 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 51b304056f1..b01fbfb51f4 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -19,19 +19,19 @@ #define dprintk(fmt, args...) do{}while(0) -static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, - struct dentry *child); +static int get_name(const struct path *path, char *name, struct dentry *child); static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, char *name, struct dentry *child) { const struct export_operations *nop = dir->d_sb->s_export_op; + struct path path = {.mnt = mnt, .dentry = dir}; if (nop->get_name) return nop->get_name(dir, name, child); else - return get_name(mnt, dir, name, child); + return get_name(&path, name, child); } /* @@ -43,176 +43,196 @@ find_acceptable_alias(struct dentry *result, void *context) { struct dentry *dentry, *toput = NULL; + struct inode *inode; if (acceptable(context, result)) return result; - spin_lock(&dcache_lock); - list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { - dget_locked(dentry); - spin_unlock(&dcache_lock); + inode = result->d_inode; + spin_lock(&inode->i_lock); + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + dget(dentry); + spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); toput = dentry; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (toput) dput(toput); return NULL; } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry) { dget(dentry); - while (!IS_ROOT(dentry)) { + while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); - if (!(parent->d_flags & DCACHE_DISCONNECTED)) { + dput(dentry); + if (IS_ROOT(dentry)) { dput(parent); - break; + return false; } + dentry = parent; + } + dput(dentry); + return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ + dget(dentry); + while (dentry->d_flags & DCACHE_DISCONNECTED) { + struct dentry *parent = dget_parent(dentry); + + WARN_ON_ONCE(IS_ROOT(dentry)); + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } - return dentry; + dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory. The concurrent operation has reconnected our + * dentry, so we no longer need to. + */ +static struct dentry *reconnect_one(struct vfsmount *mnt, + struct dentry *dentry, char *nbuf) +{ + struct dentry *parent; + struct dentry *tmp; + int err; + + parent = ERR_PTR(-EACCES); + mutex_lock(&dentry->d_inode->i_mutex); + if (mnt->mnt_sb->s_export_op->get_parent) + parent = mnt->mnt_sb->s_export_op->get_parent(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + + if (IS_ERR(parent)) { + dprintk("%s: get_parent of %ld failed, err %d\n", + __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + return parent; + } + + dprintk("%s: find name of %lu in %lu\n", __func__, + dentry->d_inode->i_ino, parent->d_inode->i_ino); + err = exportfs_get_name(mnt, parent, nbuf, dentry); + if (err == -ENOENT) + goto out_reconnected; + if (err) + goto out_err; + dprintk("%s: found name: %s\n", __func__, nbuf); + mutex_lock(&parent->d_inode->i_mutex); + tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); + mutex_unlock(&parent->d_inode->i_mutex); + if (IS_ERR(tmp)) { + dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + goto out_err; + } + if (tmp != dentry) { + dput(tmp); + goto out_reconnected; + } + dput(tmp); + if (IS_ROOT(dentry)) { + err = -ESTALE; + goto out_err; + } + return parent; + +out_err: + dput(parent); + return ERR_PTR(err); +out_reconnected: + dput(parent); + /* + * Someone must have renamed our entry into another parent, in + * which case it has been reconnected by the rename. + * + * Or someone removed it entirely, in which case filehandle + * lookup will succeed but the directory is now IS_DEAD and + * subsequent operations on it will fail. + * + * Alternatively, maybe there was no race at all, and the + * filesystem is just corrupt and gave us a parent that doesn't + * actually contain any entry pointing to this inode. So, + * double check that this worked and return -ESTALE if not: + */ + if (!dentry_connected(dentry)) + return ERR_PTR(-ESTALE); + return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected. In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation. In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - int noprogress = 0; - int err = -ESTALE; + struct dentry *dentry, *parent; - /* - * It is possible that a confused file system might not let us complete - * the path to the root. For example, if get_parent returns a directory - * in which we cannot find a name for the child. While this implies a - * very sick filesystem we don't want it to cause knfsd to spin. Hence - * the noprogress counter. If we go through the loop 10 times (2 is - * probably enough) without getting anywhere, we just give up - */ - while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { - struct dentry *pd = find_disconnected_root(target_dir); - - if (!IS_ROOT(pd)) { - /* must have found a connected parent - great */ - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else if (pd == mnt->mnt_sb->s_root) { - printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else { - /* - * We have hit the top of a disconnected path, try to - * find parent and connect. - * - * Racing with some other process renaming a directory - * isn't much of a problem here. If someone renames - * the directory, it will end up properly connected, - * which is what we want - * - * Getting the parent can't be supported generically, - * the locking is too icky. - * - * Instead we just return EACCES. If server reboots - * or inodes get flushed, you lose - */ - struct dentry *ppd = ERR_PTR(-EACCES); - struct dentry *npd; - - mutex_lock(&pd->d_inode->i_mutex); - if (mnt->mnt_sb->s_export_op->get_parent) - ppd = mnt->mnt_sb->s_export_op->get_parent(pd); - mutex_unlock(&pd->d_inode->i_mutex); - - if (IS_ERR(ppd)) { - err = PTR_ERR(ppd); - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, pd->d_inode->i_ino, err); - dput(pd); - break; - } + dentry = dget(target_dir); - dprintk("%s: find name of %lu in %lu\n", __func__, - pd->d_inode->i_ino, ppd->d_inode->i_ino); - err = exportfs_get_name(mnt, ppd, nbuf, pd); - if (err) { - dput(ppd); - dput(pd); - if (err == -ENOENT) - /* some race between get_parent and - * get_name? just try again - */ - continue; - break; - } - dprintk("%s: found name: %s\n", __func__, nbuf); - mutex_lock(&ppd->d_inode->i_mutex); - npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - mutex_unlock(&ppd->d_inode->i_mutex); - if (IS_ERR(npd)) { - err = PTR_ERR(npd); - dprintk("%s: lookup failed: %d\n", - __func__, err); - dput(ppd); - dput(pd); - break; - } - /* we didn't really want npd, we really wanted - * a side-effect of the lookup. - * hopefully, npd == pd, though it isn't really - * a problem if it isn't - */ - if (npd == pd) - noprogress = 0; - else - printk("%s: npd != pd\n", __func__); - dput(npd); - dput(ppd); - if (IS_ROOT(pd)) { - /* something went wrong, we have to give up */ - dput(pd); - break; - } - } - dput(pd); - } + while (dentry->d_flags & DCACHE_DISCONNECTED) { + BUG_ON(dentry == mnt->mnt_sb->s_root); - if (target_dir->d_flags & DCACHE_DISCONNECTED) { - /* something went wrong - oh-well */ - if (!err) - err = -ESTALE; - return err; - } + if (IS_ROOT(dentry)) + parent = reconnect_one(mnt, dentry, nbuf); + else + parent = dget_parent(dentry); + if (!parent) + break; + dput(dentry); + if (IS_ERR(parent)) + return PTR_ERR(parent); + dentry = parent; + } + dput(dentry); + clear_disconnected(target_dir); return 0; } struct getdents_callback { + struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ - unsigned long ino; /* the inum we are looking for */ + u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; @@ -228,7 +248,7 @@ static int filldir_one(void * __buf, const char * name, int len, int result = 0; buf->sequence++; - if (buf->ino == ino) { + if (buf->ino == ino && len <= NAME_MAX) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; @@ -239,21 +259,28 @@ static int filldir_one(void * __buf, const char * name, int len, /** * get_name - default export_operations->get_name function - * @dentry: the directory in which to find a name + * @path: the directory in which to find a name * @name: a pointer to a %NAME_MAX+1 char buffer to store the name * @child: the dentry for the child directory. * * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ -static int get_name(struct vfsmount *mnt, struct dentry *dentry, - char *name, struct dentry *child) +static int get_name(const struct path *path, char *name, struct dentry *child) { const struct cred *cred = current_cred(); - struct inode *dir = dentry->d_inode; + struct inode *dir = path->dentry->d_inode; int error; struct file *file; - struct getdents_callback buffer; + struct kstat stat; + struct path child_path = { + .mnt = path->mnt, + .dentry = child, + }; + struct getdents_callback buffer = { + .ctx.actor = filldir_one, + .name = name, + }; error = -ENOTDIR; if (!dir || !S_ISDIR(dir->i_mode)) @@ -262,25 +289,32 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, if (!dir->i_fop) goto out; /* + * inode->i_ino is unsigned long, kstat->ino is u64, so the + * former would be insufficient on 32-bit hosts when the + * filesystem supports 64-bit inode numbers. So we need to + * actually call ->getattr, not just read i_ino: + */ + error = vfs_getattr_nosec(&child_path, &stat); + if (error) + return error; + buffer.ino = stat.ino; + /* * Open the directory ... */ - file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); + file = dentry_open(path, O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; error = -EINVAL; - if (!file->f_op->readdir) + if (!file->f_op->iterate) goto out_close; - buffer.name = name; - buffer.ino = child->d_inode->i_ino; - buffer.found = 0; buffer.sequence = 0; while (1) { int old_seq = buffer.sequence; - error = vfs_readdir(file, filldir_one, &buffer); + error = iterate_dir(file, &buffer.ctx); if (buffer.found) { error = 0; break; @@ -302,37 +336,36 @@ out: /** * export_encode_fh - default export_operations->encode_fh function - * @dentry: the dentry to encode - * @fh: where to store the file handle fragment + * @inode: the object to encode + * @fid: where to store the file handle fragment * @max_len: maximum length to store there - * @connectable: whether to store parent information + * @parent: parent directory inode, if wanted * * This default encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. */ -static int export_encode_fh(struct dentry *dentry, struct fid *fid, - int *max_len, int connectable) +static int export_encode_fh(struct inode *inode, struct fid *fid, + int *max_len, struct inode *parent) { - struct inode * inode = dentry->d_inode; int len = *max_len; int type = FILEID_INO32_GEN; - - if (len < 2 || (connectable && len < 4)) - return 255; + + if (parent && (len < 4)) { + *max_len = 4; + return FILEID_INVALID; + } else if (len < 2) { + *max_len = 2; + return FILEID_INVALID; + } len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); len = 4; type = FILEID_INO32_GEN_PARENT; } @@ -340,16 +373,36 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid, return type; } +int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, + int *max_len, struct inode *parent) +{ + const struct export_operations *nop = inode->i_sb->s_export_op; + + if (nop && nop->encode_fh) + return nop->encode_fh(inode, fid->raw, max_len, parent); + + return export_encode_fh(inode, fid, max_len, parent); +} +EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); + int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable) { - const struct export_operations *nop = dentry->d_sb->s_export_op; int error; + struct dentry *p = NULL; + struct inode *inode = dentry->d_inode, *parent = NULL; - if (nop->encode_fh) - error = nop->encode_fh(dentry, fid->raw, max_len, connectable); - else - error = export_encode_fh(dentry, fid, max_len, connectable); + if (connectable && !S_ISDIR(inode->i_mode)) { + p = dget_parent(dentry); + /* + * note that while p might've ceased to be our parent already, + * it's still pinned by and still positive. + */ + parent = p->d_inode; + } + + error = exportfs_encode_inode_fh(inode, fid, max_len, parent); + dput(p); return error; } @@ -367,6 +420,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Try to get any dentry for the given file handle from the filesystem. */ + if (!nop || !nop->fh_to_dentry) + return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (!result) result = ERR_PTR(-ESTALE); |
