diff options
Diffstat (limited to 'fs/exportfs/expfs.c')
| -rw-r--r-- | fs/exportfs/expfs.c | 271 | 
1 files changed, 151 insertions, 120 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a235f001688..b01fbfb51f4 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result,  	return NULL;  } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry)  {  	dget(dentry); -	while (!IS_ROOT(dentry)) { +	while (dentry->d_flags & DCACHE_DISCONNECTED) {  		struct dentry *parent = dget_parent(dentry); -		if (!(parent->d_flags & DCACHE_DISCONNECTED)) { +		dput(dentry); +		if (dentry == parent) {  			dput(parent); -			break; +			return false;  		} +		dentry = parent; +	} +	dput(dentry); +	return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ +	dget(dentry); +	while (dentry->d_flags & DCACHE_DISCONNECTED) { +		struct dentry *parent = dget_parent(dentry); + +		WARN_ON_ONCE(IS_ROOT(dentry)); + +		spin_lock(&dentry->d_lock); +		dentry->d_flags &= ~DCACHE_DISCONNECTED; +		spin_unlock(&dentry->d_lock);  		dput(dentry);  		dentry = parent;  	} -	return dentry; +	dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory.  The concurrent operation has reconnected our + * dentry, so we no longer need to. 
+ */ +static struct dentry *reconnect_one(struct vfsmount *mnt, +		struct dentry *dentry, char *nbuf) +{ +	struct dentry *parent, *tmp; +	int err; + +	parent = ERR_PTR(-EACCES); +	mutex_lock(&dentry->d_inode->i_mutex); +	if (mnt->mnt_sb->s_export_op->get_parent) +		parent = mnt->mnt_sb->s_export_op->get_parent(dentry); +	mutex_unlock(&dentry->d_inode->i_mutex); + +	if (IS_ERR(parent)) { +		dprintk("%s: get_parent of %lu failed, err %ld\n", +			__func__, dentry->d_inode->i_ino, PTR_ERR(parent)); +		return parent; +	} + +	dprintk("%s: find name of %lu in %lu\n", __func__, +		dentry->d_inode->i_ino, parent->d_inode->i_ino); +	err = exportfs_get_name(mnt, parent, nbuf, dentry); +	if (err == -ENOENT) +		goto out_reconnected; +	if (err) +		goto out_err; +	dprintk("%s: found name: %s\n", __func__, nbuf); +	mutex_lock(&parent->d_inode->i_mutex); +	tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); +	mutex_unlock(&parent->d_inode->i_mutex); +	if (IS_ERR(tmp)) { +		err = PTR_ERR(tmp); +		dprintk("%s: lookup failed: %d\n", __func__, err); +		goto out_err; +	} +	if (tmp != dentry) { +		dput(tmp); +		goto out_reconnected; +	} +	dput(tmp); +	if (IS_ROOT(dentry)) { +		err = -ESTALE; +		goto out_err; +	} +	return parent; + +out_err: +	dput(parent); +	return ERR_PTR(err); +out_reconnected: +	dput(parent); +	/* +	 * Someone must have renamed our entry into another parent, in +	 * which case it has been reconnected by the rename. +	 * +	 * Or someone removed it entirely, in which case filehandle +	 * lookup will succeed but the directory is now IS_DEAD and +	 * subsequent operations on it will fail. +	 * +	 * Alternatively, maybe there was no race at all, and the +	 * filesystem is just corrupt and gave us a parent that doesn't +	 * actually contain any entry pointing to this inode.  
So, +	 * double check that this worked and return -ESTALE if not: +	 */ +	if (!dentry_connected(dentry)) +		return ERR_PTR(-ESTALE); +	return NULL;  }  /*   * Make sure target_dir is fully connected to the dentry tree.   * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected.  In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation.  In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory).   */  static int  reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)  { -	int noprogress = 0; -	int err = -ESTALE; +	struct dentry *dentry, *parent; -	/* -	 * It is possible that a confused file system might not let us complete -	 * the path to the root.  For example, if get_parent returns a directory -	 * in which we cannot find a name for the child.  While this implies a -	 * very sick filesystem we don't want it to cause knfsd to spin.  Hence -	 * the noprogress counter.  
If we go through the loop 10 times (2 is -	 * probably enough) without getting anywhere, we just give up -	 */ -	while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { -		struct dentry *pd = find_disconnected_root(target_dir); - -		if (!IS_ROOT(pd)) { -			/* must have found a connected parent - great */ -			spin_lock(&pd->d_lock); -			pd->d_flags &= ~DCACHE_DISCONNECTED; -			spin_unlock(&pd->d_lock); -			noprogress = 0; -		} else if (pd == mnt->mnt_sb->s_root) { -			printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); -			spin_lock(&pd->d_lock); -			pd->d_flags &= ~DCACHE_DISCONNECTED; -			spin_unlock(&pd->d_lock); -			noprogress = 0; -		} else { -			/* -			 * We have hit the top of a disconnected path, try to -			 * find parent and connect. -			 * -			 * Racing with some other process renaming a directory -			 * isn't much of a problem here.  If someone renames -			 * the directory, it will end up properly connected, -			 * which is what we want -			 * -			 * Getting the parent can't be supported generically, -			 * the locking is too icky. -			 * -			 * Instead we just return EACCES.  If server reboots -			 * or inodes get flushed, you lose -			 */ -			struct dentry *ppd = ERR_PTR(-EACCES); -			struct dentry *npd; - -			mutex_lock(&pd->d_inode->i_mutex); -			if (mnt->mnt_sb->s_export_op->get_parent) -				ppd = mnt->mnt_sb->s_export_op->get_parent(pd); -			mutex_unlock(&pd->d_inode->i_mutex); - -			if (IS_ERR(ppd)) { -				err = PTR_ERR(ppd); -				dprintk("%s: get_parent of %ld failed, err %d\n", -					__func__, pd->d_inode->i_ino, err); -				dput(pd); -				break; -			} +	dentry = dget(target_dir); -			dprintk("%s: find name of %lu in %lu\n", __func__, -				pd->d_inode->i_ino, ppd->d_inode->i_ino); -			err = exportfs_get_name(mnt, ppd, nbuf, pd); -			if (err) { -				dput(ppd); -				dput(pd); -				if (err == -ENOENT) -					/* some race between get_parent and -					 * get_name?  
just try again -					 */ -					continue; -				break; -			} -			dprintk("%s: found name: %s\n", __func__, nbuf); -			mutex_lock(&ppd->d_inode->i_mutex); -			npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); -			mutex_unlock(&ppd->d_inode->i_mutex); -			if (IS_ERR(npd)) { -				err = PTR_ERR(npd); -				dprintk("%s: lookup failed: %d\n", -					__func__, err); -				dput(ppd); -				dput(pd); -				break; -			} -			/* we didn't really want npd, we really wanted -			 * a side-effect of the lookup. -			 * hopefully, npd == pd, though it isn't really -			 * a problem if it isn't -			 */ -			if (npd == pd) -				noprogress = 0; -			else -				printk("%s: npd != pd\n", __func__); -			dput(npd); -			dput(ppd); -			if (IS_ROOT(pd)) { -				/* something went wrong, we have to give up */ -				dput(pd); -				break; -			} -		} -		dput(pd); -	} +	while (dentry->d_flags & DCACHE_DISCONNECTED) { +		BUG_ON(dentry == mnt->mnt_sb->s_root); -	if (target_dir->d_flags & DCACHE_DISCONNECTED) { -		/* something went wrong - oh-well */ -		if (!err) -			err = -ESTALE; -		return err; -	} +		if (IS_ROOT(dentry)) +			parent = reconnect_one(mnt, dentry, nbuf); +		else +			parent = dget_parent(dentry); +		if (!parent) +			break; +		dput(dentry); +		if (IS_ERR(parent)) +			return PTR_ERR(parent); +		dentry = parent; +	} +	dput(dentry); +	clear_disconnected(target_dir);  	return 0;  } @@ -215,7 +232,7 @@ struct getdents_callback {  	struct dir_context ctx;  	char *name;		/* name that was found. It already points to a  				   buffer NAME_MAX+1 is size */ -	unsigned long ino;	/* the inum we are looking for */ +	u64 ino;		/* the inum we are looking for */  	int found;		/* inode matched? 
*/  	int sequence;		/* sequence counter */  }; @@ -242,7 +259,7 @@ static int filldir_one(void * __buf, const char * name, int len,  /**   * get_name - default export_operations->get_name function - * @dentry: the directory in which to find a name + * @path:   the directory in which to find a name   * @name:   a pointer to a %NAME_MAX+1 char buffer to store the name   * @child:  the dentry for the child directory.   * @@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)  	struct inode *dir = path->dentry->d_inode;  	int error;  	struct file *file; +	struct kstat stat; +	struct path child_path = { +		.mnt = path->mnt, +		.dentry = child, +	};  	struct getdents_callback buffer = {  		.ctx.actor = filldir_one,  		.name = name, -		.ino = child->d_inode->i_ino  	};  	error = -ENOTDIR; @@ -268,6 +289,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)  	if (!dir->i_fop)  		goto out;  	/* +	 * inode->i_ino is unsigned long, kstat->ino is u64, so the +	 * former would be insufficient on 32-bit hosts when the +	 * filesystem supports 64-bit inode numbers.  So we need to +	 * actually call ->getattr, not just read i_ino: +	 */ +	error = vfs_getattr_nosec(&child_path, &stat); +	if (error) +		return error; +	buffer.ino = stat.ino; +	/*  	 * Open the directory ...  	 */  	file = dentry_open(path, O_RDONLY, cred); @@ -306,7 +337,7 @@ out:  /**   * export_encode_fh - default export_operations->encode_fh function   * @inode:   the object to encode - * @fh:      where to store the file handle fragment + * @fid:     where to store the file handle fragment   * @max_len: maximum length to store there   * @parent:  parent directory inode, if wanted   *  | 
