diff options
Diffstat (limited to 'fs/exportfs/expfs.c')
| -rw-r--r-- | fs/exportfs/expfs.c | 371 | 
1 files changed, 213 insertions, 158 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 51b304056f1..b01fbfb51f4 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -19,19 +19,19 @@  #define dprintk(fmt, args...) do{}while(0) -static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, -		struct dentry *child); +static int get_name(const struct path *path, char *name, struct dentry *child);  static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,  		char *name, struct dentry *child)  {  	const struct export_operations *nop = dir->d_sb->s_export_op; +	struct path path = {.mnt = mnt, .dentry = dir};  	if (nop->get_name)  		return nop->get_name(dir, name, child);  	else -		return get_name(mnt, dir, name, child); +		return get_name(&path, name, child);  }  /* @@ -43,176 +43,196 @@ find_acceptable_alias(struct dentry *result,  		void *context)  {  	struct dentry *dentry, *toput = NULL; +	struct inode *inode;  	if (acceptable(context, result))  		return result; -	spin_lock(&dcache_lock); -	list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { -		dget_locked(dentry); -		spin_unlock(&dcache_lock); +	inode = result->d_inode; +	spin_lock(&inode->i_lock); +	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { +		dget(dentry); +		spin_unlock(&inode->i_lock);  		if (toput)  			dput(toput);  		if (dentry != result && acceptable(context, dentry)) {  			dput(result);  			return dentry;  		} -		spin_lock(&dcache_lock); +		spin_lock(&inode->i_lock);  		toput = dentry;  	} -	spin_unlock(&dcache_lock); +	spin_unlock(&inode->i_lock);  	if (toput)  		dput(toput);  	return NULL;  } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry)  {  	dget(dentry); -	while (!IS_ROOT(dentry)) { +	while (dentry->d_flags & DCACHE_DISCONNECTED) {  		struct dentry *parent = dget_parent(dentry); -		if (!(parent->d_flags & DCACHE_DISCONNECTED)) { +		dput(dentry); +		if (IS_ROOT(dentry)) {  			dput(parent); -			break; +			return false;  		} +		dentry = parent; +	} +	dput(dentry); +	return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ +	dget(dentry); +	while (dentry->d_flags & DCACHE_DISCONNECTED) { +		struct dentry *parent = dget_parent(dentry); + +		WARN_ON_ONCE(IS_ROOT(dentry)); + +		spin_lock(&dentry->d_lock); +		dentry->d_flags &= ~DCACHE_DISCONNECTED; +		spin_unlock(&dentry->d_lock);  		dput(dentry);  		dentry = parent;  	} -	return dentry; +	dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory.  The concurrent operation has reconnected our + * dentry, so we no longer need to. + */ +static struct dentry *reconnect_one(struct vfsmount *mnt, +		struct dentry *dentry, char *nbuf) +{ +	struct dentry *parent; +	struct dentry *tmp; +	int err; + +	parent = ERR_PTR(-EACCES); +	mutex_lock(&dentry->d_inode->i_mutex); +	if (mnt->mnt_sb->s_export_op->get_parent) +		parent = mnt->mnt_sb->s_export_op->get_parent(dentry); +	mutex_unlock(&dentry->d_inode->i_mutex); + +	if (IS_ERR(parent)) { +		dprintk("%s: get_parent of %ld failed, err %d\n", +			__func__, dentry->d_inode->i_ino, PTR_ERR(parent)); +		return parent; +	} + +	dprintk("%s: find name of %lu in %lu\n", __func__, +		dentry->d_inode->i_ino, parent->d_inode->i_ino); +	err = exportfs_get_name(mnt, parent, nbuf, dentry); +	if (err == -ENOENT) +		goto out_reconnected; +	if (err) +		goto out_err; +	dprintk("%s: found name: %s\n", __func__, nbuf); +	mutex_lock(&parent->d_inode->i_mutex); +	tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); +	mutex_unlock(&parent->d_inode->i_mutex); +	if (IS_ERR(tmp)) { +		dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); +		goto out_err; +	} +	if (tmp != dentry) { +		dput(tmp); +		goto out_reconnected; +	} +	dput(tmp); +	if (IS_ROOT(dentry)) { +		err = -ESTALE; +		goto out_err; +	} +	return parent; + +out_err: +	dput(parent); +	return ERR_PTR(err); +out_reconnected: +	dput(parent); +	/* +	 * Someone must have renamed our entry into another parent, in +	 * which case it has been reconnected by the rename. +	 * +	 * Or someone removed it entirely, in which case filehandle +	 * lookup will succeed but the directory is now IS_DEAD and +	 * subsequent operations on it will fail. +	 * +	 * Alternatively, maybe there was no race at all, and the +	 * filesystem is just corrupt and gave us a parent that doesn't +	 * actually contain any entry pointing to this inode.  So, +	 * double check that this worked and return -ESTALE if not: +	 */ +	if (!dentry_connected(dentry)) +		return ERR_PTR(-ESTALE); +	return NULL;  }  /*   * Make sure target_dir is fully connected to the dentry tree.   * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected.  In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation.  In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory).   */  static int  reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)  { -	int noprogress = 0; -	int err = -ESTALE; +	struct dentry *dentry, *parent; -	/* -	 * It is possible that a confused file system might not let us complete -	 * the path to the root.  For example, if get_parent returns a directory -	 * in which we cannot find a name for the child.  While this implies a -	 * very sick filesystem we don't want it to cause knfsd to spin.  Hence -	 * the noprogress counter.  If we go through the loop 10 times (2 is -	 * probably enough) without getting anywhere, we just give up -	 */ -	while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { -		struct dentry *pd = find_disconnected_root(target_dir); - -		if (!IS_ROOT(pd)) { -			/* must have found a connected parent - great */ -			spin_lock(&pd->d_lock); -			pd->d_flags &= ~DCACHE_DISCONNECTED; -			spin_unlock(&pd->d_lock); -			noprogress = 0; -		} else if (pd == mnt->mnt_sb->s_root) { -			printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); -			spin_lock(&pd->d_lock); -			pd->d_flags &= ~DCACHE_DISCONNECTED; -			spin_unlock(&pd->d_lock); -			noprogress = 0; -		} else { -			/* -			 * We have hit the top of a disconnected path, try to -			 * find parent and connect. -			 * -			 * Racing with some other process renaming a directory -			 * isn't much of a problem here.  If someone renames -			 * the directory, it will end up properly connected, -			 * which is what we want -			 * -			 * Getting the parent can't be supported generically, -			 * the locking is too icky. -			 * -			 * Instead we just return EACCES.  If server reboots -			 * or inodes get flushed, you lose -			 */ -			struct dentry *ppd = ERR_PTR(-EACCES); -			struct dentry *npd; - -			mutex_lock(&pd->d_inode->i_mutex); -			if (mnt->mnt_sb->s_export_op->get_parent) -				ppd = mnt->mnt_sb->s_export_op->get_parent(pd); -			mutex_unlock(&pd->d_inode->i_mutex); - -			if (IS_ERR(ppd)) { -				err = PTR_ERR(ppd); -				dprintk("%s: get_parent of %ld failed, err %d\n", -					__func__, pd->d_inode->i_ino, err); -				dput(pd); -				break; -			} +	dentry = dget(target_dir); -			dprintk("%s: find name of %lu in %lu\n", __func__, -				pd->d_inode->i_ino, ppd->d_inode->i_ino); -			err = exportfs_get_name(mnt, ppd, nbuf, pd); -			if (err) { -				dput(ppd); -				dput(pd); -				if (err == -ENOENT) -					/* some race between get_parent and -					 * get_name?  just try again -					 */ -					continue; -				break; -			} -			dprintk("%s: found name: %s\n", __func__, nbuf); -			mutex_lock(&ppd->d_inode->i_mutex); -			npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); -			mutex_unlock(&ppd->d_inode->i_mutex); -			if (IS_ERR(npd)) { -				err = PTR_ERR(npd); -				dprintk("%s: lookup failed: %d\n", -					__func__, err); -				dput(ppd); -				dput(pd); -				break; -			} -			/* we didn't really want npd, we really wanted -			 * a side-effect of the lookup. -			 * hopefully, npd == pd, though it isn't really -			 * a problem if it isn't -			 */ -			if (npd == pd) -				noprogress = 0; -			else -				printk("%s: npd != pd\n", __func__); -			dput(npd); -			dput(ppd); -			if (IS_ROOT(pd)) { -				/* something went wrong, we have to give up */ -				dput(pd); -				break; -			} -		} -		dput(pd); -	} +	while (dentry->d_flags & DCACHE_DISCONNECTED) { +		BUG_ON(dentry == mnt->mnt_sb->s_root); -	if (target_dir->d_flags & DCACHE_DISCONNECTED) { -		/* something went wrong - oh-well */ -		if (!err) -			err = -ESTALE; -		return err; -	} +		if (IS_ROOT(dentry)) +			parent = reconnect_one(mnt, dentry, nbuf); +		else +			parent = dget_parent(dentry); +		if (!parent) +			break; +		dput(dentry); +		if (IS_ERR(parent)) +			return PTR_ERR(parent); +		dentry = parent; +	} +	dput(dentry); +	clear_disconnected(target_dir);  	return 0;  }  struct getdents_callback { +	struct dir_context ctx;  	char *name;		/* name that was found. It already points to a  				   buffer NAME_MAX+1 is size */ -	unsigned long ino;	/* the inum we are looking for */ +	u64 ino;		/* the inum we are looking for */  	int found;		/* inode matched? */  	int sequence;		/* sequence counter */  }; @@ -228,7 +248,7 @@ static int filldir_one(void * __buf, const char * name, int len,  	int result = 0;  	buf->sequence++; -	if (buf->ino == ino) { +	if (buf->ino == ino && len <= NAME_MAX) {  		memcpy(buf->name, name, len);  		buf->name[len] = '\0';  		buf->found = 1; @@ -239,21 +259,28 @@ static int filldir_one(void * __buf, const char * name, int len,  /**   * get_name - default export_operations->get_name function - * @dentry: the directory in which to find a name + * @path:   the directory in which to find a name   * @name:   a pointer to a %NAME_MAX+1 char buffer to store the name   * @child:  the dentry for the child directory.   *   * calls readdir on the parent until it finds an entry with   * the same inode number as the child, and returns that.   */ -static int get_name(struct vfsmount *mnt, struct dentry *dentry, -		char *name, struct dentry *child) +static int get_name(const struct path *path, char *name, struct dentry *child)  {  	const struct cred *cred = current_cred(); -	struct inode *dir = dentry->d_inode; +	struct inode *dir = path->dentry->d_inode;  	int error;  	struct file *file; -	struct getdents_callback buffer; +	struct kstat stat; +	struct path child_path = { +		.mnt = path->mnt, +		.dentry = child, +	}; +	struct getdents_callback buffer = { +		.ctx.actor = filldir_one, +		.name = name, +	};  	error = -ENOTDIR;  	if (!dir || !S_ISDIR(dir->i_mode)) @@ -262,25 +289,32 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,  	if (!dir->i_fop)  		goto out;  	/* +	 * inode->i_ino is unsigned long, kstat->ino is u64, so the +	 * former would be insufficient on 32-bit hosts when the +	 * filesystem supports 64-bit inode numbers.  So we need to +	 * actually call ->getattr, not just read i_ino: +	 */ +	error = vfs_getattr_nosec(&child_path, &stat); +	if (error) +		return error; +	buffer.ino = stat.ino; +	/*  	 * Open the directory ...  	 */ -	file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); +	file = dentry_open(path, O_RDONLY, cred);  	error = PTR_ERR(file);  	if (IS_ERR(file))  		goto out;  	error = -EINVAL; -	if (!file->f_op->readdir) +	if (!file->f_op->iterate)  		goto out_close; -	buffer.name = name; -	buffer.ino = child->d_inode->i_ino; -	buffer.found = 0;  	buffer.sequence = 0;  	while (1) {  		int old_seq = buffer.sequence; -		error = vfs_readdir(file, filldir_one, &buffer); +		error = iterate_dir(file, &buffer.ctx);  		if (buffer.found) {  			error = 0;  			break; @@ -302,37 +336,36 @@ out:  /**   * export_encode_fh - default export_operations->encode_fh function - * @dentry:  the dentry to encode - * @fh:      where to store the file handle fragment + * @inode:   the object to encode + * @fid:     where to store the file handle fragment   * @max_len: maximum length to store there - * @connectable: whether to store parent information + * @parent:  parent directory inode, if wanted   *   * This default encode_fh function assumes that the 32 inode number   * is suitable for locating an inode, and that the generation number   * can be used to check that it is still valid.  It places them in the   * filehandle fragment where export_decode_fh expects to find them.   */ -static int export_encode_fh(struct dentry *dentry, struct fid *fid, -		int *max_len, int connectable) +static int export_encode_fh(struct inode *inode, struct fid *fid, +		int *max_len, struct inode *parent)  { -	struct inode * inode = dentry->d_inode;  	int len = *max_len;  	int type = FILEID_INO32_GEN; -	 -	if (len < 2 || (connectable && len < 4)) -		return 255; + +	if (parent && (len < 4)) { +		*max_len = 4; +		return FILEID_INVALID; +	} else if (len < 2) { +		*max_len = 2; +		return FILEID_INVALID; +	}  	len = 2;  	fid->i32.ino = inode->i_ino;  	fid->i32.gen = inode->i_generation; -	if (connectable && !S_ISDIR(inode->i_mode)) { -		struct inode *parent; - -		spin_lock(&dentry->d_lock); -		parent = dentry->d_parent->d_inode; +	if (parent) {  		fid->i32.parent_ino = parent->i_ino;  		fid->i32.parent_gen = parent->i_generation; -		spin_unlock(&dentry->d_lock);  		len = 4;  		type = FILEID_INO32_GEN_PARENT;  	} @@ -340,16 +373,36 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,  	return type;  } +int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, +			     int *max_len, struct inode *parent) +{ +	const struct export_operations *nop = inode->i_sb->s_export_op; + +	if (nop && nop->encode_fh) +		return nop->encode_fh(inode, fid->raw, max_len, parent); + +	return export_encode_fh(inode, fid, max_len, parent); +} +EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); +  int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,  		int connectable)  { -	const struct export_operations *nop = dentry->d_sb->s_export_op;  	int error; +	struct dentry *p = NULL; +	struct inode *inode = dentry->d_inode, *parent = NULL; -	if (nop->encode_fh) -		error = nop->encode_fh(dentry, fid->raw, max_len, connectable); -	else -		error = export_encode_fh(dentry, fid, max_len, connectable); +	if (connectable && !S_ISDIR(inode->i_mode)) { +		p = dget_parent(dentry); +		/* +		 * note that while p might've ceased to be our parent already, +		 * it's still pinned by and still positive. +		 */ +		parent = p->d_inode; +	} + +	error = exportfs_encode_inode_fh(inode, fid, max_len, parent); +	dput(p);  	return error;  } @@ -367,6 +420,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,  	/*  	 * Try to get any dentry for the given file handle from the filesystem.  	 */ +	if (!nop || !nop->fh_to_dentry) +		return ERR_PTR(-ESTALE);  	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);  	if (!result)  		result = ERR_PTR(-ESTALE);  | 
