From b1b5d7f9b7ac6a8e3452ac43a53eebf69fdf5a77 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Tue, 30 Aug 2005 14:28:56 -0500
Subject: JFS: jfs_delete_inode should always call clear_inode.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/inode.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 767c7ecb429..37da3e33e75 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,21 +128,21 @@ void jfs_delete_inode(struct inode *inode)
 {
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
-	if (is_bad_inode(inode) ||
-	    (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
-			return;
+	if (!is_bad_inode(inode) &&
+	    (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
 
-	if (test_cflag(COMMIT_Freewmap, inode))
-		jfs_free_zero_link(inode);
+		if (test_cflag(COMMIT_Freewmap, inode))
+			jfs_free_zero_link(inode);
 
-	diFree(inode);
+		diFree(inode);
 
-	/*
-	 * Free the inode from the quota allocation.
-	 */
-	DQUOT_INIT(inode);
-	DQUOT_FREE_INODE(inode);
-	DQUOT_DROP(inode);
+		/*
+		 * Free the inode from the quota allocation.
+		 */
+		DQUOT_INIT(inode);
+		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
+	}
 
 	clear_inode(inode);
 }
-- 
cgit v1.2.3-18-g5258


From 4f4b401bfaa97edbea41a1fcab794148e7ac0421 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 1 Sep 2005 09:02:43 -0500
Subject: JFS: allow extended attributes to be set within an existing
 transaction

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/acl.c       | 24 +++++++++++++++------
 fs/jfs/jfs_acl.h   | 12 +++++++++--
 fs/jfs/jfs_xattr.h |  4 ++--
 fs/jfs/namei.c     | 63 ++++++++++++++++++++++++++++++++++++------------------
 fs/jfs/xattr.c     | 58 ++++++++++++++++++++++++++++++++++---------------
 5 files changed, 112 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e892dab40c2..461e4934ca7 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -23,6 +23,7 @@
 #include <linux/quotaops.h>
 #include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
+#include "jfs_txnmgr.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
 
@@ -75,7 +76,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
+		       struct posix_acl *acl)
 {
 	char *ea_name;
 	struct jfs_inode_info *ji = JFS_IP(inode);
@@ -110,7 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		if (rc < 0)
 			goto out;
 	}
-	rc = __jfs_setxattr(inode, ea_name, value, size, 0);
+	rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0);
 out:
 	kfree(value);
 
@@ -143,7 +145,7 @@ int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return generic_permission(inode, mask, jfs_check_acl);
 }
 
-int jfs_init_acl(struct inode *inode, struct inode *dir)
+int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
 	struct posix_acl *clone;
@@ -159,7 +161,7 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
 
 	if (acl) {
 		if (S_ISDIR(inode->i_mode)) {
-			rc = jfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+			rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
 			if (rc)
 				goto cleanup;
 		}
@@ -173,7 +175,8 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
 		if (rc >= 0) {
 			inode->i_mode = mode;
 			if (rc > 0)
-				rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+				rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS,
+						 clone);
 		}
 		posix_acl_release(clone);
 cleanup:
@@ -202,8 +205,15 @@ static int jfs_acl_chmod(struct inode *inode)
 		return -ENOMEM;
 
 	rc = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!rc)
-		rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+	if (!rc) {
+		tid_t tid = txBegin(inode->i_sb, 0);
+		down(&JFS_IP(inode)->commit_sem);
+		rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
+		if (!rc)
+			rc = txCommit(tid, 1, &inode, 0);
+		txEnd(tid);
+		up(&JFS_IP(inode)->commit_sem);
+	}
 
 	posix_acl_release(clone);
 	return rc;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index a3acd3eec05..a76293767c7 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -21,8 +21,16 @@
 #ifdef CONFIG_JFS_POSIX_ACL
 
 int jfs_permission(struct inode *, int, struct nameidata *);
-int jfs_init_acl(struct inode *, struct inode *);
+int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
-#endif		/* CONFIG_JFS_POSIX_ACL */
+#else
+
+static inline int jfs_init_acl(tid_t tid, struct inode *inode,
+			       struct inode *dir)
+{
+	return 0;
+}
+
+#endif
 #endif		/* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index a1052f3f0be..116a73ce307 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -52,8 +52,8 @@ struct jfs_ea_list {
 #define	END_EALIST(ealist) \
 	((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
 
-extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t,
-			  int);
+extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *,
+			  size_t, int);
 extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t,
 			int);
 extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 49ccde3937f..f23f9c2aa52 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -38,6 +38,24 @@ struct dentry_operations jfs_ci_dentry_operations;
 
 static s64 commitZeroLink(tid_t, struct inode *);
 
+/*
+ * NAME:	free_ea_wmap(inode)
+ *
+ * FUNCTION:	free uncommitted extended attributes from working map 
+ *
+ */
+static inline void free_ea_wmap(struct inode *inode)
+{
+	dxd_t *ea = &JFS_IP(inode)->ea;
+
+	if (ea->flag & DXD_EXTENT) {
+		/* free EA pages from cache */
+		invalidate_dxd_metapages(inode, *ea);
+		dbFree(inode, addressDXD(ea), lengthDXD(ea));
+	}
+	ea->flag = 0;
+}
+
 /*
  * NAME:	jfs_create(dip, dentry, mode)
  *
@@ -89,8 +107,13 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_acl(tid, ip, dip);
+	if (rc)
+		goto out3;
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_create: dtSearch returned %d", rc);
+		txAbort(tid, 0);
 		goto out3;
 	}
 
@@ -139,6 +162,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -147,11 +171,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
-
       out1:
 
 	jfs_info("jfs_create: rc:%d", rc);
@@ -216,8 +235,13 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_acl(tid, ip, dip);
+	if (rc)
+		goto out3;
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_mkdir: dtSearch returned %d", rc);
+		txAbort(tid, 0);
 		goto out3;
 	}
 
@@ -267,6 +291,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -275,10 +300,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
 
       out1:
 
@@ -1000,6 +1021,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	up(&JFS_IP(dip)->commit_sem);
 	up(&JFS_IP(ip)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -1008,11 +1030,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
       out2:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dip);
-#endif
-
       out1:
 	jfs_info("jfs_symlink: rc:%d", rc);
 	return rc;
@@ -1328,17 +1345,25 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	down(&JFS_IP(dir)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
-	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+	rc = jfs_init_acl(tid, ip, dir);
+	if (rc)
 		goto out3;
 
+	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
 	tblk->ino = ip->i_ino;
 	tblk->u.ixpxd = JFS_IP(ip)->ixpxd;
 
 	ino = ip->i_ino;
-	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
+		txAbort(tid, 0);
 		goto out3;
+	}
 
 	ip->i_op = &jfs_file_inode_operations;
 	jfs_ip->dev = new_encode_dev(rdev);
@@ -1360,6 +1385,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	up(&JFS_IP(ip)->commit_sem);
 	up(&JFS_IP(dir)->commit_sem);
 	if (rc) {
+		free_ea_wmap(ip);
 		ip->i_nlink = 0;
 		iput(ip);
 	} else
@@ -1368,11 +1394,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
       out1:
 	free_UCSname(&dname);
 
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (rc == 0)
-		jfs_init_acl(ip, dir);
-#endif
-
       out:
 	jfs_info("jfs_mknod: returning %d", rc);
 	return rc;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 554ec739e49..35674b2a0e6 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -633,12 +633,12 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf)
 	}
 }
 
-static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
+static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
+		  int new_size)
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
 	unsigned long old_blocks, new_blocks;
 	int rc = 0;
-	tid_t tid;
 
 	if (new_size == 0) {
 		ea_release(inode, ea_buf);
@@ -664,9 +664,6 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
 	if (rc)
 		return rc;
 
-	tid = txBegin(inode->i_sb, 0);
-	down(&ji->commit_sem);
-
 	old_blocks = new_blocks = 0;
 
 	if (ji->ea.flag & DXD_EXTENT) {
@@ -695,11 +692,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
 		DQUOT_FREE_BLOCK(inode, old_blocks);
 
 	inode->i_ctime = CURRENT_TIME;
-	rc = txCommit(tid, 1, &inode, 0);
-	txEnd(tid);
-	up(&ji->commit_sem);
 
-	return rc;
+	return 0;
 }
 
 /*
@@ -810,8 +804,8 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	return permission(inode, MAY_WRITE, NULL);
 }
 
-int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
-		   size_t value_len, int flags)
+int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
+		   const void *value, size_t value_len, int flags)
 {
 	struct jfs_ea_list *ealist;
 	struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL;
@@ -825,9 +819,6 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 	int rc;
 	int length;
 
-	if ((rc = can_set_xattr(inode, name, value, value_len)))
-		return rc;
-
 	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
 		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
 				  GFP_KERNEL);
@@ -939,7 +930,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 
 	ealist->size = cpu_to_le32(new_size);
 
-	rc = ea_put(inode, &ea_buf, new_size);
+	rc = ea_put(tid, inode, &ea_buf, new_size);
 
 	goto out;
       release:
@@ -955,12 +946,29 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
 int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		 size_t value_len, int flags)
 {
+	struct inode *inode = dentry->d_inode;
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	int rc;
+	tid_t tid;
+
+	if ((rc = can_set_xattr(inode, name, value, value_len)))
+		return rc;
+
 	if (value == NULL) {	/* empty EA, do not remove */
 		value = "";
 		value_len = 0;
 	}
 
-	return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
+	tid = txBegin(inode->i_sb, 0);
+	down(&ji->commit_sem);
+	rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+			    flags);
+	if (!rc)
+		rc = txCommit(tid, 1, &inode, 0);
+	txEnd(tid);
+	up(&ji->commit_sem);
+
+	return rc;
 }
 
 static int can_get_xattr(struct inode *inode, const char *name)
@@ -1122,5 +1130,21 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
 
 int jfs_removexattr(struct dentry *dentry, const char *name)
 {
-	return __jfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+	struct inode *inode = dentry->d_inode;
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	int rc;
+	tid_t tid;
+
+	if ((rc = can_set_xattr(inode, name, NULL, 0)))
+		return rc;
+
+	tid = txBegin(inode->i_sb, 0);
+	down(&ji->commit_sem);
+	rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+	if (!rc)
+		rc = txCommit(tid, 1, &inode, 0);
+	txEnd(tid);
+	up(&ji->commit_sem);
+
+	return rc;
 }
-- 
cgit v1.2.3-18-g5258


From 1d15b10f95d4c4295a0f2288c7be7b6a005490da Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Thu, 1 Sep 2005 09:05:39 -0500
Subject: JFS: Implement jfs_init_security

This atomically initializes the security xattr when an object is created.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_xattr.h | 10 ++++++++++
 fs/jfs/namei.c     | 22 ++++++++++++++++++++++
 fs/jfs/xattr.c     | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

(limited to 'fs')

diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 116a73ce307..25e9990bccd 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -61,4 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
 extern int jfs_removexattr(struct dentry *, const char *);
 
+#ifdef CONFIG_JFS_SECURITY
+extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+#else
+static inline int jfs_init_security(tid_t tid, struct inode *inode,
+				    struct inode *dir)
+{
+	return 0;
+}
+#endif
+
 #endif	/* H_JFS_XATTR */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index f23f9c2aa52..1abe7343f92 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -111,6 +111,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 	if (rc)
 		goto out3;
 
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_create: dtSearch returned %d", rc);
 		txAbort(tid, 0);
@@ -239,6 +245,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	if (rc)
 		goto out3;
 
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jfs_err("jfs_mkdir: dtSearch returned %d", rc);
 		txAbort(tid, 0);
@@ -906,6 +918,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	down(&JFS_IP(dip)->commit_sem);
 	down(&JFS_IP(ip)->commit_sem);
 
+	rc = jfs_init_security(tid, ip, dip);
+	if (rc)
+		goto out3;
+
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
 	tblk->ino = ip->i_ino;
@@ -1349,6 +1365,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	if (rc)
 		goto out3;
 
+	rc = jfs_init_security(tid, ip, dir);
+	if (rc) {
+		txAbort(tid, 0);
+		goto out3;
+	}
+
 	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
 		txAbort(tid, 0);
 		goto out3;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 35674b2a0e6..23aa5066b5a 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -21,6 +21,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
+#include <linux/security.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
@@ -1148,3 +1149,38 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
 
 	return rc;
 }
+
+#ifdef CONFIG_JFS_SECURITY
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+{
+	int rc;
+	size_t len;
+	void *value;
+	char *suffix;
+	char *name;
+
+	rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+	if (rc) {
+		if (rc == -EOPNOTSUPP)
+			return 0;
+		return rc;
+	}
+	name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix),
+		       GFP_NOFS);
+	if (!name) {
+		rc = -ENOMEM;
+		goto kmalloc_failed;
+	}
+	strcpy(name, XATTR_SECURITY_PREFIX);
+	strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
+
+	rc = __jfs_setxattr(tid, inode, name, value, len, 0);
+
+	kfree(name);
+kmalloc_failed:
+	kfree(suffix);
+	kfree(value);
+
+	return rc;
+}
+#endif
-- 
cgit v1.2.3-18-g5258


From e85b565233236a2a833adea73fb2f0e0f8fa2a61 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 9 Sep 2005 13:01:29 -0700
Subject: [PATCH] move truncate_inode_pages() into ->delete_inode()

Allow file systems supporting ->delete_inode() to call
truncate_inode_pages() on their own.  OCFS2 wants this so it can query the
cluster before making a final decision on whether to wipe an inode from
disk or not.  In some corner cases an inode marked on the local node via
voting may not actually get orphaned.  A good example is node death before
the transaction moving the inode to the orphan dir commits to the journal.
Without this patch, the truncate_inode_pages() call in
generic_delete_inode() would discard valid data for such inodes.

During earlier discussion in the 2.6.13 merge plan thread, Christoph
Hellwig indicated that other file systems might also find this useful.

IMHO, the best solution would be to just allow ->drop_inode() to do the
cluster query but it seems that would require a substantial reworking of
that section of the code.  Assuming it is safe to call write_inode_now() in
ocfs2_delete_inode() for those inodes which won't actually get wiped, this
solution should get us by for now.

Trivial testing of this patch (and a related OCFS2 update) has shown this
to avoid the corruption I'm seeing.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 71df1b1e8f7..f80a79ff156 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 
-	if (inode->i_data.nrpages)
-		truncate_inode_pages(&inode->i_data, 0);
-
 	security_inode_delete(inode);
 
 	if (op->delete_inode) {
 		void (*delete)(struct inode *) = op->delete_inode;
 		if (!is_bad_inode(inode))
 			DQUOT_INIT(inode);
-		/* s_op->delete_inode internally recalls clear_inode() */
+		/* Filesystems implementing their own
+		 * s_op->delete_inode are required to call
+		 * truncate_inode_pages and clear_inode()
+		 * internally */
 		delete(inode);
-	} else
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
+	}
 	spin_lock(&inode_lock);
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_lock);
-- 
cgit v1.2.3-18-g5258


From fef266580e5cf897a1b63528fc6b1185e2d6bb87 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 9 Sep 2005 13:01:31 -0700
Subject: [PATCH] update filesystems for new delete_inode behavior

Update the file systems in fs/ implementing a delete_inode() callback to
call truncate_inode_pages().  One implementation note: In developing this
patch I put the calls to truncate_inode_pages() at the very top of those
filesystems' delete_inode() callbacks in order to retain the previous
behavior.  I'm guessing that some of those could probably be optimized.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/affs/inode.c         | 1 +
 fs/bfs/inode.c          | 2 ++
 fs/ext2/inode.c         | 2 ++
 fs/ext3/inode.c         | 2 ++
 fs/fat/inode.c          | 2 ++
 fs/hostfs/hostfs_kern.c | 1 +
 fs/hpfs/inode.c         | 1 +
 fs/jffs/inode-v23.c     | 1 +
 fs/jfs/inode.c          | 2 ++
 fs/minix/inode.c        | 1 +
 fs/ncpfs/inode.c        | 2 ++
 fs/nfs/inode.c          | 2 ++
 fs/proc/inode.c         | 2 ++
 fs/qnx4/inode.c         | 1 +
 fs/reiserfs/inode.c     | 2 ++
 fs/smbfs/inode.c        | 1 +
 fs/sysv/inode.c         | 1 +
 fs/udf/inode.c          | 2 ++
 fs/ufs/inode.c          | 1 +
 19 files changed, 29 insertions(+)

(limited to 'fs')

diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f200453..9ebe881c678 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
 affs_delete_inode(struct inode *inode)
 {
 	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	if (S_ISREG(inode->i_mode))
 		affs_truncate(inode);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0..628c2c1a7d7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -143,6 +143,8 @@ static void bfs_delete_inode(struct inode * inode)
 
 	dprintf("ino=%08lx\n", inode->i_ino);
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) {
 		printf("invalid ino=%08lx\n", inode->i_ino);
 		return;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c659..fdba4d1d3c6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
  */
 void ext2_delete_inode (struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5..b5177c90d6f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eb..a7cbe68e225 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode);
 
 static void fat_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (!is_bad_inode(inode)) {
 		inode->i_size = 0;
 		fat_truncate(inode);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a00..59c5062cd63 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 
 static void hostfs_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	if(HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa53..e3d17e9ea6c 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
 
 void hpfs_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	lock_kernel();
 	hpfs_remove_fnode(inode->i_sb, inode->i_ino);
 	unlock_kernel();
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 777b90057b8..3dcc6d2162c 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1744,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
 	D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
 		  inode->i_ino));
 
+	truncate_inode_pages(&inode->i_data, 0);
 	lock_kernel();
 	inode->i_size = 0;
 	inode->i_blocks = 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 767c7ecb429..cff352f4ec1 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -132,6 +132,8 @@ void jfs_delete_inode(struct inode *inode)
 	    (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
 			return;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (test_cflag(COMMIT_Freewmap, inode))
 		jfs_free_zero_link(inode);
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d..790cc0d0e97 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
 
 static void minix_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	minix_truncate(inode);
 	minix_free_inode(inode);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b3..8c8839203cd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 static void
 ncp_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (S_ISDIR(inode->i_mode)) {
 		DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 541b418327c..6922469d6fc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
 {
 	dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	nfs_wb_all(inode);
 	/*
 	 * The following should never happen...
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c2868510..effa6c0c467 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
 	struct proc_dir_entry *de;
 	struct task_struct *tsk;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	/* Let go of any associated process */
 	tsk = PROC_I(inode)->task;
 	if (tsk)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a3547..80f32911c0c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
 static void qnx4_delete_inode(struct inode *inode)
 {
 	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	qnx4_truncate(inode);
 	lock_kernel();
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff291c973a5..1a8a1bf2154 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
 	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
 	struct reiserfs_transaction_handle th;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
 	reiserfs_write_lock(inode->i_sb);
 
 	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd..10b994428fe 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
 smb_delete_inode(struct inode *ino)
 {
 	DEBUG1("ino=%ld\n", ino->i_ino);
+	truncate_inode_pages(&ino->i_data, 0);
 	lock_kernel();
 	if (smb_close(ino))
 		PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd..fa33eceb001 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
 
 static void sysv_delete_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
 	sysv_truncate(inode);
 	lock_kernel();
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad..b83890beaaa 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
  */
 void udf_delete_inode(struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+
 	if (is_bad_inode(inode))
 		goto no_delete;
 
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5..55f4aa16e3f 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
 
 void ufs_delete_inode (struct inode * inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
 	/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
 	lock_kernel();
 	mark_inode_dirty(inode);
-- 
cgit v1.2.3-18-g5258


From 10f47e6a1b8b276323b652053945c87a63a5812d Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Fri, 9 Sep 2005 13:01:39 -0700
Subject: [PATCH] ext2: Enable atomic inode security labeling

This patch modifies ext2 to call the inode_init_security LSM hook to obtain
the security attribute for a newly created inode and to set the resulting
attribute on the new inode.  This parallels the existing processing for
setting ACLs on newly created inodes.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/ialloc.c         |  5 +++++
 fs/ext2/xattr.h          |  8 ++++++++
 fs/ext2/xattr_security.c | 22 ++++++++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'fs')

diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c..c8d07030c89 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -615,6 +615,11 @@ got:
 		DQUOT_DROP(inode);
 		goto fail2;
 	}
+	err = ext2_init_security(inode,dir);
+	if (err) {
+		DQUOT_FREE_INODE(inode);
+		goto fail2;
+	}
 	mark_inode_dirty(inode);
 	ext2_debug("allocating inode %lu\n", inode->i_ino);
 	ext2_preread_inode(inode);
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b81..67cfeb66e89 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
 
 # endif  /* CONFIG_EXT2_FS_XATTR */
 
+#ifdef CONFIG_EXT2_FS_SECURITY
+extern int ext2_init_security(struct inode *inode, struct inode *dir);
+#else
+static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe59..a2661279847 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
 #include <linux/ext2_fs.h>
+#include <linux/security.h>
 #include "xattr.h"
 
 static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
 			      value, size, flags);
 }
 
+int
+ext2_init_security(struct inode *inode, struct inode *dir)
+{
+	int err;
+	size_t len;
+	void *value;
+	char *name;
+
+	err = security_inode_init_security(inode, dir, &name, &value, &len);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
+			     name, value, len, 0);
+	kfree(name);
+	kfree(value);
+	return err;
+}
+
 struct xattr_handler ext2_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.list	= ext2_xattr_security_list,
-- 
cgit v1.2.3-18-g5258


From ac50960afa31877493add6d941d8402fa879c452 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Fri, 9 Sep 2005 13:01:41 -0700
Subject: [PATCH] ext3: Enable atomic inode security labeling

This patch modifies ext3 to call the inode_init_security LSM hook to obtain
the security attribute for a newly created inode and to set the resulting
attribute on the new inode as part of the same transaction.  This parallels
the existing processing for setting ACLs on newly created inodes.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/ialloc.c         |  5 +++++
 fs/ext3/xattr.h          | 11 +++++++++++
 fs/ext3/xattr_security.c | 22 ++++++++++++++++++++++
 3 files changed, 38 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ed..96552769d03 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -607,6 +607,11 @@ got:
 		DQUOT_DROP(inode);
 		goto fail2;
   	}
+	err = ext3_init_security(handle,inode, dir);
+	if (err) {
+		DQUOT_FREE_INODE(inode);
+		goto fail2;
+	}
 	err = ext3_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext3_std_error(sb, err);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82d..2ceae38f3d4 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
 #define ext3_xattr_handlers	NULL
 
 # endif  /* CONFIG_EXT3_FS_XATTR */
+
+#ifdef CONFIG_EXT3_FS_SECURITY
+extern int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir);
+#else
+static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+				struct inode *dir)
+{
+	return 0;
+}
+#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e..b9c40c15647 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
 #include <linux/smp_lock.h>
 #include <linux/ext3_jbd.h>
 #include <linux/ext3_fs.h>
+#include <linux/security.h>
 #include "xattr.h"
 
 static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
 			      value, size, flags);
 }
 
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+	int err;
+	size_t len;
+	void *value;
+	char *name;
+
+	err = security_inode_init_security(inode, dir, &name, &value, &len);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+				    name, value, len, 0);
+	kfree(name);
+	kfree(value);
+	return err;
+}
+
 struct xattr_handler ext3_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.list	= ext3_xattr_security_list,
-- 
cgit v1.2.3-18-g5258


From a74574aafea3a63add3251047601611111f44562 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Fri, 9 Sep 2005 13:01:44 -0700
Subject: [PATCH] Remove security_inode_post_create/mkdir/symlink/mknod hooks

This patch removes the inode_post_create/mkdir/mknod/symlink LSM hooks as
they are obsoleted by the new inode_init_security hook that enables atomic
inode security labeling.

If anyone sees any reason to retain these hooks, please speak now.  Also,
is anyone using the post_rename/link hooks; if not, those could also be
removed.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 145e852c4bd..993a65a7d57 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		return error;
 	DQUOT_INIT(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_create(dir, dentry, mode);
-	}
 	return error;
 }
 
@@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_mknod(dir, dentry, mode, dev);
-	}
 	return error;
 }
 
@@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
-	if (!error) {
+	if (!error)
 		fsnotify_mkdir(dir, dentry->d_name.name);
-		security_inode_post_mkdir(dir,dentry, mode);
-	}
 	return error;
 }
 
@@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
 
 	DQUOT_INIT(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, dentry->d_name.name);
-		security_inode_post_symlink(dir, dentry, oldname);
-	}
 	return error;
 }
 
-- 
cgit v1.2.3-18-g5258


From e31e14ec356f36b131576be5bc31d8fef7e95483 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Fri, 9 Sep 2005 13:01:45 -0700
Subject: [PATCH] remove the inode_post_link and inode_post_rename LSM hooks

This patch removes the inode_post_link and inode_post_rename LSM hooks as
they are unused (and likely useless).

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 993a65a7d57..21d85f1ac83 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2012,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	DQUOT_INIT(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	up(&old_dentry->d_inode->i_sem);
-	if (!error) {
+	if (!error)
 		fsnotify_create(dir, new_dentry->d_name.name);
-		security_inode_post_link(old_dentry, dir, new_dentry);
-	}
 	return error;
 }
 
@@ -2134,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 			d_rehash(new_dentry);
 		dput(new_dentry);
 	}
-	if (!error) {
+	if (!error)
 		d_move(old_dentry,new_dentry);
-		security_inode_post_rename(old_dir, old_dentry,
-					   new_dir, new_dentry);
-	}
 	return error;
 }
 
@@ -2164,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		/* The following d_move() should become unconditional */
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
 			d_move(old_dentry, new_dentry);
-		security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
 	}
 	if (target)
 		up(&target->i_sem);
-- 
cgit v1.2.3-18-g5258


From f76baf9365bd66216bf0e0ebfc083e22eda6215b Mon Sep 17 00:00:00 2001
From: Alexander Krizhanovsky <klx@yandex.ru>
Date: Fri, 9 Sep 2005 13:01:59 -0700
Subject: [PATCH] autofs: fix "busy inodes after umount..."

This patch for old autofs (version 3) cleans up dentries which are not put
after killing the automount daemon (it is analogous to the recent patch for
autofs4).

Signed-off-by: Alexander Krizhanovsky <klx@yandex.ru>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/autofs_i.h | 3 ++-
 fs/autofs/dirhash.c  | 5 +++--
 fs/autofs/inode.c    | 3 ++-
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272d..990c28da5ae 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
 	struct file *pipe;
 	pid_t oz_pgrp;
 	int catatonic;
+	struct super_block *sb;
 	unsigned long exp_timeout;
 	ino_t next_dir_ino;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
 void autofs_hash_delete(struct autofs_dir_ent *);
 struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
 void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_dirhash *);
+void autofs_hash_nuke(struct autofs_sb_info *);
 
 /* Expiration-handling functions */
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd079..5ccfcf26310 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
 
 /* Delete everything.  This is used on filesystem destruction, so we
    make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_dirhash *dh)
+void autofs_hash_nuke(struct autofs_sb_info *sbi)
 {
 	int i;
 	struct autofs_dir_ent *ent, *nent;
 
 	for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
-		for ( ent = dh->h[i] ; ent ; ent = nent ) {
+		for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
 			nent = ent->next;
 			if ( ent->dentry )
 				dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
 			kfree(ent);
 		}
 	}
+	shrink_dcache_sb(sbi->sb);
 }
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf..65e5ed42190 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
 	if ( !sbi->catatonic )
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
-	autofs_hash_nuke(&sbi->dirhash);
+	autofs_hash_nuke(sbi);
 	for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
 		if ( test_bit(n, sbi->symlink_bitmap) )
 			kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	s->s_magic = AUTOFS_SUPER_MAGIC;
 	s->s_op = &autofs_sops;
 	s->s_time_gran = 1;
+	sbi->sb = s;
 
 	root_inode = iget(s, AUTOFS_ROOT_INO);
 	root = d_alloc_root(root_inode);
-- 
cgit v1.2.3-18-g5258


From fac92becdaecff64dd91daab0292c5131de92f0d Mon Sep 17 00:00:00 2001
From: Andrew Stribblehill <a.d.stribblehill@durham.ac.uk>
Date: Fri, 9 Sep 2005 13:02:04 -0700
Subject: [PATCH] bfs: fix endianness, signedness; add trivial bugfix

* Makes BFS code endianness-clean.

* Fixes some signedness warnings.

* Fixes a problem in fs/bfs/inode.c:164 where inodes not synced to disk
  don't get fully marked as clean.  Here's how to reproduce it:

# mount -o loop -t bfs /bfs.img /mnt
# df -i /mnt
Filesystem            Inodes   IUsed   IFree IUse% Mounted on
/bfs.img                  48       1      47    3% /mnt
# df -k /mnt
Filesystem           1K-blocks      Used Available Use% Mounted on
/bfs.img                   512         5       508   1% /mnt
# cp 60k-archive.zip /mnt/mt.zip
# df -k /mnt
Filesystem           1K-blocks      Used Available Use% Mounted on
/bfs.img                   512        65       447  13% /mnt
# df -i /mnt
Filesystem            Inodes   IUsed   IFree IUse% Mounted on
/bfs.img                  48       2      46    5% /mnt
# rm /mnt/mt.zip
# echo $?
0

 [If the unlink happens before the buffers flush, the following happens:]

# df -i /mnt
Filesystem            Inodes   IUsed   IFree IUse% Mounted on
/bfs.img                  48       2      46    5% /mnt
# df -k /mnt
Filesystem           1K-blocks      Used Available Use% Mounted on
/bfs.img                   512        65       447  13% /mnt

 fs/bfs/bfs.h           |    1

Signed-off-by: Andrew Stribblehill <ads@wompom.org>
Cc: <tigran@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bfs/bfs.h   |   1 -
 fs/bfs/dir.c   |  25 +++++++-------
 fs/bfs/file.c  |  23 +++++++------
 fs/bfs/inode.c | 102 +++++++++++++++++++++++++++++++--------------------------
 4 files changed, 81 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88be..1fbc53f14ab 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
 	unsigned long si_lasti;
 	unsigned long * si_imap;
 	struct buffer_head * si_sbh;		/* buffer header w/superblock */
-	struct bfs_super_block * si_bfs_sb;	/* superblock in si_sbh->b_data */
 };
 
 /*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057f..e240c335eb2 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
  *	fs/bfs/dir.c
  *	BFS directory operations.
  *	Copyright (C) 1999,2000  Tigran Aivazian <tigran@veritas.com>
+ *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
  */
 
 #include <linux/time.h>
@@ -20,9 +21,9 @@
 #define dprintf(x...)
 #endif
 
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino);
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
 static struct buffer_head * bfs_find_entry(struct inode * dir, 
-	const char * name, int namelen, struct bfs_dirent ** res_dir);
+	const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
 
 static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 {
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
 			de = (struct bfs_dirent *)(bh->b_data + offset);
 			if (de->ino) {
 				int size = strnlen(de->name, BFS_NAMELEN);
-				if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) {
+				if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
 					brelse(bh);
 					unlock_kernel();
 					return 0;
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
 	inode->i_mapping->a_ops = &bfs_aops;
 	inode->i_mode = mode;
 	inode->i_ino = ino;
-	BFS_I(inode)->i_dsk_ino = ino;
+	BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino);
 	BFS_I(inode)->i_sblock = 0;
 	BFS_I(inode)->i_eblock = 0;
 	insert_inode_hash(inode);
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
 	lock_kernel();
 	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
 	if (bh) {
-		unsigned long ino = le32_to_cpu(de->ino);
+		unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
 		brelse(bh);
 		inode = iget(dir->i_sb, ino);
 		if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
 	inode = dentry->d_inode;
 	lock_kernel();
 	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
-	if (!bh || de->ino != inode->i_ino) 
+	if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
 		goto out_brelse;
 
 	if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
 				old_dentry->d_name.name, 
 				old_dentry->d_name.len, &old_de);
 
-	if (!old_bh || old_de->ino != old_inode->i_ino)
+	if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
 		goto end_rename;
 
 	error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
 	.rename			= bfs_rename,
 };
 
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino)
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
 {
 	struct buffer_head * bh;
 	struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
 				}
 				dir->i_mtime = CURRENT_TIME_SEC;
 				mark_inode_dirty(dir);
-				de->ino = ino;
+				de->ino = cpu_to_le16((u16)ino);
 				for (i=0; i<BFS_NAMELEN; i++)
 					de->name[i] = (i < namelen) ? name[i] : 0;
 				mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
 	return -ENOSPC;
 }
 
-static inline int bfs_namecmp(int len, const char * name, const char * buffer)
+static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
 {
 	if (len < BFS_NAMELEN && buffer[len])
 		return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
 }
 
 static struct buffer_head * bfs_find_entry(struct inode * dir, 
-	const char * name, int namelen, struct bfs_dirent ** res_dir)
+	const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
 {
 	unsigned long block, offset;
 	struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
 		}
 		de = (struct bfs_dirent *)(bh->b_data + offset);
 		offset += BFS_DIRENT_SIZE;
-		if (de->ino && bfs_namecmp(namelen, name, de->name)) {
+		if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
 			*res_dir = de;
 			return bh;
 		}
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e..807723b65da 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
 	return 0;
 }
 
-static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, 
-				unsigned long where)
+static int bfs_move_blocks(struct super_block *sb, unsigned long start,
+                           unsigned long end, unsigned long where)
 {
 	unsigned long i;
 
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
 static int bfs_get_block(struct inode * inode, sector_t block, 
 	struct buffer_head * bh_result, int create)
 {
-	long phys;
+	unsigned long phys;
 	int err;
 	struct super_block *sb = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(sb);
 	struct bfs_inode_info *bi = BFS_I(inode);
 	struct buffer_head *sbh = info->si_sbh;
 
-	if (block < 0 || block > info->si_blocks)
+	if (block > info->si_blocks)
 		return -EIO;
 
 	phys = bi->i_sblock + block;
 	if (!create) {
 		if (phys <= bi->i_eblock) {
-			dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys);
+			dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
+                                create, (unsigned long)block, phys);
 			map_bh(bh_result, sb, phys);
 		}
 		return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	   of blocks allocated for this file, we can grant it */
 	if (inode->i_size && phys <= bi->i_eblock) {
 		dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", 
-				create, block, phys);
+				create, (unsigned long)block, phys);
 		map_bh(bh_result, sb, phys);
 		return 0;
 	}
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	/* the rest has to be protected against itself */
 	lock_kernel();
 
-	/* if the last data block for this file is the last allocated block, we can
-	   extend the file trivially, without moving it anywhere */
+	/* if the last data block for this file is the last allocated
+	   block, we can extend the file trivially, without moving it
+	   anywhere */
 	if (bi->i_eblock == info->si_lf_eblk) {
 		dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", 
-				create, block, phys);
+				create, (unsigned long)block, phys);
 		map_bh(bh_result, sb, phys);
 		info->si_freeb -= phys - bi->i_eblock;
 		info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
 	} else
 		err = 0;
 
-	dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys);
+	dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
+                create, (unsigned long)block, phys);
 	bi->i_sblock = phys;
 	phys += block;
 	info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 628c2c1a7d7..c7b39aa279d 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
  *	BFS superblock and inode operations.
  *	Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
  *	From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
+ *
+ *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
  */
 
 #include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
 	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
 	di = (struct bfs_inode *)bh->b_data + off;
 
-	inode->i_mode = 0x0000FFFF & di->i_mode;
-	if (di->i_vtype == BFS_VDIR) {
+	inode->i_mode = 0x0000FFFF &  le32_to_cpu(di->i_mode);
+	if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
 		inode->i_mode |= S_IFDIR;
 		inode->i_op = &bfs_dir_inops;
 		inode->i_fop = &bfs_dir_operations;
-	} else if (di->i_vtype == BFS_VREG) {
+	} else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
 		inode->i_mode |= S_IFREG;
 		inode->i_op = &bfs_file_inops;
 		inode->i_fop = &bfs_file_operations;
 		inode->i_mapping->a_ops = &bfs_aops;
 	}
 
-	inode->i_uid = di->i_uid;
-	inode->i_gid = di->i_gid;
-	inode->i_nlink = di->i_nlink;
+	BFS_I(inode)->i_sblock =  le32_to_cpu(di->i_sblock);
+	BFS_I(inode)->i_eblock =  le32_to_cpu(di->i_eblock);
+	inode->i_uid =  le32_to_cpu(di->i_uid);
+	inode->i_gid =  le32_to_cpu(di->i_gid);
+	inode->i_nlink =  le32_to_cpu(di->i_nlink);
 	inode->i_size = BFS_FILESIZE(di);
 	inode->i_blocks = BFS_FILEBLOCKS(di);
+        if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
 	inode->i_blksize = PAGE_SIZE;
-	inode->i_atime.tv_sec = di->i_atime;
-	inode->i_mtime.tv_sec = di->i_mtime;
-	inode->i_ctime.tv_sec = di->i_ctime;
+	inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
+	inode->i_mtime.tv_sec =  le32_to_cpu(di->i_mtime);
+	inode->i_ctime.tv_sec =  le32_to_cpu(di->i_ctime);
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
-	BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */
-	BFS_I(inode)->i_sblock = di->i_sblock;
-	BFS_I(inode)->i_eblock = di->i_eblock;
+	BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
 
 	brelse(bh);
 }
 
 static int bfs_write_inode(struct inode * inode, int unused)
 {
-	unsigned long ino = inode->i_ino;
+	unsigned int ino = (u16)inode->i_ino;
+        unsigned long i_sblock;
 	struct bfs_inode * di;
 	struct buffer_head * bh;
 	int block, off;
 
+        dprintf("ino=%08x\n", ino);
+
 	if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
-		printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino);
+		printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
 		return -EIO;
 	}
 
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
 	block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
 	bh = sb_bread(inode->i_sb, block);
 	if (!bh) {
-		printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino);
+		printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
 		unlock_kernel();
 		return -EIO;
 	}
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
 	off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
 	di = (struct bfs_inode *)bh->b_data + off;
 
-	if (inode->i_ino == BFS_ROOT_INO)
-		di->i_vtype = BFS_VDIR;
+	if (ino == BFS_ROOT_INO)
+		di->i_vtype = cpu_to_le32(BFS_VDIR);
 	else
-		di->i_vtype = BFS_VREG;
-
-	di->i_ino = inode->i_ino;
-	di->i_mode = inode->i_mode;
-	di->i_uid = inode->i_uid;
-	di->i_gid = inode->i_gid;
-	di->i_nlink = inode->i_nlink;
-	di->i_atime = inode->i_atime.tv_sec;
-	di->i_mtime = inode->i_mtime.tv_sec;
-	di->i_ctime = inode->i_ctime.tv_sec;
-	di->i_sblock = BFS_I(inode)->i_sblock;
-	di->i_eblock = BFS_I(inode)->i_eblock;
-	di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1;
+		di->i_vtype = cpu_to_le32(BFS_VREG);
+
+	di->i_ino = cpu_to_le16(ino);
+	di->i_mode = cpu_to_le32(inode->i_mode);
+	di->i_uid = cpu_to_le32(inode->i_uid);
+	di->i_gid = cpu_to_le32(inode->i_gid);
+	di->i_nlink = cpu_to_le32(inode->i_nlink);
+	di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+	di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+	di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+        i_sblock = BFS_I(inode)->i_sblock;
+	di->i_sblock = cpu_to_le32(i_sblock);
+	di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
+	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
+        dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
 	brelse(bh);
 	unlock_kernel();
 	return 0;
@@ -140,13 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
 	int block, off;
 	struct super_block * s = inode->i_sb;
 	struct bfs_sb_info * info = BFS_SB(s);
+	struct bfs_inode_info * bi = BFS_I(inode);
 
-	dprintf("ino=%08lx\n", inode->i_ino);
+	dprintf("ino=%08lx\n", ino);
 
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) {
-		printf("invalid ino=%08lx\n", inode->i_ino);
+	if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
+		printf("invalid ino=%08lx\n", ino);
 		return;
 	}
 	
@@ -162,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
 		return;
 	}
 	off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
-	if (di->i_ino) {
-		info->si_freeb += BFS_FILEBLOCKS(di);
+	di = (struct bfs_inode *) bh->b_data + off;
+        if (bi->i_dsk_ino) {
+		info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
 		info->si_freei++;
-		clear_bit(di->i_ino, info->si_imap);
+		clear_bit(ino, info->si_imap);
 		dump_imap("delete_inode", s);
-	}
+        }
 	di->i_ino = 0;
 	di->i_sblock = 0;
 	mark_buffer_dirty(bh);
@@ -274,14 +283,14 @@ static struct super_operations bfs_sops = {
 
 void dump_imap(const char *prefix, struct super_block * s)
 {
-#if 0
+#ifdef DEBUG
 	int i;
 	char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
 
 	if (!tmpbuf)
 		return;
 	for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
-		if (i>PAGE_SIZE-100) break;
+		if (i > PAGE_SIZE-100) break;
 		if (test_bit(i, BFS_SB(s)->si_imap))
 			strcat(tmpbuf, "1");
 		else
@@ -297,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	struct buffer_head * bh;
 	struct bfs_super_block * bfs_sb;
 	struct inode * inode;
-	int i, imap_len;
+	unsigned i, imap_len;
 	struct bfs_sb_info * info;
 
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -312,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 	if(!bh)
 		goto out;
 	bfs_sb = (struct bfs_super_block *)bh->b_data;
-	if (bfs_sb->s_magic != BFS_MAGIC) {
+	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n", 
-				s->s_id, bfs_sb->s_magic);
+				s->s_id,  le32_to_cpu(bfs_sb->s_magic));
 		goto out;
 	}
 	if (BFS_UNCLEAN(bfs_sb, s) && !silent)
 		printf("%s is unclean, continuing\n", s->s_id);
 
 	s->s_magic = BFS_MAGIC;
-	info->si_bfs_sb = bfs_sb;
 	info->si_sbh = bh;
-	info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) 
+	info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
 			+ BFS_ROOT_INO - 1;
 
 	imap_len = info->si_lasti/8 + 1;
@@ -348,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		goto out;
 	}
 
-	info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
-	info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS;
+	info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
+	info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 -  cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS;
 	info->si_freei = 0;
 	info->si_lf_eblk = 0;
 	info->si_lf_sblk = 0;
-- 
cgit v1.2.3-18-g5258


From 8f58202bf6b915656e116ece3bc4ace14bfe533a Mon Sep 17 00:00:00 2001
From: Wendy Cheng <wcheng@redhat.com>
Date: Fri, 9 Sep 2005 13:02:08 -0700
Subject: [PATCH] change io_cancel return code for no cancel case

Note that other than a few exceptions, most of the current filesystems and/or
drivers do not have aio cancel specifically defined (kiocb->ki_cancel field
is mostly NULL).  However, sys_io_cancel system call universally sets
return code to -EAGAIN.  This gives applications a wrong impression that
this call is implemented but just never works.  We have customer inquiries
about this issue.

Changed by Benjamin LaHaise to EINVAL instead of ENOSYS

Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 4f641abac3c..769791df36b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1673,7 +1673,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 				ret = -EFAULT;
 		}
 	} else
-		printk(KERN_DEBUG "iocb has no cancel operation\n");
+		ret = -EINVAL;
 
 	put_ioctx(ctx);
 
-- 
cgit v1.2.3-18-g5258


From ac0b1bc1edbe81c0cb36cad7e7f5b91f4d9e12ed Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@linux.intel.com>
Date: Fri, 9 Sep 2005 13:02:09 -0700
Subject: [PATCH] aio: kiocb locking to serialise retry and cancel

Implement a per-kiocb lock to serialise retry operations and cancel.  This
is done using wait_on_bit_lock() on the KIF_LOCKED bit of kiocb->ki_flags.
Also, make the cancellation path lock the kiocb and subsequently release
all references to it if the cancel was successful.  This version includes a
fix for the deadlock with __aio_run_iocbs.

Signed-off-by: Benjamin LaHaise <bcrl@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 769791df36b..201c1847fa0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -546,6 +546,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	return ioctx;
 }
 
+static int lock_kiocb_action(void *param)
+{
+	schedule();
+	return 0;
+}
+
+static inline void lock_kiocb(struct kiocb *iocb)
+{
+	wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
+			 TASK_UNINTERRUPTIBLE);
+}
+
+static inline void unlock_kiocb(struct kiocb *iocb)
+{
+	kiocbClearLocked(iocb);
+	wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
+}
+
 /*
  * use_mm
  *	Makes the calling kernel thread take on the specified
@@ -786,7 +804,9 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		 * Hold an extra reference while retrying i/o.
 		 */
 		iocb->ki_users++;       /* grab extra reference */
+		lock_kiocb(iocb);
 		aio_run_iocb(iocb);
+		unlock_kiocb(iocb);
 		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
 			put_ioctx(ctx);
  	}
@@ -1527,10 +1547,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 
 	spin_lock_irq(&ctx->ctx_lock);
-	if (likely(list_empty(&ctx->run_list))) {
-		aio_run_iocb(req);
-	} else {
-		list_add_tail(&req->ki_run_list, &ctx->run_list);
+	aio_run_iocb(req);
+	unlock_kiocb(req);
+	if (!list_empty(&ctx->run_list)) {
 		/* drain the run list */
 		while (__aio_run_iocbs(ctx))
 			;
@@ -1661,6 +1680,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 	if (NULL != cancel) {
 		struct io_event tmp;
 		pr_debug("calling cancel\n");
+		lock_kiocb(kiocb);
 		memset(&tmp, 0, sizeof(tmp));
 		tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
 		tmp.data = kiocb->ki_user_data;
@@ -1672,6 +1692,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 			if (copy_to_user(result, &tmp, sizeof(tmp)))
 				ret = -EFAULT;
 		}
+		unlock_kiocb(kiocb);
 	} else
 		ret = -EINVAL;
 
-- 
cgit v1.2.3-18-g5258


From badf16621c1f9d1ac753be056fce11b43d6e0be5 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Fri, 9 Sep 2005 13:04:10 -0700
Subject: [PATCH] files: break up files struct

In order for the RCU to work, the file table array, sets and their sizes must
be updated atomically.  Instead of ensuring this through too many memory
barriers, we put the arrays and their sizes in a separate structure.  This
patch takes the first step of putting the file table elements in a separate
structure fdtable that is embedded within files_struct.  It also changes all
the users to refer to the file table using files_fdtable() macro.  Subsequent
application of RCU becomes easier after this.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c       |  8 +++++---
 fs/fcntl.c      | 47 +++++++++++++++++++++++++++++------------------
 fs/file.c       | 42 +++++++++++++++++++++++++-----------------
 fs/locks.c      |  8 +++++---
 fs/open.c       | 41 ++++++++++++++++++++++++-----------------
 fs/proc/array.c |  5 ++++-
 fs/proc/base.c  |  4 +++-
 fs/select.c     | 12 +++++++++---
 8 files changed, 104 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d..14dd03907cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
 static inline void flush_old_files(struct files_struct * files)
 {
 	long j = -1;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
 	for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
 
 		j++;
 		i = j * __NFDBITS;
-		if (i >= files->max_fds || i >= files->max_fdset)
+		fdt = files_fdtable(files);
+		if (i >= fdt->max_fds || i >= fdt->max_fdset)
 			break;
-		set = files->close_on_exec->fds_bits[j];
+		set = fdt->close_on_exec->fds_bits[j];
 		if (!set)
 			continue;
-		files->close_on_exec->fds_bits[j] = 0;
+		fdt->close_on_exec->fds_bits[j] = 0;
 		spin_unlock(&files->file_lock);
 		for ( ; set ; i++,set >>= 1) {
 			if (set & 1) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc3..bfecc623808 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -24,20 +24,24 @@
 void fastcall set_close_on_exec(unsigned int fd, int flag)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
 	if (flag)
-		FD_SET(fd, files->close_on_exec);
+		FD_SET(fd, fdt->close_on_exec);
 	else
-		FD_CLR(fd, files->close_on_exec);
+		FD_CLR(fd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 }
 
 static inline int get_close_on_exec(unsigned int fd)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	int res;
 	spin_lock(&files->file_lock);
-	res = FD_ISSET(fd, files->close_on_exec);
+	fdt = files_fdtable(files);
+	res = FD_ISSET(fd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 	return res;
 }
@@ -54,24 +58,26 @@ static int locate_fd(struct files_struct *files,
 	unsigned int newfd;
 	unsigned int start;
 	int error;
+	struct fdtable *fdt;
 
 	error = -EINVAL;
 	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out;
 
+	fdt = files_fdtable(files);
 repeat:
 	/*
 	 * Someone might have closed fd's in the range
-	 * orig_start..files->next_fd
+	 * orig_start..fdt->next_fd
 	 */
 	start = orig_start;
-	if (start < files->next_fd)
-		start = files->next_fd;
+	if (start < fdt->next_fd)
+		start = fdt->next_fd;
 
 	newfd = start;
-	if (start < files->max_fdset) {
-		newfd = find_next_zero_bit(files->open_fds->fds_bits,
-			files->max_fdset, start);
+	if (start < fdt->max_fdset) {
+		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+			fdt->max_fdset, start);
 	}
 	
 	error = -EMFILE;
@@ -89,8 +95,8 @@ repeat:
 	if (error)
 		goto repeat;
 
-	if (start <= files->next_fd)
-		files->next_fd = newfd + 1;
+	if (start <= fdt->next_fd)
+		fdt->next_fd = newfd + 1;
 	
 	error = newfd;
 	
@@ -101,13 +107,16 @@ out:
 static int dupfd(struct file *file, unsigned int start)
 {
 	struct files_struct * files = current->files;
+	struct fdtable *fdt;
 	int fd;
 
 	spin_lock(&files->file_lock);
 	fd = locate_fd(files, file, start);
 	if (fd >= 0) {
-		FD_SET(fd, files->open_fds);
-		FD_CLR(fd, files->close_on_exec);
+		/* locate_fd() may have expanded fdtable, load the ptr */
+		fdt = files_fdtable(files);
+		FD_SET(fd, fdt->open_fds);
+		FD_CLR(fd, fdt->close_on_exec);
 		spin_unlock(&files->file_lock);
 		fd_install(fd, file);
 	} else {
@@ -123,6 +132,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 	int err = -EBADF;
 	struct file * file, *tofree;
 	struct files_struct * files = current->files;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
@@ -148,13 +158,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 
 	/* Yes. It's a race. In user space. Nothing sane to do */
 	err = -EBUSY;
-	tofree = files->fd[newfd];
-	if (!tofree && FD_ISSET(newfd, files->open_fds))
+	fdt = files_fdtable(files);
+	tofree = fdt->fd[newfd];
+	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
 		goto out_fput;
 
-	files->fd[newfd] = file;
-	FD_SET(newfd, files->open_fds);
-	FD_CLR(newfd, files->close_on_exec);
+	fdt->fd[newfd] = file;
+	FD_SET(newfd, fdt->open_fds);
+	FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d..f5926ce73f3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -59,13 +59,15 @@ static int expand_fd_array(struct files_struct *files, int nr)
 {
 	struct file **new_fds;
 	int error, nfds;
+	struct fdtable *fdt;
 
 	
 	error = -EMFILE;
-	if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
+	fdt = files_fdtable(files);
+	if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN)
 		goto out;
 
-	nfds = files->max_fds;
+	nfds = fdt->max_fds;
 	spin_unlock(&files->file_lock);
 
 	/* 
@@ -95,13 +97,14 @@ static int expand_fd_array(struct files_struct *files, int nr)
 		goto out;
 
 	/* Copy the existing array and install the new pointer */
+	fdt = files_fdtable(files);
 
-	if (nfds > files->max_fds) {
+	if (nfds > fdt->max_fds) {
 		struct file **old_fds;
 		int i;
 		
-		old_fds = xchg(&files->fd, new_fds);
-		i = xchg(&files->max_fds, nfds);
+		old_fds = xchg(&fdt->fd, new_fds);
+		i = xchg(&fdt->max_fds, nfds);
 
 		/* Don't copy/clear the array if we are creating a new
 		   fd array for fork() */
@@ -164,12 +167,14 @@ static int expand_fdset(struct files_struct *files, int nr)
 {
 	fd_set *new_openset = NULL, *new_execset = NULL;
 	int error, nfds = 0;
+	struct fdtable *fdt;
 
 	error = -EMFILE;
-	if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
+	fdt = files_fdtable(files);
+	if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN)
 		goto out;
 
-	nfds = files->max_fdset;
+	nfds = fdt->max_fdset;
 	spin_unlock(&files->file_lock);
 
 	/* Expand to the max in easy steps */
@@ -193,24 +198,25 @@ static int expand_fdset(struct files_struct *files, int nr)
 	error = 0;
 	
 	/* Copy the existing tables and install the new pointers */
-	if (nfds > files->max_fdset) {
-		int i = files->max_fdset / (sizeof(unsigned long) * 8);
-		int count = (nfds - files->max_fdset) / 8;
+	fdt = files_fdtable(files);
+	if (nfds > fdt->max_fdset) {
+		int i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+		int count = (nfds - fdt->max_fdset) / 8;
 		
 		/* 
 		 * Don't copy the entire array if the current fdset is
 		 * not yet initialised.  
 		 */
 		if (i) {
-			memcpy (new_openset, files->open_fds, files->max_fdset/8);
-			memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
+			memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8);
+			memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8);
 			memset (&new_openset->fds_bits[i], 0, count);
 			memset (&new_execset->fds_bits[i], 0, count);
 		}
 		
-		nfds = xchg(&files->max_fdset, nfds);
-		new_openset = xchg(&files->open_fds, new_openset);
-		new_execset = xchg(&files->close_on_exec, new_execset);
+		nfds = xchg(&fdt->max_fdset, nfds);
+		new_openset = xchg(&fdt->open_fds, new_openset);
+		new_execset = xchg(&fdt->close_on_exec, new_execset);
 		spin_unlock(&files->file_lock);
 		free_fdset (new_openset, nfds);
 		free_fdset (new_execset, nfds);
@@ -237,13 +243,15 @@ out:
 int expand_files(struct files_struct *files, int nr)
 {
 	int err, expand = 0;
+	struct fdtable *fdt;
 
-	if (nr >= files->max_fdset) {
+	fdt = files_fdtable(files);
+	if (nr >= fdt->max_fdset) {
 		expand = 1;
 		if ((err = expand_fdset(files, nr)))
 			goto out;
 	}
-	if (nr >= files->max_fds) {
+	if (nr >= fdt->max_fds) {
 		expand = 1;
 		if ((err = expand_fd_array(files, nr)))
 			goto out;
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179f..c2c09b4798d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
 {
 	struct files_struct *files = current->files;
 	int i, j;
+	struct fdtable *fdt;
 
 	if (from == files)
 		return;
 
 	lock_kernel();
 	j = 0;
+	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
-		if (i >= files->max_fdset || i >= files->max_fds)
+		if (i >= fdt->max_fdset || i >= fdt->max_fds)
 			break;
-		set = files->open_fds->fds_bits[j++];
+		set = fdt->open_fds->fds_bits[j++];
 		while (set) {
 			if (set & 1) {
-				struct file *file = files->fd[i];
+				struct file *file = fdt->fd[i];
 				if (file)
 					__steal_locks(file, from);
 			}
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c2..b6542516a0c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -842,14 +842,16 @@ int get_unused_fd(void)
 {
 	struct files_struct * files = current->files;
 	int fd, error;
+	struct fdtable *fdt;
 
   	error = -EMFILE;
 	spin_lock(&files->file_lock);
 
 repeat:
- 	fd = find_next_zero_bit(files->open_fds->fds_bits, 
-				files->max_fdset, 
-				files->next_fd);
+	fdt = files_fdtable(files);
+ 	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+				fdt->max_fdset,
+				fdt->next_fd);
 
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +874,14 @@ repeat:
 		goto repeat;
 	}
 
-	FD_SET(fd, files->open_fds);
-	FD_CLR(fd, files->close_on_exec);
-	files->next_fd = fd + 1;
+	FD_SET(fd, fdt->open_fds);
+	FD_CLR(fd, fdt->close_on_exec);
+	fdt->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
-	if (files->fd[fd] != NULL) {
+	if (fdt->fd[fd] != NULL) {
 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
-		files->fd[fd] = NULL;
+		fdt->fd[fd] = NULL;
 	}
 #endif
 	error = fd;
@@ -893,9 +895,10 @@ EXPORT_SYMBOL(get_unused_fd);
 
 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
-	__FD_CLR(fd, files->open_fds);
-	if (fd < files->next_fd)
-		files->next_fd = fd;
+	struct fdtable *fdt = files_fdtable(files);
+	__FD_CLR(fd, fdt->open_fds);
+	if (fd < fdt->next_fd)
+		fdt->next_fd = fd;
 }
 
 void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +927,12 @@ EXPORT_SYMBOL(put_unused_fd);
 void fastcall fd_install(unsigned int fd, struct file * file)
 {
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
-	if (unlikely(files->fd[fd] != NULL))
+	fdt = files_fdtable(files);
+	if (unlikely(fdt->fd[fd] != NULL))
 		BUG();
-	files->fd[fd] = file;
+	fdt->fd[fd] = file;
 	spin_unlock(&files->file_lock);
 }
 
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
 {
 	struct file * filp;
 	struct files_struct *files = current->files;
+	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
-	if (fd >= files->max_fds)
+	fdt = files_fdtable(files);
+	if (fd >= fdt->max_fds)
 		goto out_unlock;
-	filp = files->fd[fd];
+	filp = fdt->fd[fd];
 	if (!filp)
 		goto out_unlock;
-	files->fd[fd] = NULL;
-	FD_CLR(fd, files->close_on_exec);
+	fdt->fd[fd] = NULL;
+	FD_CLR(fd, fdt->close_on_exec);
 	__put_unused_fd(files, fd);
 	spin_unlock(&files->file_lock);
 	return filp_close(filp, files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998a..d88d518d30f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 {
 	struct group_info *group_info;
 	int g;
+	struct fdtable *fdt = NULL;
 
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);
 	task_lock(p);
+	if (p->files)
+		fdt = files_fdtable(p->files);
 	buffer += sprintf(buffer,
 		"FDSize:\t%d\n"
 		"Groups:\t",
-		p->files ? p->files->max_fds : 0);
+		fdt ? fdt->max_fds : 0);
 
 	group_info = p->group_info;
 	get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d..d0087a0b024 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1039,6 +1039,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 	int retval;
 	char buf[NUMBUF];
 	struct files_struct * files;
+	struct fdtable *fdt;
 
 	retval = -ENOENT;
 	if (!pid_alive(p))
@@ -1062,8 +1063,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 			if (!files)
 				goto out;
 			spin_lock(&files->file_lock);
+			fdt = files_fdtable(files);
 			for (fd = filp->f_pos-2;
-			     fd < files->max_fds;
+			     fd < fdt->max_fds;
 			     fd++, filp->f_pos++) {
 				unsigned int i,j;
 
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0..2e56325c73c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -132,11 +132,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
 	unsigned long *open_fds;
 	unsigned long set;
 	int max;
+	struct fdtable *fdt;
 
 	/* handle last in-complete long-word first */
 	set = ~(~0UL << (n & (__NFDBITS-1)));
 	n /= __NFDBITS;
-	open_fds = current->files->open_fds->fds_bits+n;
+	fdt = files_fdtable(current->files);
+	open_fds = fdt->open_fds->fds_bits+n;
 	max = 0;
 	if (set) {
 		set &= BITS(fds, n);
@@ -299,6 +301,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 	char *bits;
 	long timeout;
 	int ret, size, max_fdset;
+	struct fdtable *fdt;
 
 	timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
@@ -326,7 +329,8 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
-	max_fdset = current->files->max_fdset;
+	fdt = files_fdtable(current->files);
+	max_fdset = fdt->max_fdset;
 	if (n > max_fdset)
 		n = max_fdset;
 
@@ -464,9 +468,11 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
  	unsigned int i;
 	struct poll_list *head;
  	struct poll_list *walk;
+	struct fdtable *fdt;
 
 	/* Do a sanity check on nfds ... */
-	if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
+	fdt = files_fdtable(current->files);
+	if (nfds > fdt->max_fdset && nfds > OPEN_MAX)
 		return -EINVAL;
 
 	if (timeout) {
-- 
cgit v1.2.3-18-g5258


From ab2af1f5005069321c5d130f09cce577b03f43ef Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Fri, 9 Sep 2005 13:04:13 -0700
Subject: [PATCH] files: files struct with RCU

Patch to eliminate struct files_struct.file_lock spinlock on the reader side
and use rcu refcounting rcuref_xxx api for the f_count refcounter.  The
updates to the fdtable are done by allocating a new fdtable structure and
setting files->fdt to point to the new structure.  The fdtable structure is
protected by RCU thereby allowing lock-free lookup.  For fd arrays/sets that
are vmalloced, we use keventd to free them since RCU callbacks can't sleep.  A
global list of fdtable to be freed is not scalable, so we use a per-cpu list.
If keventd is already handling the current cpu's work, we use a timer to defer
queueing of that work.

Since the last publication, this patch has been re-written to avoid using
explicit memory barriers and use rcu_assign_pointer(), rcu_dereference()
primitives instead.  This required that the fd information is kept in a
separate structure (fdtable) and updated atomically.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c        |   3 +-
 fs/fcntl.c      |  13 +-
 fs/file.c       | 389 +++++++++++++++++++++++++++++++++++++-------------------
 fs/file_table.c |  40 ++++--
 fs/open.c       |   8 +-
 5 files changed, 303 insertions(+), 150 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 201c1847fa0..38f62680fd6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
+#include <linux/rcuref.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bfecc623808..d2f3ed8acd9 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
 #include <linux/security.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
+#include <linux/rcupdate.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -64,8 +65,8 @@ static int locate_fd(struct files_struct *files,
 	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out;
 
-	fdt = files_fdtable(files);
 repeat:
+	fdt = files_fdtable(files);
 	/*
 	 * Someone might have closed fd's in the range
 	 * orig_start..fdt->next_fd
@@ -95,9 +96,15 @@ repeat:
 	if (error)
 		goto repeat;
 
+	/*
+	 * We reacquired files_lock, so we are safe as long as
+	 * we reacquire the fdtable pointer and use it while holding
+	 * the lock, no one can free it during that time.
+	 */
+	fdt = files_fdtable(files);
 	if (start <= fdt->next_fd)
 		fdt->next_fd = newfd + 1;
-	
+
 	error = newfd;
 	
 out:
@@ -163,7 +170,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
 		goto out_fput;
 
-	fdt->fd[newfd] = file;
+	rcu_assign_pointer(fdt->fd[newfd], file);
 	FD_SET(newfd, fdt->open_fds);
 	FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
diff --git a/fs/file.c b/fs/file.c
index f5926ce73f3..2127a7b9dc3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
 #include <linux/vmalloc.h>
 #include <linux/file.h>
 #include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+struct fdtable_defer {
+	spinlock_t lock;
+	struct work_struct wq;
+	struct timer_list timer;
+	struct fdtable *next;
+};
+
+/*
+ * We use this list to defer free fdtables that have vmalloced
+ * sets/arrays. By keeping a per-cpu list, we avoid having to embed
+ * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
+ * this per-task structure.
+ */
+static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
 
 /*
@@ -48,85 +67,143 @@ void free_fd_array(struct file **array, int num)
 		vfree(array);
 }
 
-/*
- * Expand the fd array in the files_struct.  Called with the files
- * spinlock held for write.
- */
-
-static int expand_fd_array(struct files_struct *files, int nr)
-	__releases(files->file_lock)
-	__acquires(files->file_lock)
+static void __free_fdtable(struct fdtable *fdt)
 {
-	struct file **new_fds;
-	int error, nfds;
-	struct fdtable *fdt;
+	int fdset_size, fdarray_size;
 
-	
-	error = -EMFILE;
-	fdt = files_fdtable(files);
-	if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN)
-		goto out;
+	fdset_size = fdt->max_fdset / 8;
+	fdarray_size = fdt->max_fds * sizeof(struct file *);
+	free_fdset(fdt->open_fds, fdset_size);
+	free_fdset(fdt->close_on_exec, fdset_size);
+	free_fd_array(fdt->fd, fdarray_size);
+	kfree(fdt);
+}
 
-	nfds = fdt->max_fds;
-	spin_unlock(&files->file_lock);
+static void fdtable_timer(unsigned long data)
+{
+	struct fdtable_defer *fddef = (struct fdtable_defer *)data;
 
-	/* 
-	 * Expand to the max in easy steps, and keep expanding it until
-	 * we have enough for the requested fd array size. 
+	spin_lock(&fddef->lock);
+	/*
+	 * If someone already emptied the queue return.
 	 */
+	if (!fddef->next)
+		goto out;
+	if (!schedule_work(&fddef->wq))
+		mod_timer(&fddef->timer, 5);
+out:
+	spin_unlock(&fddef->lock);
+}
 
-	do {
-#if NR_OPEN_DEFAULT < 256
-		if (nfds < 256)
-			nfds = 256;
-		else 
-#endif
-		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
-			nfds = PAGE_SIZE / sizeof(struct file *);
-		else {
-			nfds = nfds * 2;
-			if (nfds > NR_OPEN)
-				nfds = NR_OPEN;
-		}
-	} while (nfds <= nr);
+static void free_fdtable_work(struct fdtable_defer *f)
+{
+	struct fdtable *fdt;
 
-	error = -ENOMEM;
-	new_fds = alloc_fd_array(nfds);
-	spin_lock(&files->file_lock);
-	if (!new_fds)
-		goto out;
+	spin_lock_bh(&f->lock);
+	fdt = f->next;
+	f->next = NULL;
+	spin_unlock_bh(&f->lock);
+	while(fdt) {
+		struct fdtable *next = fdt->next;
+		__free_fdtable(fdt);
+		fdt = next;
+	}
+}
 
-	/* Copy the existing array and install the new pointer */
-	fdt = files_fdtable(files);
+static void free_fdtable_rcu(struct rcu_head *rcu)
+{
+	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
+	int fdset_size, fdarray_size;
+	struct fdtable_defer *fddef;
 
-	if (nfds > fdt->max_fds) {
-		struct file **old_fds;
-		int i;
-		
-		old_fds = xchg(&fdt->fd, new_fds);
-		i = xchg(&fdt->max_fds, nfds);
-
-		/* Don't copy/clear the array if we are creating a new
-		   fd array for fork() */
-		if (i) {
-			memcpy(new_fds, old_fds, i * sizeof(struct file *));
-			/* clear the remainder of the array */
-			memset(&new_fds[i], 0,
-			       (nfds-i) * sizeof(struct file *)); 
-
-			spin_unlock(&files->file_lock);
-			free_fd_array(old_fds, i);
-			spin_lock(&files->file_lock);
-		}
+	BUG_ON(!fdt);
+	fdset_size = fdt->max_fdset / 8;
+	fdarray_size = fdt->max_fds * sizeof(struct file *);
+
+	if (fdt->free_files) {
+		/*
+		 * The this fdtable was embedded in the files structure
+		 * and the files structure itself was getting destroyed.
+		 * It is now safe to free the files structure.
+		 */
+		kmem_cache_free(files_cachep, fdt->free_files);
+		return;
+	}
+	if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+		/*
+		 * The fdtable was embedded
+		 */
+		return;
+	}
+	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
+		kfree(fdt->open_fds);
+		kfree(fdt->close_on_exec);
+		kfree(fdt->fd);
+		kfree(fdt);
 	} else {
-		/* Somebody expanded the array while we slept ... */
-		spin_unlock(&files->file_lock);
-		free_fd_array(new_fds, nfds);
-		spin_lock(&files->file_lock);
+		fddef = &get_cpu_var(fdtable_defer_list);
+		spin_lock(&fddef->lock);
+		fdt->next = fddef->next;
+		fddef->next = fdt;
+		/*
+		 * vmallocs are handled from the workqueue context.
+		 * If the per-cpu workqueue is running, then we
+		 * defer work scheduling through a timer.
+		 */
+		if (!schedule_work(&fddef->wq))
+			mod_timer(&fddef->timer, 5);
+		spin_unlock(&fddef->lock);
+		put_cpu_var(fdtable_defer_list);
 	}
-	error = 0;
-out:
-	return error;
+}
+
+void free_fdtable(struct fdtable *fdt)
+{
+	if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
+					fdt->max_fds > NR_OPEN_DEFAULT)
+		call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
+/*
+ * Expand the fdset in the files_struct.  Called with the files spinlock
+ * held for write.
+ */
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+{
+	int i;
+	int count;
+
+	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
+	BUG_ON(nfdt->max_fds < fdt->max_fds);
+	/* Copy the existing tables and install the new pointers */
+
+	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+	count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+
+	/*
+	 * Don't copy the entire array if the current fdset is
+	 * not yet initialised.
+	 */
+	if (i) {
+		memcpy (nfdt->open_fds, fdt->open_fds,
+						fdt->max_fdset/8);
+		memcpy (nfdt->close_on_exec, fdt->close_on_exec,
+						fdt->max_fdset/8);
+		memset (&nfdt->open_fds->fds_bits[i], 0, count);
+		memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
+	}
+
+	/* Don't copy/clear the array if we are creating a new
+	   fd array for fork() */
+	if (fdt->max_fds) {
+		memcpy(nfdt->fd, fdt->fd,
+			fdt->max_fds * sizeof(struct file *));
+		/* clear the remainder of the array */
+		memset(&nfdt->fd[fdt->max_fds], 0,
+		       (nfdt->max_fds - fdt->max_fds) *
+					sizeof(struct file *));
+	}
+	nfdt->next_fd = fdt->next_fd;
 }
 
 /*
@@ -157,28 +234,21 @@ void free_fdset(fd_set *array, int num)
 		vfree(array);
 }
 
-/*
- * Expand the fdset in the files_struct.  Called with the files spinlock
- * held for write.
- */
-static int expand_fdset(struct files_struct *files, int nr)
-	__releases(file->file_lock)
-	__acquires(file->file_lock)
+static struct fdtable *alloc_fdtable(int nr)
 {
-	fd_set *new_openset = NULL, *new_execset = NULL;
-	int error, nfds = 0;
-	struct fdtable *fdt;
-
-	error = -EMFILE;
-	fdt = files_fdtable(files);
-	if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN)
-		goto out;
+	struct fdtable *fdt = NULL;
+	int nfds = 0;
+  	fd_set *new_openset = NULL, *new_execset = NULL;
+	struct file **new_fds;
 
-	nfds = fdt->max_fdset;
-	spin_unlock(&files->file_lock);
+	fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+	if (!fdt)
+  		goto out;
+	memset(fdt, 0, sizeof(*fdt));
 
-	/* Expand to the max in easy steps */
-	do {
+	nfds = __FD_SETSIZE;
+  	/* Expand to the max in easy steps */
+  	do {
 		if (nfds < (PAGE_SIZE * 8))
 			nfds = PAGE_SIZE * 8;
 		else {
@@ -188,50 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
 		}
 	} while (nfds <= nr);
 
-	error = -ENOMEM;
-	new_openset = alloc_fdset(nfds);
-	new_execset = alloc_fdset(nfds);
-	spin_lock(&files->file_lock);
-	if (!new_openset || !new_execset)
+  	new_openset = alloc_fdset(nfds);
+  	new_execset = alloc_fdset(nfds);
+  	if (!new_openset || !new_execset)
+  		goto out;
+	fdt->open_fds = new_openset;
+	fdt->close_on_exec = new_execset;
+	fdt->max_fdset = nfds;
+
+	nfds = NR_OPEN_DEFAULT;
+	/*
+	 * Expand to the max in easy steps, and keep expanding it until
+	 * we have enough for the requested fd array size.
+	 */
+	do {
+#if NR_OPEN_DEFAULT < 256
+		if (nfds < 256)
+			nfds = 256;
+		else
+#endif
+		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
+			nfds = PAGE_SIZE / sizeof(struct file *);
+		else {
+			nfds = nfds * 2;
+			if (nfds > NR_OPEN)
+				nfds = NR_OPEN;
+  		}
+	} while (nfds <= nr);
+	new_fds = alloc_fd_array(nfds);
+	if (!new_fds)
 		goto out;
+	fdt->fd = new_fds;
+	fdt->max_fds = nfds;
+	fdt->free_files = NULL;
+	return fdt;
+out:
+  	if (new_openset)
+  		free_fdset(new_openset, nfds);
+  	if (new_execset)
+  		free_fdset(new_execset, nfds);
+	kfree(fdt);
+	return NULL;
+}
 
-	error = 0;
-	
-	/* Copy the existing tables and install the new pointers */
+/*
+ * Expands the file descriptor table - it will allocate a new fdtable and
+ * both fd array and fdset. It is expected to be called with the
+ * files_lock held.
+ */
+static int expand_fdtable(struct files_struct *files, int nr)
+	__releases(files->file_lock)
+	__acquires(files->file_lock)
+{
+	int error = 0;
+	struct fdtable *fdt;
+	struct fdtable *nfdt = NULL;
+
+	spin_unlock(&files->file_lock);
+	nfdt = alloc_fdtable(nr);
+	if (!nfdt) {
+		error = -ENOMEM;
+		spin_lock(&files->file_lock);
+		goto out;
+	}
+
+	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
-	if (nfds > fdt->max_fdset) {
-		int i = fdt->max_fdset / (sizeof(unsigned long) * 8);
-		int count = (nfds - fdt->max_fdset) / 8;
-		
-		/* 
-		 * Don't copy the entire array if the current fdset is
-		 * not yet initialised.  
-		 */
-		if (i) {
-			memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8);
-			memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8);
-			memset (&new_openset->fds_bits[i], 0, count);
-			memset (&new_execset->fds_bits[i], 0, count);
-		}
-		
-		nfds = xchg(&fdt->max_fdset, nfds);
-		new_openset = xchg(&fdt->open_fds, new_openset);
-		new_execset = xchg(&fdt->close_on_exec, new_execset);
+	/*
+	 * Check again since another task may have expanded the
+	 * fd table while we dropped the lock
+	 */
+	if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
+		copy_fdtable(nfdt, fdt);
+	} else {
+		/* Somebody expanded while we dropped file_lock */
 		spin_unlock(&files->file_lock);
-		free_fdset (new_openset, nfds);
-		free_fdset (new_execset, nfds);
+		__free_fdtable(nfdt);
 		spin_lock(&files->file_lock);
-		return 0;
-	} 
-	/* Somebody expanded the array while we slept ... */
-
+		goto out;
+	}
+	rcu_assign_pointer(files->fdt, nfdt);
+	free_fdtable(fdt);
 out:
-	spin_unlock(&files->file_lock);
-	if (new_openset)
-		free_fdset(new_openset, nfds);
-	if (new_execset)
-		free_fdset(new_execset, nfds);
-	spin_lock(&files->file_lock);
 	return error;
 }
 
@@ -246,17 +354,36 @@ int expand_files(struct files_struct *files, int nr)
 	struct fdtable *fdt;
 
 	fdt = files_fdtable(files);
-	if (nr >= fdt->max_fdset) {
-		expand = 1;
-		if ((err = expand_fdset(files, nr)))
+	if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
+		if (fdt->max_fdset >= NR_OPEN ||
+			fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
+			err = -EMFILE;
 			goto out;
-	}
-	if (nr >= fdt->max_fds) {
+		}
 		expand = 1;
-		if ((err = expand_fd_array(files, nr)))
+		if ((err = expand_fdtable(files, nr)))
 			goto out;
 	}
 	err = expand;
 out:
 	return err;
 }
+
+static void __devinit fdtable_defer_list_init(int cpu)
+{
+	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
+	spin_lock_init(&fddef->lock);
+	INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+	init_timer(&fddef->timer);
+	fddef->timer.data = (unsigned long)fddef;
+	fddef->timer.function = fdtable_timer;
+	fddef->next = NULL;
+}
+
+void __init files_defer_init(void)
+{
+	int i;
+	/* Really early - can't use for_each_cpu */
+	for (i = 0; i < NR_CPUS; i++)
+		fdtable_defer_list_init(i);
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e1737de..86ec8ae985b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/eventpoll.h>
+#include <linux/rcupdate.h>
 #include <linux/mount.h>
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
 	spin_unlock_irqrestore(&filp_count_lock, flags);
 }
 
-static inline void file_free(struct file *f)
+static inline void file_free_rcu(struct rcu_head *head)
 {
+	struct file *f =  container_of(head, struct file, f_rcuhead);
 	kmem_cache_free(filp_cachep, f);
 }
 
+static inline void file_free(struct file *f)
+{
+	call_rcu(&f->f_rcuhead, file_free_rcu);
+}
+
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
  * we run out of memory.
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count))
+	if (rcuref_dec_and_test(&file->f_count))
 		__fput(file);
 }
 
@@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
 	struct file *file;
 	struct files_struct *files = current->files;
 
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
 	file = fcheck_files(files, fd);
-	if (file)
-		get_file(file);
-	spin_unlock(&files->file_lock);
+	if (file) {
+		if (!rcuref_inc_lf(&file->f_count)) {
+			/* File object ref couldn't be taken */
+			rcu_read_unlock();
+			return NULL;
+		}
+	}
+	rcu_read_unlock();
+
 	return file;
 }
 
@@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 	if (likely((atomic_read(&files->count) == 1))) {
 		file = fcheck_files(files, fd);
 	} else {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
-			get_file(file);
-			*fput_needed = 1;
+			if (rcuref_inc_lf(&file->f_count))
+				*fput_needed = 1;
+			else
+				/* Didn't get the reference, someone's freed */
+				file = NULL;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 	}
+
 	return file;
 }
 
 
 void put_filp(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count)) {
+	if (rcuref_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_kill(file);
 		file_free(file);
@@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages)
 	files_stat.max_files = n; 
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
+	files_defer_init();
 } 
diff --git a/fs/open.c b/fs/open.c
index b6542516a0c..2fac58c5191 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
 #include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/rcupdate.h>
 
 #include <asm/unistd.h>
 
@@ -930,9 +931,8 @@ void fastcall fd_install(unsigned int fd, struct file * file)
 	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
-	if (unlikely(fdt->fd[fd] != NULL))
-		BUG();
-	fdt->fd[fd] = file;
+	BUG_ON(fdt->fd[fd] != NULL);
+	rcu_assign_pointer(fdt->fd[fd], file);
 	spin_unlock(&files->file_lock);
 }
 
@@ -1024,7 +1024,7 @@ asmlinkage long sys_close(unsigned int fd)
 	filp = fdt->fd[fd];
 	if (!filp)
 		goto out_unlock;
-	fdt->fd[fd] = NULL;
+	rcu_assign_pointer(fdt->fd[fd], NULL);
 	FD_CLR(fd, fdt->close_on_exec);
 	__put_unused_fd(files, fd);
 	spin_unlock(&files->file_lock);
-- 
cgit v1.2.3-18-g5258


From b835996f628eadb55c5fb222ba46fe9395bf73c7 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Fri, 9 Sep 2005 13:04:14 -0700
Subject: [PATCH] files: lock-free fd look-up

With the use of RCU in files structure, the look-up of files using fds can now
be lock-free.  The lookup is protected by rcu_read_lock()/rcu_read_unlock().
This patch changes the readers to use lock-free lookup.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Ravikiran Thirumalai <kiran_th@gmail.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fcntl.c     |  4 ++--
 fs/proc/base.c | 29 +++++++++++++++--------------
 fs/select.c    | 13 ++++++++++---
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index d2f3ed8acd9..863b46e0d78 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsigned int fd)
 	struct files_struct *files = current->files;
 	struct fdtable *fdt;
 	int res;
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
 	fdt = files_fdtable(files);
 	res = FD_ISSET(fd, fdt->close_on_exec);
-	spin_unlock(&files->file_lock);
+	rcu_read_unlock();
 	return res;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d0087a0b024..23db452ab42 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -62,6 +62,7 @@
 #include <linux/namespace.h>
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
 #include <linux/mount.h>
 #include <linux/security.h>
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
 
 	files = get_files_struct(task);
 	if (files) {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
 			*mnt = mntget(file->f_vfsmnt);
 			*dentry = dget(file->f_dentry);
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 			return 0;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 	return -ENOENT;
@@ -1062,7 +1063,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 			files = get_files_struct(p);
 			if (!files)
 				goto out;
-			spin_lock(&files->file_lock);
+			rcu_read_lock();
 			fdt = files_fdtable(files);
 			for (fd = filp->f_pos-2;
 			     fd < fdt->max_fds;
@@ -1071,7 +1072,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 
 				if (!fcheck_files(files, fd))
 					continue;
-				spin_unlock(&files->file_lock);
+				rcu_read_unlock();
 
 				j = NUMBUF;
 				i = fd;
@@ -1083,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
 
 				ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
 				if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
-					spin_lock(&files->file_lock);
+					rcu_read_lock();
 					break;
 				}
-				spin_lock(&files->file_lock);
+				rcu_read_lock();
 			}
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 	}
 out:
@@ -1263,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	files = get_files_struct(task);
 	if (files) {
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
 		if (fcheck_files(files, fd)) {
-			spin_unlock(&files->file_lock);
+			rcu_read_unlock();
 			put_files_struct(files);
 			if (task_dumpable(task)) {
 				inode->i_uid = task->euid;
@@ -1277,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 			security_task_to_inode(task, inode);
 			return 1;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 	d_drop(dentry);
@@ -1369,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	if (!files)
 		goto out_unlock;
 	inode->i_mode = S_IFLNK;
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (!file)
 		goto out_unlock2;
@@ -1377,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 		inode->i_mode |= S_IRUSR | S_IXUSR;
 	if (file->f_mode & 2)
 		inode->i_mode |= S_IWUSR | S_IXUSR;
-	spin_unlock(&files->file_lock);
+	rcu_read_unlock();
 	put_files_struct(files);
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
@@ -1387,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	return NULL;
 
 out_unlock2:
-	spin_unlock(&files->file_lock);
+	rcu_read_unlock();
 	put_files_struct(files);
 out_unlock:
 	iput(inode);
diff --git a/fs/select.c b/fs/select.c
index 2e56325c73c..f10a10317d5 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 
@@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
 	int retval, i;
 	long __timeout = *timeout;
 
- 	spin_lock(&current->files->file_lock);
+	rcu_read_lock();
 	retval = max_select_fd(n, fds);
-	spin_unlock(&current->files->file_lock);
+	rcu_read_unlock();
 
 	if (retval < 0)
 		return retval;
@@ -329,8 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
+	rcu_read_lock();
 	fdt = files_fdtable(current->files);
 	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
 	if (n > max_fdset)
 		n = max_fdset;
 
@@ -469,10 +472,14 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
 	struct poll_list *head;
  	struct poll_list *walk;
 	struct fdtable *fdt;
+	int max_fdset;
 
 	/* Do a sanity check on nfds ... */
+	rcu_read_lock();
 	fdt = files_fdtable(current->files);
-	if (nfds > fdt->max_fdset && nfds > OPEN_MAX)
+	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
+	if (nfds > max_fdset && nfds > OPEN_MAX)
 		return -EINVAL;
 
 	if (timeout) {
-- 
cgit v1.2.3-18-g5258


From 93fa58cb831337fdf5d36b3b913441100a484dae Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:18 -0700
Subject: [PATCH] v9fs: Documentation, Makefiles, Configuration

OVERVIEW

V9FS is a distributed file system for Linux which provides an
implementation of the Plan 9 resource sharing protocol 9P.  It can be
used to share all sorts of resources: static files, synthetic file servers
(such as /proc or /sys), devices, and application file servers (such as
FUSE).

BACKGROUND

Plan 9 (http://plan9.bell-labs.com/plan9) is a research operating
system and associated applications suite developed by the Computing
Science Research Center of AT&T Bell Laboratories (now a part of
Lucent Technologies), the same group that developed UNIX, C, and C++.
Plan 9 was initially released in 1993 to universities, and then made
generally available in 1995. Its core operating systems code laid the
foundation for the Inferno Operating System released as a product by
Lucent Bell-Labs in 1997. The Inferno venture was the only commercial
embodiment of Plan 9 and is currently maintained as a product by Vita
Nuova (http://www.vitanuova.com). After updated releases in 2000 and
2002, Plan 9 was open-sourced under the OSI approved Lucent Public
License in 2003.

The Plan 9 project was started by Ken Thompson and Rob Pike in 1985.
Their intent was to explore potential solutions to some of the
shortcomings of UNIX in the face of the widespread use of high-speed
networks to connect machines. In UNIX, networking was an afterthought
and UNIX clusters became little more than a network of stand-alone
systems. Plan 9 was designed from first principles as a seamless
distributed system with integrated secure network resource sharing.
Applications and services were architected in such a way as to allow
for implicit distribution across a cluster of systems. Configuring an
environment to use remote application components or services in place
of their local equivalent could be achieved with a few simple command
line instructions. For the most part, application implementations
operated independent of the location of their actual resources.

Commercial operating systems haven't changed much in the 20 years
since Plan 9 was conceived. Network and distributed systems support is
provided by a patchwork of middle-ware, with an endless number of
packages supplying pieces of the puzzle. Matters are complicated by
the use of different complicated protocols for individual services,
and separate implementations for kernel and application resources.
The V9FS project (http://v9fs.sourceforge.net) is an attempt to bring
Plan 9's unified approach to resource sharing to Linux and other
operating systems via support for the 9P2000 resource sharing
protocol.

V9FS HISTORY

V9FS was originally developed by Ron Minnich and Maya Gokhale at Los
Alamos National Labs (LANL) in 1997.  In November of 2001, Greg Watson
set up a SourceForge project as a public repository for the code which
supported the Linux 2.4 kernel.

About a year ago, I picked up the initial attempt Ron Minnich had
made to provide 2.6 support and got the code integrated into a 2.6.5
kernel.   I then went through a line-for-line re-write attempting to
clean-up the code while more closely following the Linux Kernel style
guidelines.  I co-authored a paper with Ron Minnich on the V9FS Linux
support including performance comparisons to NFSv3 using Bonnie and
PostMark - this paper appeared at the USENIX/FREENIX 2005
conference in April 2005:
( http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html ).

CALL FOR PARTICIPATION/REQUEST FOR COMMENTS

Our 2.6 kernel support is stabilizing and we'd like to begin pursuing
its integration into the official kernel tree.  We would appreciate any
review, comments, critiques, and additions from this community and are
actively seeking people to join our project and help us produce
something that would be acceptable and useful to the Linux community.

STATUS

The code is reasonably stable, although there are no doubt corner cases
our regression tests haven't discovered yet.  It is in regular use by several
of the developers and has been tested on x86 and PowerPC
(32-bit and 64-bit) in both small and large (LANL cluster) deployments.
Our current regression tests include fsx, bonnie, and postmark.

It was our intention to keep things as simple as possible for this
release -- trying to focus on correctness within the core of the
protocol support versus a rich set of features.  For example: a more
complete security model and cache layer are in the road map, but
excluded from this release.   Additionally, we have removed support for
mmap operations at Al Viro's request.

PERFORMANCE

Detailed performance numbers and analysis are included in the FREENIX
paper, but we show comparable performance to NFSv3 for large file
operations based on the Bonnie benchmark, and superior performance for
many small file operations based on the PostMark benchmark.   Somewhat
preliminary graphs (from the FREENIX paper) are available
(http://v9fs.sourceforge.net/perf/index.html).

RESOURCES

The source code is available in a few different forms:

tarballs: http://v9fs.sf.net
CVSweb: http://cvs.sourceforge.net/viewcvs.py/v9fs/linux-9p/
CVS: :pserver:anonymous@cvs.sourceforge.net:/cvsroot/v9fs/linux-9p
Git: rsync://v9fs.graverobber.org/v9fs (webgit: http://v9fs.graverobber.org)
9P: tcp!v9fs.graverobber.org!6564

The user-level server is available from either the Plan 9 distribution
or from http://v9fs.sf.net
Other support applications are still being developed, but preliminary
versions can be downloaded from SourceForge.

Documentation on the protocol has historically been the Plan 9 Man
pages (http://plan9.bell-labs.com/sys/man/5/INDEX.html), but there is
an effort under way to write a more complete Internet-Draft style
specification (http://v9fs.sf.net/rfc).

There are a couple of mailing lists supporting v9fs, but the most used
is v9fs-developer@lists.sourceforge.net -- please direct/cc your
comments there so the other v9fs contributors can participate in the
conversation.  There is also an IRC channel: irc://freenode.net/#v9fs

This part of the patch contains Documentation, Makefiles, and configuration
file changes.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/Makefile | 17 +++++++++++++++++
 fs/Kconfig     | 11 +++++++++++
 fs/Makefile    |  1 +
 3 files changed, 29 insertions(+)
 create mode 100644 fs/9p/Makefile

(limited to 'fs')

diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 00000000000..e4e4ffe5a7d
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_9P_FS) := 9p2000.o
+
+9p2000-objs := \
+	vfs_super.o \
+	vfs_inode.o \
+	vfs_file.o \
+	vfs_dir.o \
+	vfs_dentry.o \
+	error.o \
+	mux.o \
+	trans_fd.o \
+	trans_sock.o \
+	9p.o \
+	conv.o \
+	v9fs.o \
+	fid.o
+
diff --git a/fs/Kconfig b/fs/Kconfig
index 5e817902cb3..443aed4e206 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1703,6 +1703,17 @@ config AFS_FS
 config RXRPC
 	tristate
 
+config 9P_FS
+	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+	depends on INET && EXPERIMENTAL
+	help
+	  If you say Y here, you will get experimental support for
+	  Plan 9 resource sharing via the 9P2000 protocol.
+
+	  See <http://v9fs.sf.net> for more information.
+
+	  If unsure, say N.
+
 endmenu
 
 menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 15158309dee..d646502c1ef 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_RELAYFS_FS)	+= relayfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_JFS_FS)		+= jfs/
 obj-$(CONFIG_XFS_FS)		+= xfs/
+obj-$(CONFIG_9P_FS)		+= 9p/
 obj-$(CONFIG_AFS_FS)		+= afs/
 obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_HOSTFS)		+= hostfs/
-- 
cgit v1.2.3-18-g5258


From e69e7fe5b0c86b7271045444a3a681136234c659 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:18 -0700
Subject: [PATCH] v9fs: VFS file, dentry, and directory operations

This part of the patch contains the VFS file, dentry & directory interfaces.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_dentry.c | 126 +++++++++++++++++
 fs/9p/vfs_dir.c    | 226 ++++++++++++++++++++++++++++++
 fs/9p/vfs_file.c   | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 753 insertions(+)
 create mode 100644 fs/9p/vfs_dentry.c
 create mode 100644 fs/9p/vfs_dir.c
 create mode 100644 fs/9p/vfs_file.c

(limited to 'fs')

diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 00000000000..306c96741f8
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
+/*
+ *  linux/fs/9p/vfs_dentry.c
+ *
+ * This file contains vfs dentry ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_dentry_validate - VFS dcache hook to validate cache
+ * @dentry:  dentry that is being validated
+ * @nd: path data
+ *
+ * dcache really shouldn't be used for 9P2000 at all due to
+ * potential attached semantics to directory traversal (walk).
+ *
+ * FUTURE: look into how to use dcache to allow multi-stage
+ * walks in Plan 9 & potential for better dcache operation which
+ * would remain valid for Plan 9 semantics.  Older versions
+ * had validation via stat for those interested.  However, since
+ * stat has the same approximate overhead as walk there really
+ * is no difference.  The only improvement would be from a
+ * time-decay cache like NFS has and that undermines the
+ * synchronous nature of 9P2000.
+ *
+ * Returns 1 if a fid is found or @dentry is the cwd/an ancestor, else 0.
+ */
+
+static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *dc = current->fs->pwd;	/* start from the task's cwd */
+
+	dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
+	if (v9fs_fid_lookup(dentry, FID_OP)) {
+		dprintk(DEBUG_VFS, "VALID\n");
+		return 1;
+	}
+
+	while (dc != NULL) {	/* walk up the d_parent chain */
+		if (dc == dentry) {
+			dprintk(DEBUG_VFS, "VALID\n");
+			return 1;
+		}
+		if (dc == dc->d_parent)	/* self-parented: stop climbing */
+			break;
+
+		dc = dc->d_parent;
+	}
+
+	dprintk(DEBUG_VFS, "INVALID\n");
+	return 0;
+}
+
+/**
+ * v9fs_dentry_release - called when dentry is going to be freed
+ * @dentry:  dentry that is being released
+ * Clunks and destroys every fid on the d_fsdata list, then frees the list.
+ */
+
+void v9fs_dentry_release(struct dentry *dentry)
+{
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	if (dentry->d_fsdata != NULL) {
+		struct list_head *fid_list = dentry->d_fsdata;
+		struct v9fs_fid *temp = NULL;
+		struct v9fs_fid *current_fid = NULL;
+		struct v9fs_fcall *fcall = NULL;
+
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			if (v9fs_t_clunk
+			    (current_fid->v9ses, current_fid->fid, &fcall))
+				dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+					FCALL_ERROR(fcall));
+
+			v9fs_put_idpool(current_fid->fid,
+					&current_fid->v9ses->fidpool);
+
+			kfree(fcall);	/* reply buffer of the clunk above */
+			v9fs_fid_destroy(current_fid);
+		}
+
+		kfree(dentry->d_fsdata);	/* free the list_head */
+	}
+}
+
+struct dentry_operations v9fs_dentry_operations = {
+	.d_revalidate = v9fs_dentry_validate,	/* fid / cwd-chain check */
+	.d_release = v9fs_dentry_release,	/* clunk and free fids */
+};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 00000000000..c478a738418
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,226 @@
+/*
+ * linux/fs/9p/vfs_dir.c
+ *
+ * This file contains vfs directory ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * dt_type - map the 9P mode bits of a stat to a DT_* entry type
+ * @mistat: stat structure whose mode field is examined
+ *
+ */
+
+static inline int dt_type(struct v9fs_stat *mistat)
+{
+	unsigned long perm = mistat->mode;
+	int rettype = DT_REG;	/* default when no type bit is set */
+
+	if (perm & V9FS_DMDIR)
+		rettype = DT_DIR;
+	if (perm & V9FS_DMSYMLINK)	/* tested last, so it overrides DMDIR */
+		rettype = DT_LNK;
+
+	return rettype;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: opaque buffer passed through to @filldir
+ * @filldir: callback invoked once per directory entry
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *file = filp->private_data;
+	unsigned int i, n;
+	int fid = file->fid;
+	int ret = 0;
+	struct v9fs_stat *mi = NULL;
+	int over = 0;
+
+	dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
+
+	mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	if (!mi)
+		return -ENOMEM;
+
+	if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
+		kfree(file->rdir_fcall);
+		file->rdir_fcall = NULL;
+	}
+
+	if (file->rdir_fcall) {
+		n = file->rdir_fcall->params.rread.count;
+		i = file->rdir_fpos;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+				  file->rdir_fcall->params.rread.data + i,
+			          n - i, mi, v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				ret = -EIO;
+				goto FreeStructs;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				    filp->f_pos, v9fs_qid2ino(&mi->qid),
+				    dt_type(mi));
+
+			if (over) {
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		if (!over) {
+			kfree(file->rdir_fcall);
+			file->rdir_fcall = NULL;
+		}
+	}
+
+	while (!over) {
+		ret = v9fs_t_read(v9ses, fid, filp->f_pos,
+					    v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+		if (ret < 0) {
+			dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
+				ret, fcall);
+			goto FreeStructs;
+		} else if (ret == 0)
+			break;
+
+		n = ret;
+		i = 0;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+			          fcall->params.rread.data + i, n - i, mi,
+			          v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				ret = -EIO;	/* don't leak mi/fcall */
+				goto FreeStructs;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				    filp->f_pos, v9fs_qid2ino(&mi->qid),
+				    dt_type(mi));
+
+			if (over) {
+				file->rdir_fcall = fcall;
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				fcall = NULL;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		kfree(fcall);
+		fcall = NULL;	/* freed; avoid double kfree */
+	}
+
+      FreeStructs:
+	kfree(fcall);
+	kfree(mi);
+	return ret;
+}
+
+/**
+ * v9fs_dir_release - close a directory
+ * @inode: inode of the directory being closed
+ * @filp: file pointer being released; private_data holds its fid
+ *
+ */
+
+int v9fs_dir_release(struct inode *inode, struct file *filp)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *fid = filp->private_data;
+	int fidnum = -1;
+
+	dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
+		fid->fid);
+	fidnum = fid->fid;
+
+	filemap_fdatawrite(inode->i_mapping);
+	filemap_fdatawait(inode->i_mapping);
+
+	if (fidnum >= 0) {
+		fid->fidopen--;
+		dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
+			fid->fid);
+
+		if (fid->fidopen == 0) {	/* no opens left: clunk the fid */
+			if (v9fs_t_clunk(v9ses, fidnum, NULL))
+				dprintk(DEBUG_ERROR, "clunk failed\n");
+
+			v9fs_put_idpool(fid->fid, &v9ses->fidpool);
+		}
+
+		kfree(fid->rdir_fcall);	/* cached readdir reply, may be NULL */
+
+		filp->private_data = NULL;
+		v9fs_fid_destroy(fid);
+	}
+
+	d_drop(filp->f_dentry);
+	return 0;
+}
+
+struct file_operations v9fs_dir_operations = {
+	.read = generic_read_dir,
+	.readdir = v9fs_dir_readdir,
+	.open = v9fs_file_open,		/* open path shared with files */
+	.release = v9fs_dir_release,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 00000000000..1f8ae7d580a
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,401 @@
+/*
+ *  linux/fs/9p/vfs_file.c
+ *
+ * This file contains vfs file ops for 9P2000.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+
+/**
+ * v9fs_file_open - open a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ * NOTE(review): directory clone path uses v9fs_fid_create() unchecked.
+ */
+
+int v9fs_file_open(struct inode *inode, struct file *file)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
+	struct v9fs_fid *v9newfid = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	int open_mode = 0;
+	unsigned int iounit = 0;
+	int newfid = -1;
+	long result = -1;
+
+	dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
+		v9fid);
+
+	if (!v9fid) {
+		struct dentry *dentry = file->f_dentry;
+		dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
+
+		/* XXX - some duplication from lookup, generalize later */
+		/* basically vfs_lookup is too heavy weight */
+		v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
+		if (!v9fid)
+			return -EBADF;
+
+		v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+		if (!v9fid)
+			return -EBADF;
+
+		newfid = v9fs_get_idpool(&v9ses->fidpool);
+		if (newfid < 0) {
+			eprintk(KERN_WARNING, "newfid fails!\n");
+			return -ENOSPC;
+		}
+
+		result =
+		    v9fs_t_walk(v9ses, v9fid->fid, newfid,
+				(char *)file->f_dentry->d_name.name, NULL);
+		if (result < 0) {
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+			dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+			return -EBADF;
+		}
+
+		v9fid = v9fs_fid_create(dentry);
+		if (v9fid == NULL) {
+			dprintk(DEBUG_ERROR, "couldn't insert\n");
+			return -ENOMEM;
+		}
+		v9fid->fid = newfid;
+	}
+
+	if (v9fid->fidcreate) {
+		/* create case */
+		newfid = v9fid->fid;
+		iounit = v9fid->iounit;
+		v9fid->fidcreate = 0;
+	} else {
+		if (!S_ISDIR(inode->i_mode))
+			newfid = v9fid->fid;
+		else {
+			newfid = v9fs_get_idpool(&v9ses->fidpool);
+			if (newfid < 0) {
+				eprintk(KERN_WARNING, "allocation failed\n");
+				return -ENOSPC;
+			}
+			/* This would be a somewhat critical clone */
+			result =
+			    v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
+					&fcall);
+			if (result < 0) {
+				dprintk(DEBUG_ERROR, "clone error: %s\n",
+					FCALL_ERROR(fcall));
+				kfree(fcall);
+				return result;
+			}
+
+			v9newfid = v9fs_fid_create(file->f_dentry);
+			v9newfid->fid = newfid;
+			v9newfid->qid = v9fid->qid;
+			v9newfid->iounit = v9fid->iounit;
+			v9newfid->fidopen = 0;
+			v9newfid->fidclunked = 0;
+			v9newfid->v9ses = v9ses;
+			v9fid = v9newfid;
+			kfree(fcall);
+		}
+
+		/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+		/* translate open mode appropriately */
+		open_mode = file->f_flags & 0x3;	/* low two flag bits */
+
+		if (file->f_flags & O_EXCL)
+			open_mode |= V9FS_OEXCL;
+
+		if (v9ses->extended) {
+			if (file->f_flags & O_TRUNC)
+				open_mode |= V9FS_OTRUNC;
+
+			if (file->f_flags & O_APPEND)
+				open_mode |= V9FS_OAPPEND;
+		}
+
+		result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
+		if (result < 0) {
+			dprintk(DEBUG_ERROR,
+				"open failed, open_mode 0x%x: %s\n", open_mode,
+				FCALL_ERROR(fcall));
+			kfree(fcall);
+			return result;
+		}
+
+		iounit = fcall->params.ropen.iounit;
+		kfree(fcall);
+	}
+
+
+	file->private_data = v9fid;
+
+	v9fid->rdir_pos = 0;
+	v9fid->rdir_fcall = NULL;
+	v9fid->fidopen = 1;
+	v9fid->filp = file;
+	v9fid->iounit = iounit;
+
+	return 0;
+}
+
+/**
+ * v9fs_file_lock - lock a file (or directory)
+ * @filp: file the lock request applies to
+ * @cmd: lock command (tested with IS_SETLK/IS_SETLKW)
+ * @fl: lock description; fl_type is compared against F_UNLCK
+ *
+ * XXX - looks like a local-only lock; extend into 9P via open exclusive
+ */
+
+static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	int res = 0;	/* never reassigned; only other return is -ENOLCK */
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+
+	/* No mandatory locks */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+		filemap_fdatawrite(inode->i_mapping);
+		filemap_fdatawait(inode->i_mapping);
+		invalidate_inode_pages(&inode->i_data);
+	}
+
+	return res;
+}
+
+/**
+ * v9fs_read - read from a file (internal)
+ * @filp: file pointer to read
+ * @buffer: kernel buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data; advanced by the bytes read
+ *
+ */
+
+static ssize_t
+v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9f = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;
+	int fid = v9f->fid;
+	int rsize = 0;
+	int result = 0;
+	int total = 0;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9f->iounit != 0 && rsize > v9f->iounit)
+		rsize = v9f->iounit;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+
+		if (result < 0) {
+			printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
+			       result);
+			/* surface the error unless data was already read */
+			kfree(fcall);
+			return total ? total : result;
+		} else
+			*offset += result;
+
+		/* XXX - extra copy */
+		memcpy(buffer, fcall->params.rread.data, result);
+		count -= result;
+		buffer += result;
+		total += result;
+
+		kfree(fcall);
+
+		if (result < rsize)	/* short read: stop here */
+			break;
+	} while (count);
+
+	return total;
+}
+
+/**
+ * v9fs_file_read - read from a file into a user buffer
+ * @filp: file pointer to read
+ * @data: user-space buffer to copy the data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
+static ssize_t
+v9fs_file_read(struct file *filp, char __user * data, size_t count,
+	       loff_t * offset)
+{
+	int retval = -1;
+	int ret = 0;
+	char *buffer;
+
+	buffer = kmalloc(count, GFP_KERNEL);	/* bounce buffer */
+	if (!buffer)
+		return -ENOMEM;
+
+	retval = v9fs_read(filp, buffer, count, offset);
+	if (retval > 0) {
+		if ((ret = copy_to_user(data, buffer, retval)) != 0) {
+			dprintk(DEBUG_ERROR, "Problem copying to user %d\n",
+				ret);
+			retval = -EFAULT;	/* ret = bytes not copied */
+		}
+	}
+
+	kfree(buffer);
+
+	return retval;
+}
+
+/**
+ * v9fs_write - write to a file
+ * @filp: file pointer to write
+ * @buffer: buffer holding the data to write
+ * @count: size of buffer
+ * @offset: offset at which to write data; advanced by the bytes written
+ *
+ */
+
+static ssize_t
+v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;	/* error path reads/frees it */
+	int fid = v9fid->fid;
+	int result = -EIO;
+	int rsize = 0;
+	int total = 0;
+
+	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count,
+		(int)*offset);
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
+		rsize = v9fid->iounit;
+
+	dump_data(buffer, count);
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		result =
+		    v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall);
+		if (result < 0) {
+			eprintk(KERN_ERR, "error while writing: %s(%d)\n",
+				FCALL_ERROR(fcall), result);
+			kfree(fcall);
+			return result;
+		} else
+			*offset += result;
+
+		kfree(fcall);
+		/* account for short writes too: *offset already moved */
+		count -= result;
+		buffer += result;
+		total += result;
+
+		if (result != rsize) {
+			eprintk(KERN_ERR,
+				"short write: v9fs_t_write returned %d\n",
+				result);
+			break;
+		}
+	} while (count);
+
+	return total;
+}
+
+/**
+ * v9fs_file_write - write to a file from a user buffer
+ * @filp: file pointer to write
+ * @data: user-space buffer holding the data to write
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+		size_t count, loff_t * offset)
+{
+	int ret = -1;
+	char *buffer;
+
+	buffer = kmalloc(count, GFP_KERNEL);	/* bounce buffer */
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	ret = copy_from_user(buffer, data, count);	/* 0 on success */
+	if (ret) {
+		dprintk(DEBUG_ERROR, "Problem copying from user\n");
+		ret = -EFAULT;
+	} else {
+		ret = v9fs_write(filp, buffer, count, offset);
+	}
+
+	kfree(buffer);
+
+	return ret;
+}
+
+struct file_operations v9fs_file_operations = {
+	.llseek = generic_file_llseek,
+	.read = v9fs_file_read,
+	.write = v9fs_file_write,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,	/* same release path as dirs */
+	.lock = v9fs_file_lock,
+};
-- 
cgit v1.2.3-18-g5258


From 2bad8471511ce5cc3ea90d0940622bd4b56b9cce Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:19 -0700
Subject: [PATCH] v9fs: VFS inode operations

This part of the patch contains the VFS inode interfaces.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 1371 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1371 insertions(+)
 create mode 100644 fs/9p/vfs_inode.c

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 00000000000..ef78af7ef04
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1371 @@
+/*
+ *  linux/fs/9p/vfs_inode.c
+ *
+ * This file contains vfs inode ops for the 9P2000 protocol.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static struct inode_operations v9fs_dir_inode_operations;
+static struct inode_operations v9fs_file_inode_operations;
+static struct inode_operations v9fs_symlink_inode_operations;
+
+/**
+ * unixmode2p9mode - translate unix mode bits into a Plan 9 mode word
+ * @v9ses: v9fs session information (its extended and nodev flags are used)
+ * @mode: unix mode to convert
+ *
+ */
+
+static inline int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+{
+	int p9mode = mode & 0777;
+
+	if (S_ISDIR(mode))
+		p9mode |= V9FS_DMDIR;
+
+	/* everything beyond permissions and DMDIR needs an extended session */
+	if (!v9ses->extended)
+		return p9mode;
+
+	if (S_ISLNK(mode))
+		p9mode |= V9FS_DMSYMLINK;
+	if (v9ses->nodev == 0) {
+		if (S_ISSOCK(mode))
+			p9mode |= V9FS_DMSOCKET;
+		if (S_ISFIFO(mode))
+			p9mode |= V9FS_DMNAMEDPIPE;
+		if (S_ISBLK(mode) || S_ISCHR(mode))
+			p9mode |= V9FS_DMDEVICE;
+	}
+	if (mode & S_ISUID)
+		p9mode |= V9FS_DMSETUID;
+	if (mode & S_ISGID)
+		p9mode |= V9FS_DMSETGID;
+	if (mode & V9FS_DMLINK)
+		p9mode |= V9FS_DMLINK;
+
+	return p9mode;
+}
+
+/**
+ * p9mode2unixmode - translate a Plan 9 mode word into unix mode bits
+ * @v9ses: v9fs session information (its extended and nodev flags are used)
+ * @mode: Plan 9 mode to convert
+ *
+ */
+
+static inline int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+{
+	int devs_ok = v9ses->extended && (v9ses->nodev == 0);
+	int umode = mode & 0777;
+
+	/*
+	 * Exactly one file type is chosen, directories first.  Devices
+	 * are reported as block devices here; the mistat converters
+	 * switch to S_IFCHR when the stat extension string says 'c'.
+	 */
+	if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+		umode |= S_IFDIR;
+	else if (v9ses->extended && (mode & V9FS_DMSYMLINK))
+		umode |= S_IFLNK;
+	else if (devs_ok && (mode & V9FS_DMSOCKET))
+		umode |= S_IFSOCK;
+	else if (devs_ok && (mode & V9FS_DMNAMEDPIPE))
+		umode |= S_IFIFO;
+	else if (devs_ok && (mode & V9FS_DMDEVICE))
+		umode |= S_IFBLK;
+	else
+		umode |= S_IFREG;
+
+	if (v9ses->extended) {
+		if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+			umode |= S_ISUID;
+		if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+			umode |= S_ISGID;
+	}
+
+	return umode;
+}
+
+/**
+ * v9fs_blank_mistat - reset a 9P stat structure to its blank state
+ * @v9ses: 9P session info (extended sessions also get n_uid/n_gid/n_muid and extension reset)
+ * @mistat: structure to initialize
+ * Numeric fields are set to ~0; every string field points at mistat->data, left empty.
+ */
+
+static inline void
+v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+{
+	mistat->type = ~0;
+	mistat->dev = ~0;
+	mistat->qid.type = ~0;
+	mistat->qid.version = ~0;
+	*((long long *)&mistat->qid.path) = ~0;
+	mistat->mode = ~0;
+	mistat->atime = ~0;
+	mistat->mtime = ~0;
+	mistat->length = ~0;
+	mistat->name = mistat->data;
+	mistat->uid = mistat->data;
+	mistat->gid = mistat->data;
+	mistat->muid = mistat->data;
+	if (v9ses->extended) {
+		mistat->n_uid = ~0;
+		mistat->n_gid = ~0;
+		mistat->n_muid = ~0;
+		mistat->extension = mistat->data;
+	}
+	*mistat->data = 0;
+}
+
+/**
+ * v9fs_mistat2unix - convert mistat to unix stat
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @buf: unix metadata (stat) structure to populate
+ * @sb: superblock; must not be NULL (session and block size are read from it)
+ *
+ */
+
+static void
+v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
+		 struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+	unsigned int id;
+
+	buf->st_nlink = 1;
+
+	buf->st_atime = mistat->atime;
+	buf->st_mtime = mistat->mtime;
+	buf->st_ctime = mistat->mtime;
+
+	/*
+	 * Start from the "no id" sentinel; ids that are still unset
+	 * afterwards fall back to the session defaults further down.
+	 */
+	buf->st_uid = (unsigned short)-1;
+	buf->st_gid = (unsigned short)-1;
+
+	if (v9ses->extended) {
+		/* TODO: string to uid mapping via user-space daemon */
+		/*
+		 * Parse into a full-width temporary: st_uid/st_gid may be
+		 * narrower than unsigned int, so storing through a cast
+		 * pointer could overwrite neighbouring fields.
+		 */
+		if (mistat->n_uid != -1 &&
+		    sscanf(mistat->uid, "%x", &id) == 1)
+			buf->st_uid = id;
+
+		if (mistat->n_gid != -1 &&
+		    sscanf(mistat->gid, "%x", &id) == 1)
+			buf->st_gid = id;
+	}
+
+	if (buf->st_uid == (unsigned short)-1)
+		buf->st_uid = v9ses->uid;
+	if (buf->st_gid == (unsigned short)-1)
+		buf->st_gid = v9ses->gid;
+
+	buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
+	if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
+		char type = 0;
+		int major = -1;
+		int minor = -1;
+		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			buf->st_mode &= ~S_IFBLK;
+			buf->st_mode |= S_IFCHR;
+			break;
+		case 'b':
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+				type, mistat->extension);
+		};
+		buf->st_rdev = MKDEV(major, minor);
+	} else
+		buf->st_rdev = 0;
+
+	buf->st_size = mistat->length;
+	buf->st_blksize = sb->s_blocksize;
+	buf->st_blocks =
+	    (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ * Returns the new inode, or ERR_PTR(-ENOMEM) / ERR_PTR(-EINVAL) on failure.
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = NULL;
+
+	dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+	inode = new_inode(sb);
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blksize = sb->s_blocksize;
+		inode->i_blocks = 0;
+		inode->i_rdev = 0;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		switch (mode & S_IFMT) {
+		case S_IFIFO:
+		case S_IFBLK:
+		case S_IFCHR:
+		case S_IFSOCK:
+		case S_IFREG:
+			inode->i_op = &v9fs_file_inode_operations;
+			inode->i_fop = &v9fs_file_operations;
+			break;
+		case S_IFDIR:
+			inode->i_nlink++;
+			inode->i_op = &v9fs_dir_inode_operations;
+			inode->i_fop = &v9fs_dir_operations;
+			break;
+		case S_IFLNK:
+			inode->i_op = &v9fs_symlink_inode_operations;
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+				mode, mode & S_IFMT);
+			iput(inode);	/* do not leak the fresh inode */
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		eprintk(KERN_WARNING, "Problem allocating inode\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	return inode;
+}
+
+/**
+ * v9fs_create - helper function to create files and directories
+ * @dir: directory inode file is being created in
+ * @file_dentry: dentry file is being created in
+ * @perm: permissions file is being created with
+ * @open_mode: resulting open mode for file ???
+ * Returns 0 or a negative errno; on failure the new fid is clunked and released.
+ */
+
+static int
+v9fs_create(struct inode *dir,
+	    struct dentry *file_dentry,
+	    unsigned int perm, unsigned int open_mode)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct super_block *sb = dir->i_sb;
+	struct v9fs_fid *dirfid =
+	    v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
+	struct v9fs_fid *fid = NULL;
+	struct inode *file_inode = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_qid qid;
+	struct stat newstat;
+	int dirfidnum = -1;
+	long newfid = -1;
+	int result = 0;
+	unsigned int iounit = 0;
+
+	perm = unixmode2p9mode(v9ses, perm);
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
+		file_dentry, perm, open_mode);
+
+	if (!dirfid)
+		return -EBADF;
+
+	dirfidnum = dirfid->fid;
+	if (dirfidnum < 0) {
+		dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
+			dir->i_ino);
+		return -EBADF;
+	}
+
+	if (file_dentry->d_inode) {
+		dprintk(DEBUG_ERROR,
+			"Odd. There is an inode for dir %lu, name :%s:\n",
+			dir->i_ino, file_dentry->d_name.name);
+		return -EEXIST;
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "no free fids available\n");
+		return -ENOSPC;
+	}
+
+	result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		newfid = -1;	/* already released: nothing left to clunk */
+		goto CleanUpFid;
+	}
+
+	kfree(fcall);
+	fcall = NULL;	/* CleanUpFid frees fcall again */
+
+	result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
+			       perm, open_mode, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+		goto CleanUpFid;
+	}
+
+	iounit = fcall->params.rcreate.iounit;
+	qid = fcall->params.rcreate.qid;
+	kfree(fcall);
+	fcall = NULL;	/* avoid a double free if v9fs_fid_create() fails */
+
+	fid = v9fs_fid_create(file_dentry);
+	if (!fid) {
+		result = -ENOMEM;
+		goto CleanUpFid;
+	}
+
+	fid->fid = newfid;
+	fid->fidopen = 0;
+	fid->fidcreate = 1;
+	fid->qid = qid;
+	fid->iounit = iounit;
+	fid->rdir_pos = 0;
+	fid->rdir_fcall = NULL;
+	fid->v9ses = v9ses;
+
+	if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
+	    (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
+	    (perm & V9FS_DMDEVICE))
+		return 0;
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
+			result);
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+	file_inode = v9fs_get_inode(sb, newstat.st_mode);
+	if ((!file_inode) || IS_ERR(file_inode)) {
+		dprintk(DEBUG_ERROR, "create inode failed\n");
+		result = -EBADF;
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+	kfree(fcall);
+	fcall = NULL;
+	d_instantiate(file_dentry, file_inode);
+	if (perm & V9FS_DMDIR) {
+		if (v9fs_t_clunk(v9ses, newfid, &fcall))
+			dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
+				FCALL_ERROR(fcall));
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		kfree(fcall);
+		fid->fidopen = 0;
+		fid->fidcreate = 0;
+		d_drop(file_dentry);
+	}
+
+	return 0;
+
+      CleanUpFid:
+	kfree(fcall);
+	fcall = NULL;
+	if (newfid >= 0) {
+		if (v9fs_t_clunk(v9ses, newfid, &fcall))
+			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+				FCALL_ERROR(fcall));
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		kfree(fcall);
+	}
+	if (fid)	/* do not leave a stale fid attached to the dentry */
+		v9fs_fid_destroy(fid);
+	return result;
+}
+
+/**
+ * v9fs_remove - helper function to remove files and directories
+ * @dir: directory inode passed in by the unlink/rmdir hooks (only used for debug output)
+ * @file: dentry that is being deleted
+ * @rmdir: 1 when called from rmdir, 0 from unlink (only used for debug output)
+ * Returns the result of v9fs_t_remove(), or -EBADF when @file has no usable fid.
+ */
+
+static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct super_block *sb = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *v9fid = NULL;
+	struct inode *file_inode = NULL;
+	int fid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+		rmdir);
+
+	file_inode = file->d_inode;
+	sb = file_inode->i_sb;
+	v9ses = v9fs_inode2v9ses(file_inode);
+	v9fid = v9fs_fid_lookup(file, FID_OP);
+
+	if (!v9fid) {
+		dprintk(DEBUG_ERROR,
+			"no v9fs_fid\n");
+		return -EBADF;
+	}
+
+	fid = v9fid->fid;
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
+			file_inode->i_ino);
+		return -EBADF;
+	}
+
+	result = v9fs_t_remove(v9ses, fid, &fcall);
+	if (result < 0)
+		dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+	else {
+		v9fs_put_idpool(fid, &v9ses->fidpool);
+		v9fs_fid_destroy(v9fid);
+	}
+
+	kfree(fcall);
+	return result;
+}
+
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @inode: directory inode, passed to v9fs_create() as the directory to create in
+ * @dentry: dentry of the file being created
+ * @perm: create permissions
+ * @nd: path information (unused here)
+ * Thin wrapper: calls v9fs_create() with an open mode of O_RDWR.
+ */
+
+static int
+v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
+		struct nameidata *nd)
+{
+	return v9fs_create(inode, dentry, perm, O_RDWR);
+}
+
+/**
+ * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
+ * @inode: directory inode, passed to v9fs_create() as the directory to create in
+ * @dentry: dentry of the new directory
+ * @mode: mode for new directory (S_IFDIR is added before calling v9fs_create())
+ *
+ */
+
+static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
+{
+	return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
+}
+
+/**
+ * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
+ * @dir:  inode that is being walked from
+ * @dentry: dentry that is being walked to?
+ * @nameidata: path data
+ * Returns NULL on success (a negative dentry is added for -ENOENT) or an ERR_PTR().
+ */
+
+static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+				      struct nameidata *nameidata)
+{
+	struct super_block *sb;
+	struct v9fs_session_info *v9ses;
+	struct v9fs_fid *dirfid;
+	struct v9fs_fid *fid;
+	struct inode *inode;
+	struct v9fs_fcall *fcall = NULL;
+	struct stat newstat;
+	int newfid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+		dir, dentry->d_iname, dentry, nameidata);
+
+	sb = dir->i_sb;
+	v9ses = v9fs_inode2v9ses(dir);
+	dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+
+	if (!dirfid) {
+		dprintk(DEBUG_ERROR, "no dirfid\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dirfid->fid < 0) {
+		dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
+			dir, dir->i_ino);
+		return ERR_PTR(-EBADF);
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "newfid fails!\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
+	result = v9fs_t_walk(v9ses, dirfid->fid, newfid,
+			     (char *)dentry->d_name.name, NULL);
+	if (result < 0) {
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		if (result == -ENOENT) {
+			d_add(dentry, NULL);
+			dprintk(DEBUG_ERROR,
+				"Return negative dentry %p count %d\n",
+				dentry, atomic_read(&dentry->d_count));
+			return NULL;
+		}
+		dprintk(DEBUG_ERROR, "walk error:%d\n", result);
+		goto FreeFcall;
+	}
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto ClunkFid;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+	inode = v9fs_get_inode(sb, newstat.st_mode);
+	if (IS_ERR(inode)) {
+		eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
+			PTR_ERR(inode));
+		result = PTR_ERR(inode);
+		goto ClunkFid;
+	}
+
+	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+	fid = v9fs_fid_create(dentry);
+	if (fid == NULL) {
+		dprintk(DEBUG_ERROR, "couldn't insert\n");
+		iput(inode);
+		result = -ENOMEM;
+		goto ClunkFid;
+	}
+
+	fid->fid = newfid;
+	fid->fidopen = 0;
+	fid->v9ses = v9ses;
+	fid->qid = fcall->params.rstat.stat->qid;
+
+	dentry->d_op = &v9fs_dentry_operations;
+	v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+	d_add(dentry, inode);
+	kfree(fcall);
+
+	return NULL;
+
+      ClunkFid:
+	/* the walk succeeded, so the fid must be clunked and released too */
+	kfree(fcall);
+	fcall = NULL;
+	if (v9fs_t_clunk(v9ses, newfid, &fcall))
+		dprintk(DEBUG_ERROR, "clunk failed: %s\n", FCALL_ERROR(fcall));
+	v9fs_put_idpool(newfid, &v9ses->fidpool);
+      FreeFcall:
+	kfree(fcall);
+	return ERR_PTR(result);
+}
+
+/**
+ * v9fs_vfs_unlink - VFS unlink hook to delete an inode
+ * @i: directory inode, forwarded to v9fs_remove() as its dir argument
+ * @d: dentry that is being unlinked
+ * Thin wrapper: calls v9fs_remove() with rmdir == 0.
+ */
+
+static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+{
+	return v9fs_remove(i, d, 0);
+}
+
+/**
+ * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
+ * @i: directory inode, forwarded to v9fs_remove() as its dir argument
+ * @d: dentry of the directory that is being removed
+ * Thin wrapper: calls v9fs_remove() with rmdir == 1.
+ */
+
+static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+{
+	return v9fs_remove(i, d, 1);
+}
+
+/**
+ * v9fs_vfs_rename - VFS hook to rename an inode
+ * @old_dir:  old dir inode
+ * @old_dentry: old dentry
+ * @new_dir: new dir inode
+ * @new_dentry: new dentry
+ * Returns 0 or a negative errno; renames across directories fail with -EPERM.
+ */
+
+static int
+v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
+	struct v9fs_fid *olddirfid =
+	    v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
+	struct v9fs_fid *newdirfid =
+	    v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fcall *fcall = NULL;
+	int fid = -1;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
+		dprintk(DEBUG_ERROR, "problem with arguments\n");
+		kfree(mistat);
+		return -EBADF;
+	}
+
+	/* 9P can only handle file rename in the same directory */
+	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
+		dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+		retval = -EPERM;
+		goto FreeFcallnBail;
+	}
+
+	fid = oldfid->fid;
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
+			old_inode->i_ino);
+		retval = -EBADF;
+		goto FreeFcallnBail;
+	}
+
+	v9fs_blank_mistat(v9ses, mistat);
+
+	strcpy(mistat->data + 1, v9ses->name);
+	mistat->name = mistat->data + 1 + strlen(v9ses->name);
+
+	if (new_dentry->d_name.len >
+	    (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
+		dprintk(DEBUG_ERROR, "new name too long\n");
+		retval = -ENAMETOOLONG;	/* must not be reported as success */
+		goto FreeFcallnBail;
+	}
+
+	strcpy(mistat->name, new_dentry->d_name.name);
+	retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+
+      FreeFcallnBail:
+	kfree(mistat);
+
+	if (retval < 0)
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_getattr - retrieve file metadata
+ * @mnt: mount information (unused here)
+ * @dentry: file to get attributes on
+ * @stat: metadata structure to populate
+ * Refreshes the inode from a 9P stat, then fills @stat via generic_fillattr().
+ */
+
+static int
+v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+	int err = -EPERM;
+
+	dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"couldn't find fid associated with dentry\n");
+		return -EBADF;
+	}
+
+	err = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+	if (err < 0)
+		dprintk(DEBUG_ERROR, "stat error\n");
+	else {
+		v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+				  dentry->d_inode->i_sb);
+		generic_fillattr(dentry->d_inode, stat);
+	}
+
+	kfree(fcall);
+	return err;
+}
+
+/**
+ * v9fs_vfs_setattr - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ * Returns 0 or a negative errno (-EBADF, -ENOMEM, or a wstat/inode_setattr error).
+ */
+
+static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fcall *fcall = NULL;
+	int res = -EPERM;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"Couldn't find fid associated with dentry\n");
+		kfree(mistat);
+		return -EBADF;
+	}
+
+	if (!mistat)
+		return -ENOMEM;
+
+	/* start from a blank stat and fill in only what the caller changes */
+	v9fs_blank_mistat(v9ses, mistat);
+	if (iattr->ia_valid & ATTR_MODE)
+		mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+
+	if (iattr->ia_valid & ATTR_MTIME)
+		mistat->mtime = iattr->ia_mtime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_ATIME)
+		mistat->atime = iattr->ia_atime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		mistat->length = iattr->ia_size;
+
+	if (v9ses->extended) {
+		/*
+		 * Work on private copies of the strings: they are re-packed
+		 * into mistat->data below, which is where the originals
+		 * live.  Every copy has room for its NUL; uid and gid get 9
+		 * spare bytes so they can also hold "%08x" output plus NUL.
+		 */
+		char *uid = kmalloc(strlen(mistat->uid) + 9, GFP_KERNEL);
+		char *gid = kmalloc(strlen(mistat->gid) + 9, GFP_KERNEL);
+		char *muid = kmalloc(strlen(mistat->muid) + 1, GFP_KERNEL);
+		char *name = kmalloc(strlen(mistat->name) + 1, GFP_KERNEL);
+		char *extension = kmalloc(strlen(mistat->extension) + 1,
+					  GFP_KERNEL);
+
+		if ((!uid) || (!gid) || (!muid) || (!name) || (!extension)) {
+			kfree(uid);
+			kfree(gid);
+			kfree(muid);
+			kfree(name);
+			kfree(extension);
+			kfree(mistat);
+
+			return -ENOMEM;
+		}
+
+		strcpy(uid, mistat->uid);
+		strcpy(gid, mistat->gid);
+		strcpy(muid, mistat->muid);
+		strcpy(name, mistat->name);
+		strcpy(extension, mistat->extension);
+
+		/* a new owner is sent both as a number and as 8 hex digits */
+		if (iattr->ia_valid & ATTR_UID) {
+			sprintf(uid, "%08x", iattr->ia_uid);
+			mistat->n_uid = iattr->ia_uid;
+		}
+
+		/* likewise for a new group */
+		if (iattr->ia_valid & ATTR_GID) {
+			sprintf(gid, "%08x", iattr->ia_gid);
+			mistat->n_gid = iattr->ia_gid;
+		}
+
+		/*
+		 * Lay the strings out back to back in mistat->data, each
+		 * followed by its terminating NUL.
+		 */
+		mistat->uid = mistat->data;
+		strcpy(mistat->uid, uid);
+		mistat->gid = mistat->data + strlen(uid) + 1;
+		strcpy(mistat->gid, gid);
+		mistat->muid = mistat->gid + strlen(gid) + 1;
+		strcpy(mistat->muid, muid);
+		mistat->name = mistat->muid + strlen(muid) + 1;
+		strcpy(mistat->name, name);
+		mistat->extension = mistat->name + strlen(name) + 1;
+		strcpy(mistat->extension, extension);
+
+		kfree(uid);
+		kfree(gid);
+		kfree(muid);
+		kfree(name);
+		kfree(extension);
+	}
+
+	res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+
+	if (res < 0)
+		dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+
+	kfree(mistat);
+	kfree(fcall);
+
+	/* the wstat went through, so apply the change to the inode too */
+	if (res >= 0)
+		res = inode_setattr(dentry->d_inode, iattr);
+
+	return res;
+}
+
+/**
+ * v9fs_mistat2inode - fill in an inode from Plan 9 metadata
+ * @mistat: Plan 9 metadata (mistat) structure to read from
+ * @inode: inode to populate
+ * @sb: superblock of filesystem (session info and block size come from it)
+ *
+ */
+
+void
+v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
+		  struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	inode->i_nlink = 1;
+	inode->i_size = mistat->length;
+
+	inode->i_blksize = sb->s_blocksize;
+	inode->i_blocks =
+	    (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+
+	/* ctime is taken from mtime as well */
+	inode->i_atime.tv_sec = mistat->atime;
+	inode->i_ctime.tv_sec = mistat->mtime;
+	inode->i_mtime.tv_sec = mistat->mtime;
+
+	/* TODO: string to uid mapping via user-space daemon */
+	if (v9ses->extended) {
+		inode->i_uid = mistat->n_uid;
+		if (mistat->n_uid == -1)
+			sscanf(mistat->uid, "%x", &inode->i_uid);
+
+		inode->i_gid = mistat->n_gid;
+		if (mistat->n_gid == -1)
+			sscanf(mistat->gid, "%x", &inode->i_gid);
+	} else {
+		inode->i_uid = -1;
+		inode->i_gid = -1;
+	}
+
+	/* ids that are still unset fall back to the session defaults */
+	if (inode->i_uid == -1)
+		inode->i_uid = v9ses->uid;
+	if (inode->i_gid == -1)
+		inode->i_gid = v9ses->gid;
+
+	inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+	if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) {
+		inode->i_rdev = 0;
+	} else {
+		char type = 0;
+		int major = -1;
+		int minor = -1;
+
+		/* the extension is parsed as "<type char> <major> <minor>" */
+		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+		if (type == 'c') {
+			inode->i_mode &= ~S_IFBLK;
+			inode->i_mode |= S_IFCHR;
+		} else if (type != 'b') {
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+				type, mistat->extension);
+		}
+
+		inode->i_rdev = MKDEV(major, minor);
+	}
+}
+
+/**
+ * v9fs_qid2ino - derive an inode number from a qid
+ * @qid: qid whose path is hashed
+ *
+ * BUG: potential for inode number collisions?
+ */
+
+ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+{
+	u64 biased = qid->path + 2;
+	ino_t ino = 0;
+
+	if (sizeof(ino) != sizeof(biased))
+		ino = (ino_t) (biased ^ (biased >> 32));	/* fold the halves */
+	else
+		memcpy(&ino, &biased, sizeof(ino));
+
+	return ino;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See 9P2000.u RFC for more information
+ * Returns 0 or a negative errno; -EPERM when the session is not extended.
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		symname);
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, S_IFLNK, 0);
+	if (retval != 0)
+		goto FreeFcall;
+
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		retval = -EBADF;
+		goto FreeFcall;
+	}
+
+	/* issue a twstat; NOTE(review): symname is copied without a length check */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	kfree(fcall);
+	fcall = NULL;	/* FreeFcall frees fcall again */
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+      FreeFcall:
+	kfree(mistat);
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_readlink - read a symlink's location (internal version)
+ * @dentry: dentry for symlink
+ * @buffer: buffer to load symlink location into
+ * @buflen: length of buffer
+ * Returns the number of bytes copied (excluding any NUL) or a negative errno.
+ */
+
+static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+	int retval = -EPERM;
+	size_t len;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+
+	if (!fid) {
+		dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeFcall;
+	}
+
+	if (!v9ses->extended) {
+		retval = -EBADF;
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
+	retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto FreeFcall;
+	}
+
+	if (!fcall)
+		return -EIO;
+
+	if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+		retval = -EINVAL;
+		goto FreeFcall;
+	}
+
+	/* copy at most buflen bytes; the NUL is added only when it fits */
+	len = strlen(fcall->params.rstat.stat->extension);
+	if (len < (size_t)buflen)
+		buffer[len] = 0;
+	else
+		len = buflen;
+	memcpy(buffer, fcall->params.rstat.stat->extension, len);
+	retval = len;
+
+      FreeFcall:
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_readlink - read a symlink's location
+ * @dentry: dentry for symlink
+ * @buffer: user buffer to load symlink location into
+ * @buflen: length of buffer
+ * Returns the number of bytes copied to @buffer or a negative errno.
+ */
+
+static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
+			     int buflen)
+{
+	int retval;
+	int ret;
+	char *link = __getname();
+
+	if (!link)
+		return -ENOMEM;
+
+	/* link is a PATH_MAX sized buffer; keep one byte spare for a NUL */
+	if (buflen > PATH_MAX - 1)
+		buflen = PATH_MAX - 1;
+
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	retval = v9fs_readlink(dentry, link, buflen);
+	if (retval > 0 && (ret = copy_to_user(buffer, link, retval)) != 0) {
+		dprintk(DEBUG_ERROR, "problem copying to user: %d\n", ret);
+		retval = -EFAULT;
+	}
+
+	putname(link);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata; receives the link text, or an ERR_PTR() on failure
+ *
+ */
+
+static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	int len = 0;
+	char *link = __getname();
+
+	dprintk(DEBUG_VFS, "%s\n", dentry->d_name.name);
+
+	if (!link)
+		link = ERR_PTR(-ENOMEM);
+	else {
+		/* PATH_MAX - 1 keeps link[len] below inside the buffer */
+		len = v9fs_readlink(dentry, link, PATH_MAX - 1);
+		if (len < 0) {
+			putname(link);
+			link = ERR_PTR(len);
+		} else
+			link[len] = 0;
+	}
+	nd_set_link(nd, link);
+
+	return NULL;
+}
+
+/**
+ * v9fs_vfs_put_link - release a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata; the link buffer is fetched from it with nd_get_link()
+ * @p: unused here
+ */
+
+static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *s = nd_get_link(nd);
+
+	dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
+	if (!IS_ERR(s))
+		putname(s);
+}
+
+/**
+ * v9fs_vfs_link - create a hardlink
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ * Returns 0 or a negative errno; -EPERM when the session is not extended.
+ */
+
+/* XXX - lots of code dup'd from symlink and creates,
+ * figure out a better reuse strategy
+ */
+
+static int
+v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int retval = -EPERM;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
+	struct v9fs_fid *newfid = NULL;
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		old_dentry->d_name.name);
+
+	if (!mistat || !symname || !oldfid) {
+		retval = oldfid ? -ENOMEM : -EBADF;
+		goto FreeMem;
+	}
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	/* get fid of old_dentry */
+	sprintf(symname, "hardlink(%d)\n", oldfid->fid);
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
+	if (retval != 0)
+		goto FreeMem;
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeMem;
+	}
+
+	/* issue a twstat */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+	kfree(fcall);
+	fcall = NULL;	/* FreeMem frees fcall again */
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+      FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	if (symname)
+		putname(symname);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_mknod - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation (block, character and FIFO nodes are accepted)
+ * @rdev: device associated with special file
+ * Returns 0 or a negative errno; -EPERM when the session is not extended.
+ */
+
+static int
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+	if (!mistat || !symname) {
+		retval = -ENOMEM;
+		goto FreeMem;
+	}
+
+	if (!new_valid_dev(rdev)) {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	/* build extension first so unsupported modes fail before the create */
+	if (S_ISBLK(mode))
+		sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISCHR(mode))
+		sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISFIFO(mode)) ;	/* DO NOTHING */
+	else {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, mode, 0);
+	if (retval != 0)
+		goto FreeMem;
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!S_ISFIFO(mode)) {
+		/* issue a twstat */
+		v9fs_blank_mistat(v9ses, mistat);
+		strcpy(mistat->data + 1, symname);
+		mistat->extension = mistat->data + 1;
+		retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+		if (retval < 0) {
+			dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+				FCALL_ERROR(fcall));
+			goto FreeMem;
+		}
+	}
+
+	/* need to update dcache so we show up */
+	kfree(fcall);
+	fcall = NULL;	/* freed exactly once here; FreeMem frees fcall again */
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+      FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	if (symname)
+		putname(symname);
+	return retval;
+}
+
+static struct inode_operations v9fs_dir_inode_operations = {	/* i_op for S_IFDIR inodes */
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.symlink = v9fs_vfs_symlink,
+	.link = v9fs_vfs_link,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.readlink = v9fs_vfs_readlink,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_file_inode_operations = {	/* i_op for regular and special files */
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_symlink_inode_operations = {	/* i_op for S_IFLNK inodes */
+	.readlink = v9fs_vfs_readlink,
+	.follow_link = v9fs_vfs_follow_link,
+	.put_link = v9fs_vfs_put_link,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
-- 
cgit v1.2.3-18-g5258


From 9e82cf6a802a72f0f447eb4c76d6a3fc8736a31d Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:20 -0700
Subject: [PATCH] v9fs: VFS superblock operations and glue

This part of the patch contains VFS superblock and mapping code.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/v9fs.c      | 448 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/v9fs.h      | 105 +++++++++++++
 fs/9p/v9fs_vfs.h  |  53 +++++++
 fs/9p/vfs_super.c | 271 +++++++++++++++++++++++++++++++++
 4 files changed, 877 insertions(+)
 create mode 100644 fs/9p/v9fs.c
 create mode 100644 fs/9p/v9fs.h
 create mode 100644 fs/9p/v9fs_vfs.h
 create mode 100644 fs/9p/vfs_super.c

(limited to 'fs')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 00000000000..14d663ebfcb
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,448 @@
+/*
+ *  linux/fs/9p/v9fs.c
+ *
+ *  This file contains functions assisting in mapping VFS to 9P2000
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/parser.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+
+/* TODO: sysfs or debugfs interface */
+int v9fs_debug_level = 0;	/* feature-rific global debug level  */
+
+/*
+  * Option Parsing (code inspired by NFS code)
+  *
+  */
+
+enum {
+	/* Options that take integer arguments.
+	 * These must stay ahead of Opt_name: v9fs_parse_options() runs
+	 * match_int() on every token whose value is below Opt_name.
+	 */
+	Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+	Opt_rfdno, Opt_wfdno,
+	/* String options */
+	Opt_name, Opt_remotename,
+	/* Options that take no arguments */
+	Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+	/* Error token: paired with the NULL pattern that ends the table */
+	Opt_err
+};
+
+/*
+ * Patterns handed to match_token().  A transport can be selected either
+ * as "proto=<name>" or by its bare name.
+ */
+static match_table_t tokens = {
+	/* integer options */
+	{Opt_port, "port=%u"},
+	{Opt_msize, "msize=%u"},
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_afid, "afid=%u"},
+	{Opt_debug, "debug=%u"},
+	{Opt_rfdno, "rfdno=%u"},
+	{Opt_wfdno, "wfdno=%u"},
+	/* string options */
+	{Opt_name, "name=%s"},
+	{Opt_remotename, "aname=%s"},
+	/* transport selection */
+	{Opt_tcp, "proto=tcp"},
+	{Opt_tcp, "tcp"},
+	{Opt_unix, "proto=unix"},
+	{Opt_unix, "unix"},
+	{Opt_fd, "proto=fd"},
+	{Opt_fd, "fd"},
+	/* options without arguments */
+	{Opt_legacy, "noextend"},
+	{Opt_nodevmap, "nodevmap"},
+	/* terminator */
+	{Opt_err, NULL}
+};
+
+/**
+ * v9fs_parse_options - parse mount options into session structure
+ * @options: comma separated options string passed from mount (may be NULL);
+ *	it is split in place by strsep()
+ * @v9ses: existing v9fs session information
+ *
+ * port, maxdata, proto, extended, afid, debug, rfdno and wfdno are reset to
+ * their defaults first; every recognised option then overrides its field.
+ * Unrecognised options and integer options whose value does not parse are
+ * skipped.
+ *
+ */
+
+static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *cur;
+	int token;
+	int value;
+
+	/* setup defaults */
+	v9ses->proto = PROTO_TCP;
+	v9ses->port = V9FS_PORT;
+	v9ses->maxdata = 9000;
+	v9ses->extended = 1;
+	v9ses->debug = 0;
+	v9ses->afid = ~0;
+	v9ses->rfdno = ~0;
+	v9ses->wfdno = ~0;
+
+	if (!options)
+		return;
+
+	for (cur = strsep(&options, ","); cur != NULL;
+	     cur = strsep(&options, ",")) {
+		/* skip empty fields such as the one in "a,,b" */
+		if (*cur == '\0')
+			continue;
+
+		token = match_token(cur, tokens, args);
+
+		/* every token ahead of Opt_name carries an integer value */
+		if (token < Opt_name && match_int(&args[0], &value) < 0) {
+			dprintk(DEBUG_ERROR,
+				"integer field, but no integer?\n");
+			continue;
+		}
+
+		switch (token) {
+		case Opt_port:
+			v9ses->port = value;
+			break;
+		case Opt_msize:
+			v9ses->maxdata = value;
+			break;
+		case Opt_uid:
+			v9ses->uid = value;
+			break;
+		case Opt_gid:
+			v9ses->gid = value;
+			break;
+		case Opt_afid:
+			v9ses->afid = value;
+			break;
+		case Opt_rfdno:
+			v9ses->rfdno = value;
+			break;
+		case Opt_wfdno:
+			v9ses->wfdno = value;
+			break;
+		case Opt_debug:
+			v9ses->debug = value;
+			break;
+		case Opt_tcp:
+			v9ses->proto = PROTO_TCP;
+			break;
+		case Opt_unix:
+			v9ses->proto = PROTO_UNIX;
+			break;
+		case Opt_fd:
+			v9ses->proto = PROTO_FD;
+			break;
+		case Opt_name:
+			match_strcpy(v9ses->name, &args[0]);
+			break;
+		case Opt_remotename:
+			match_strcpy(v9ses->remotename, &args[0]);
+			break;
+		case Opt_legacy:
+			v9ses->extended = 0;
+			break;
+		case Opt_nodevmap:
+			v9ses->nodev = 1;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * v9fs_inode2v9ses - extract v9fs session info from an inode's super block
+ * @inode: inode to extract information from
+ *
+ * Returns the s_fs_info pointer of the super block @inode belongs to.  No
+ * validation is done on @inode or on its super block.
+ *
+ */
+
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	return sb->s_fs_info;
+}
+
+/**
+ * v9fs_get_idpool - allocate numeric id from pool
+ * @p: pool to allocate from
+ *
+ * Returns the newly allocated id (>= 0), or -1 when memory for the pool could
+ * not be reserved, the wait for the pool lock was interrupted, or the idr
+ * allocation failed.
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ *            the lock included in struct idr?
+ */
+
+int v9fs_get_idpool(struct v9fs_idpool *p)
+{
+	int i = 0;
+	int error;
+
+retry:
+	/*
+	 * 0 is a valid id, so an allocation failure has to be reported with
+	 * the same negative value as every other error; callers only test
+	 * for a result below zero.
+	 */
+	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+		return -1;
+
+	if (down_interruptible(&p->lock) == -EINTR) {
+		eprintk(KERN_WARNING, "Interrupted while locking\n");
+		return -1;
+	}
+
+	error = idr_get_new(&p->pool, NULL, &i);
+	up(&p->lock);
+
+	/* -EAGAIN: the memory reserved by idr_pre_get() was used up, refill */
+	if (error == -EAGAIN)
+		goto retry;
+	else if (error)
+		return -1;
+
+	return i;
+}
+
+/**
+ * v9fs_put_idpool - release numeric id from pool
+ * @id: id to release
+ * @p: pool the id was allocated from
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ *            the lock included in struct idr?
+ */
+
+void v9fs_put_idpool(int id, struct v9fs_idpool *p)
+{
+	/*
+	 * Releasing must not fail: there is no way to report an error to the
+	 * caller, so bailing out when a signal arrives would leave @id
+	 * allocated for the rest of the session.  Wait for the lock
+	 * uninterruptibly instead.
+	 */
+	down(&p->lock);
+	idr_remove(&p->pool, id);
+	up(&p->lock);
+}
+
+/**
+ * v9fs_session_init - initialize session
+ * @v9ses: session information structure
+ * @dev_name: device being mounted
+ * @data: options
+ *
+ * Allocates the name buffers, parses @data, sets up the fid and tid pools,
+ * initializes the transport selected by the proto option and then the mux.
+ * When no afid was supplied, the protocol version and msize are negotiated
+ * with the server.  Finally a fid is allocated and attached.
+ *
+ * Returns the fid used for the attach on success, or a negative errno.  On
+ * any failure after the name buffers were allocated, v9fs_session_close()
+ * has already been called on @v9ses.
+ *
+ */
+
+int
+v9fs_session_init(struct v9fs_session_info *v9ses,
+		  const char *dev_name, char *data)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_transport *trans_proto;
+	int n = 0;
+	int newfid = -1;
+	int retval = -EINVAL;
+
+	v9ses->name = __getname();
+	if (!v9ses->name)
+		return -ENOMEM;
+
+	v9ses->remotename = __getname();
+	if (!v9ses->remotename) {
+		putname(v9ses->name);
+		return -ENOMEM;
+	}
+
+	strcpy(v9ses->name, V9FS_DEFUSER);
+	strcpy(v9ses->remotename, V9FS_DEFANAME);
+
+	v9fs_parse_options(data, v9ses);
+
+	/* set global debug level */
+	v9fs_debug_level = v9ses->debug;
+
+	/* id pools that are session-dependent: FIDs and TIDs */
+	idr_init(&v9ses->fidpool.pool);
+	init_MUTEX(&v9ses->fidpool.lock);
+	idr_init(&v9ses->tidpool.pool);
+	init_MUTEX(&v9ses->tidpool.lock);
+
+
+	switch (v9ses->proto) {
+	case PROTO_TCP:
+		trans_proto = &v9fs_trans_tcp;
+		break;
+	case PROTO_UNIX:
+		trans_proto = &v9fs_trans_unix;
+		*v9ses->remotename = 0;
+		break;
+	case PROTO_FD:
+		trans_proto = &v9fs_trans_fd;
+		*v9ses->remotename = 0;
+		if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
+			printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+			retval = -ENOPROTOOPT;
+			goto SessCleanUp;
+		}
+		break;
+	default:
+		printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
+		retval = -ENOPROTOOPT;
+		goto SessCleanUp;
+	};
+
+	v9ses->transport = trans_proto;
+
+	if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
+		eprintk(KERN_ERR, "problem initializing transport\n");
+		goto SessCleanUp;
+	}
+
+	v9ses->inprogress = 0;
+	v9ses->shutdown = 0;
+	v9ses->session_hung = 0;
+
+	if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+		dprintk(DEBUG_ERROR, "problem initializing mux\n");
+		goto SessCleanUp;
+	}
+
+	if (v9ses->afid == ~0) {
+		if (v9ses->extended)
+			retval =
+			    v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
+					   &fcall);
+		else
+			retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
+						&fcall);
+
+		if (retval < 0) {
+			dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
+			goto FreeFcall;
+		}
+
+		/* Really should check for 9P1 and report error.
+		 * Anything other than "9P2000.u" selects legacy mode.
+		 */
+		if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+			dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
+			v9ses->extended = 1;
+		} else {
+			dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
+			v9ses->extended = 0;
+		}
+
+		n = fcall->params.rversion.msize;
+		kfree(fcall);
+
+		if (n < v9ses->maxdata)
+			v9ses->maxdata = n;
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "couldn't allocate FID\n");
+		retval = -ENOMEM;
+		goto SessCleanUp;
+	}
+	/* it is a little bit ugly, but we have to prevent newfid */
+	/* being the same as afid, so if it is, get a new fid     */
+	if (v9ses->afid != ~0 && newfid == v9ses->afid) {
+		newfid = v9fs_get_idpool(&v9ses->fidpool);
+		if (newfid < 0) {
+			eprintk(KERN_WARNING, "couldn't allocate FID\n");
+			retval = -ENOMEM;
+			goto SessCleanUp;
+		}
+	}
+
+	if ((retval =
+	     v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
+			   v9ses->afid, NULL))
+	    < 0) {
+		dprintk(DEBUG_ERROR, "cannot attach\n");
+		goto SessCleanUp;
+	}
+
+	if (v9ses->afid != ~0) {
+		if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+			dprintk(DEBUG_ERROR, "clunk failed\n");
+	}
+
+	return newfid;
+
+      FreeFcall:
+	kfree(fcall);
+
+      SessCleanUp:
+	v9fs_session_close(v9ses);
+	return retval;
+}
+
+/**
+ * v9fs_session_close - shutdown a session
+ * @v9ses: session information structure
+ *
+ * Sends SIGKILL to the recvproc task, if one is set, and waits for the
+ * proccmpl completion; closes the transport, if one was selected; then
+ * releases the name and remotename buffers.
+ *
+ */
+
+void v9fs_session_close(struct v9fs_session_info *v9ses)
+{
+	struct task_struct *reader = v9ses->recvproc;
+
+	if (reader != NULL) {
+		send_sig(SIGKILL, reader, 1);
+		wait_for_completion(&v9ses->proccmpl);
+	}
+
+	if (v9ses->transport != NULL)
+		v9ses->transport->close(v9ses->transport);
+
+	putname(v9ses->name);
+	putname(v9ses->remotename);
+}
+
+extern int v9fs_error_init(void);
+
+/**
+ * init_v9fs - Initialize module
+ *
+ * Calls v9fs_error_init() and registers the v9fs file system type.  Returns
+ * the result of register_filesystem().
+ *
+ */
+
+static int __init init_v9fs(void)
+{
+	int err;
+
+	v9fs_error_init();
+
+	printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
+
+	err = register_filesystem(&v9fs_fs_type);
+	return err;
+}
+
+/**
+ * exit_v9fs - shutdown module
+ *
+ * Unregisters the v9fs file system type.
+ *
+ */
+
+static void __exit exit_v9fs(void)
+{
+	unregister_filesystem(&v9fs_fs_type);
+}
+
+module_init(init_v9fs)
+module_exit(exit_v9fs)
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
+MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 00000000000..52203027b15
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,105 @@
+/*
+ * V9FS definitions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/*
+  * Idpool structure provides lock and id management
+  *
+  * lock is taken around every allocation from and removal from pool
+  * (see v9fs_get_idpool() and v9fs_put_idpool()).
+  *
+  */
+
+struct v9fs_idpool {
+	struct semaphore lock;
+	struct idr pool;
+};
+
+/*
+  * Session structure provides information for an opened session
+  *
+  * The "options" fields are filled in from the mount options by
+  * v9fs_parse_options(); ~0 in afid, rfdno or wfdno means "not given".
+  *
+  */
+
+struct v9fs_session_info {
+	/* options */
+	unsigned int maxdata;
+	unsigned char extended;	/* set to 1 if we are using UNIX extensions */
+	unsigned char nodev;	/* set to 1 by the nodevmap mount option */
+	unsigned short port;	/* port to connect to */
+	unsigned short debug;	/* debug level */
+	unsigned short proto;	/* protocol to use */
+	unsigned int afid;	/* authentication fid */
+	unsigned int rfdno;	/* read file descriptor number */
+	unsigned int wfdno;	/* write file descriptor number */
+
+
+	char *name;		/* user name to mount as */
+	char *remotename;	/* name of remote hierarchy being mounted */
+	unsigned int uid;	/* default uid/muid for legacy support */
+	unsigned int gid;	/* default gid for legacy support */
+
+	/* book keeping */
+	struct v9fs_idpool fidpool;	/* The FID pool for file descriptors */
+	struct v9fs_idpool tidpool;	/* The TID pool for transactions ids */
+
+	/* transport information */
+	struct v9fs_transport *transport;
+
+	int inprogress;		/* session in progress => true */
+	int shutdown;		/* session shutting down. no more attaches. */
+	unsigned char session_hung;
+
+	/* mux private data */
+	struct v9fs_fcall *curfcall;
+	wait_queue_head_t read_wait;
+	struct completion fcread;
+	struct completion proccmpl;
+	struct task_struct *recvproc;
+
+	spinlock_t muxlock;
+	struct list_head mux_fcalls;
+};
+
+/* possible values of ->proto; v9fs_parse_options() defaults to PROTO_TCP */
+enum {
+	PROTO_TCP,
+	PROTO_UNIX,
+	PROTO_FD,
+};
+
+int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+void v9fs_session_close(struct v9fs_session_info *v9ses);
+int v9fs_get_idpool(struct v9fs_idpool *p);
+void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+int v9fs_get_option(char *opts, char *name, char *buf, int buflen);
+long long v9fs_get_int_option(char *opts, char *name, long long dflt);
+int v9fs_parse_tcp_devname(const char *devname, char **addr, char **remotename);
+
+#define V9FS_MAGIC 0x01021997
+
+/* other default globals */
+#define V9FS_PORT		564
+#define V9FS_DEFUSER	"nobody"
+#define V9FS_DEFANAME	""
+
+/* initial pool sizes for fids and tags */
+#define V9FS_START_FIDS 8192
+#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 00000000000..2f2cea7ee3e
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
+/*
+ * V9FS VFS extensions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/* plan9 semantics are that created files are implicitly opened.
+ * But linux semantics are that you call create, then open.
+ * the plan9 approach is superior as it provides an atomic
+ * open.
+ * we track the create fid here. When the file is opened, if fidopen is
+ * non-zero, we use the fid and can skip some steps.
+ * there may be a better way to do this, but I don't know it.
+ * one BAD way is to clunk the fid on create, then open it again:
+ * you lose the atomicity of file open
+ */
+
+/* special case:
+ * unlink calls remove, which is an implicit clunk. So we have to track
+ * that kind of thing so that we don't try to clunk a dead fid.
+ */
+
+extern struct file_system_type v9fs_fs_type;
+extern struct file_operations v9fs_file_operations;
+extern struct file_operations v9fs_dir_operations;
+extern struct dentry_operations v9fs_dentry_operations;
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+ino_t v9fs_qid2ino(struct v9fs_qid *qid);
+void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
+		       struct super_block *);
+int v9fs_dir_release(struct inode *inode, struct file *filp);
+int v9fs_file_open(struct inode *inode, struct file *file);
+void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 00000000000..ce0778acc90
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,271 @@
+/*
+ *  linux/fs/9p/vfs_super.c
+ *
+ * This file contains superblock ops for 9P2000. It is intended that
+ * you mount this file system on directories.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static void v9fs_clear_inode(struct inode *);
+static struct super_operations v9fs_super_ops;
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ * Calls filemap_fdatawrite() on the inode's mapping; its return value is
+ * ignored.
+ *
+ */
+
+static void v9fs_clear_inode(struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	filemap_fdatawrite(mapping);
+}
+
+/**
+ * v9fs_set_super - set the superblock
+ * @s: super block
+ * @data: file system specific data, stored in @s->s_fs_info
+ *
+ * Returns the result of set_anon_super().
+ *
+ */
+
+static int v9fs_set_super(struct super_block *s, void *data)
+{
+	int err;
+
+	s->s_fs_info = data;
+	err = set_anon_super(s, data);
+
+	return err;
+}
+
+/**
+ * v9fs_fill_super - populate superblock with info
+ * @sb: superblock
+ * @v9ses: session information
+ * @flags: mount flags, merged into the flags v9fs always sets
+ *
+ * The block size is derived from the session's maxdata: it is two to the
+ * power of fls(maxdata - 1).
+ *
+ */
+
+static void
+v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
+		int flags)
+{
+	/* identity and operations */
+	sb->s_magic = V9FS_MAGIC;
+	sb->s_op = &v9fs_super_ops;
+
+	/* size limits */
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
+	sb->s_blocksize = 1 << sb->s_blocksize_bits;
+
+	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
+	    MS_NODIRATIME | MS_NOATIME;
+}
+
+/**
+ * v9fs_get_sb - mount a superblock
+ * @fs_type: file system type
+ * @flags: mount flags
+ * @dev_name: device name that was mounted
+ * @data: mount options
+ *
+ * Creates a session, obtains a superblock for it, builds the root inode and
+ * dentry and fills the root in from a stat of the attached fid.
+ *
+ * Returns the new superblock, or an ERR_PTR() encoded errno on failure.
+ *
+ */
+
+static struct super_block *v9fs_get_sb(struct file_system_type
+				       *fs_type, int flags,
+				       const char *dev_name, void *data)
+{
+	struct super_block *sb = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = NULL;
+	struct dentry *root = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *root_fid = NULL;
+	int mode = S_IRWXUGO | S_ISVTX;
+	uid_t uid = current->fsuid;
+	gid_t gid = current->fsgid;
+	int stat_result = 0;
+	int newfid = 0;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, " \n");
+
+	v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+	if (!v9ses)
+		return ERR_PTR(-ENOMEM);
+
+	/* on failure v9fs_session_init() has closed the session itself */
+	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
+		dprintk(DEBUG_ERROR, "problem initiating session\n");
+		retval = newfid;
+		goto free_session;
+	}
+
+	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+	if (IS_ERR(sb)) {
+		/* no superblock exists, so the session is still ours */
+		retval = PTR_ERR(sb);
+		goto close_session;
+	}
+
+	v9fs_fill_super(sb, v9ses, flags);
+
+	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	if (IS_ERR(inode)) {
+		retval = PTR_ERR(inode);
+		goto put_back_sb;
+	}
+
+	inode->i_uid = uid;
+	inode->i_gid = gid;
+
+	root = d_alloc_root(inode);
+
+	if (!root) {
+		retval = -ENOMEM;
+		goto release_inode;
+	}
+
+	sb->s_root = root;
+
+	/* Setup the Root Inode */
+	root_fid = v9fs_fid_create(root);
+	if (root_fid == NULL) {
+		retval = -ENOMEM;
+		goto release_dentry;
+	}
+
+	root_fid->fidopen = 0;
+	root_fid->v9ses = v9ses;
+
+	stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (stat_result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		v9fs_t_clunk(v9ses, newfid, NULL);
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+	} else {
+		root_fid->fid = newfid;
+		root_fid->qid = fcall->params.rstat.stat->qid;
+		root->d_inode->i_ino =
+		    v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+		v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+	}
+
+	kfree(fcall);
+
+	if (stat_result < 0) {
+		retval = stat_result;
+		goto release_dentry;
+	}
+
+	return sb;
+
+      release_dentry:
+	/*
+	 * NOTE(review): d_alloc_root() presumably took over the inode
+	 * reference, and sb->s_root still points at this dentry when the
+	 * superblock is torn down below -- confirm that the dput()/iput()
+	 * pair here does not drop references twice.
+	 */
+	dput(sb->s_root);
+
+      release_inode:
+	iput(inode);
+
+      put_back_sb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	/*
+	 * Dropping the only active reference runs v9fs_kill_super(), which
+	 * closes the session and frees v9ses.  Doing either again here would
+	 * be a use after free.
+	 */
+	return ERR_PTR(retval);
+
+      close_session:
+	v9fs_session_close(v9ses);
+
+      free_session:
+	kfree(v9ses);
+
+	return ERR_PTR(retval);
+}
+
+/**
+ * v9fs_kill_super - Kill Superblock
+ * @s: superblock
+ *
+ * Releases the root dentry's fids, kills the anonymous superblock, then
+ * closes and frees the session stored in @s->s_fs_info.
+ *
+ */
+
+static void v9fs_kill_super(struct super_block *s)
+{
+	struct v9fs_session_info *v9ses = s->s_fs_info;
+
+	dprintk(DEBUG_VFS, " %p\n", s);
+
+	/*
+	 * v9fs_get_sb() can drop a half built superblock through
+	 * deactivate_super() before s_root has been set, so there may be no
+	 * root dentry to release.
+	 */
+	if (s->s_root)
+		v9fs_dentry_release(s->s_root);	/* clunk root */
+
+	kill_anon_super(s);
+
+	v9fs_session_close(v9ses);
+	kfree(v9ses);
+	dprintk(DEBUG_VFS, "exiting kill_super\n");
+}
+
+/**
+ * v9fs_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ *
+ * Options that still have the default set by v9fs_parse_options() are left
+ * out; name, aname, uid and gid are always shown.  Always returns 0.
+ *
+ */
+
+static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
+
+	if (v9ses->debug != 0)
+		seq_printf(m, ",debug=%u", v9ses->debug);
+	if (v9ses->port != V9FS_PORT)
+		seq_printf(m, ",port=%u", v9ses->port);
+	if (v9ses->maxdata != 9000)
+		seq_printf(m, ",msize=%u", v9ses->maxdata);
+	if (v9ses->afid != ~0)
+		seq_printf(m, ",afid=%u", v9ses->afid);
+
+	/* tcp is the default transport; report every other choice */
+	if (v9ses->proto == PROTO_UNIX)
+		seq_puts(m, ",proto=unix");
+	else if (v9ses->proto == PROTO_FD)
+		seq_puts(m, ",proto=fd");
+
+	/* the descriptor numbers accepted by the parser, when given */
+	if (v9ses->rfdno != ~0)
+		seq_printf(m, ",rfdno=%u", v9ses->rfdno);
+	if (v9ses->wfdno != ~0)
+		seq_printf(m, ",wfdno=%u", v9ses->wfdno);
+
+	if (v9ses->extended == 0)
+		seq_puts(m, ",noextend");
+	if (v9ses->nodev == 1)
+		seq_puts(m, ",nodevmap");
+	seq_printf(m, ",name=%s", v9ses->name);
+	seq_printf(m, ",aname=%s", v9ses->remotename);
+	seq_printf(m, ",uid=%u", v9ses->uid);
+	seq_printf(m, ",gid=%u", v9ses->gid);
+	return 0;
+}
+
+/*
+ * Superblock operations installed by v9fs_fill_super().
+ */
+static struct super_operations v9fs_super_ops = {
+	.clear_inode = v9fs_clear_inode,
+	.show_options = v9fs_show_options,
+	.statfs = simple_statfs,
+};
+
+/*
+ * File system type handed to register_filesystem().
+ */
+struct file_system_type v9fs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "9P",
+	.get_sb = v9fs_get_sb,
+	.kill_sb = v9fs_kill_super,
+};
-- 
cgit v1.2.3-18-g5258


From b8cf945b3166c4394386f162a527c9950f396ce2 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:21 -0700
Subject: [PATCH] v9fs: 9P protocol implementation

This part of the patch contains the 9P protocol functions.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/9p.c   | 359 +++++++++++++++++++++++++++++++
 fs/9p/9p.h   | 341 +++++++++++++++++++++++++++++
 fs/9p/conv.c | 693 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/conv.h |  36 ++++
 4 files changed, 1429 insertions(+)
 create mode 100644 fs/9p/9p.c
 create mode 100644 fs/9p/9p.h
 create mode 100644 fs/9p/conv.c
 create mode 100644 fs/9p/conv.h

(limited to 'fs')

diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 00000000000..e847f504a47
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
+/*
+ *  linux/fs/9p/9p.c
+ *
+ *  This file contains 9P2000 protocol functions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+/**
+ * v9fs_t_version - negotiate protocol parameters with server
+ * @v9ses: 9P2000 session information
+ * @msize: requested max size packet
+ * @version: requested version.extension string
+ * @fcall: pointer to response fcall pointer
+ *
+ * Returns the result of v9fs_mux_rpc() for the TVERSION request.
+ *
+ */
+
+int
+v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+	       char *version, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
+
+	tc.id = TVERSION;
+	tc.params.tversion.version = version;
+	tc.params.tversion.msize = msize;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_attach - mount the server
+ * @v9ses: 9P2000 session information
+ * @uname: user name doing the attach
+ * @aname: remote name being attached to
+ * @fid: mount fid to attach to root node
+ * @afid: authentication fid (in this case result key)
+ * @fcall: pointer to response fcall pointer
+ *
+ * Returns the result of v9fs_mux_rpc() for the TATTACH request.
+ *
+ */
+
+int
+v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+	      u32 fid, u32 afid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
+		aname, fid, afid);
+
+	tc.id = TATTACH;
+	tc.params.tattach.uname = uname;
+	tc.params.tattach.aname = aname;
+	tc.params.tattach.fid = fid;
+	tc.params.tattach.afid = afid;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_clunk - release a fid (finish a transaction)
+ * @v9ses: 9P2000 session information
+ * @fid: fid to release
+ * @fcall: pointer to response fcall pointer
+ *
+ * Returns the result of v9fs_mux_rpc() for the TCLUNK request.
+ *
+ */
+
+int
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc.params.tclunk.fid = fid;
+	tc.id = TCLUNK;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_flush - flush a pending transaction
+ * @v9ses: 9P2000 session information
+ * @tag: tid to release
+ *
+ * Returns the result of v9fs_mux_rpc() for the TFLUSH request; no response
+ * fcall is requested.
+ *
+ */
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "oldtag %d\n", tag);
+
+	tc.params.tflush.oldtag = tag;
+	tc.id = TFLUSH;
+
+	return v9fs_mux_rpc(v9ses, &tc, NULL);
+}
+
+/**
+ * v9fs_t_stat - read a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to get info about
+ * @fcall: pointer to response fcall; when non-NULL, *@fcall is cleared
+ *	before the request is issued
+ *
+ * Returns the result of v9fs_mux_rpc() for the TSTAT request.
+ *
+ */
+
+int
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	if (fcall != NULL)
+		*fcall = NULL;
+
+	tc.params.tstat.fid = fid;
+	tc.id = TSTAT;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_wstat - write a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to write info about
+ * @stat: metadata; must not be NULL, its length is read for the debug print
+ * @fcall: pointer to response fcall
+ *
+ * Returns the result of v9fs_mux_rpc() for the TWSTAT request.
+ *
+ */
+
+int
+v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
+
+	tc.id = TWSTAT;
+	tc.params.twstat.stat = stat;
+	tc.params.twstat.fid = fid;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_walk - walk a fid to a new file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to walk
+ * @newfid: new fid (for clone operations)
+ * @name: path to walk fid to; NULL sends a walk with no name elements
+ * @fcall: pointer to response fcall
+ *
+ * Returns the result of v9fs_mux_rpc() for the TWALK request.
+ *
+ */
+
+/* TODO: support multiple walk */
+
+int
+v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+	    char *name, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
+
+	tc.id = TWALK;
+	tc.params.twalk.newfid = newfid;
+	tc.params.twalk.fid = fid;
+
+	if (name == NULL) {
+		tc.params.twalk.nwname = 0;
+	} else {
+		/* a single element: the name array is the parameter itself */
+		tc.params.twalk.wnames = &name;
+		tc.params.twalk.nwname = 1;
+	}
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_open - open a file
+ * @v9ses: 9P2000 session information
+ * @fid: fid to open
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ * Returns the result of v9fs_mux_rpc() for the TOPEN request.
+ *
+ */
+
+int
+v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+	    struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
+
+	tc.id = TOPEN;
+	tc.params.topen.mode = mode;
+	tc.params.topen.fid = fid;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_remove - remove a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to remove
+ * @fcall: pointer to response fcall
+ *
+ * Returns the result of v9fs_mux_rpc() for the TREMOVE request.
+ *
+ */
+
+int
+v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+	      struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc.params.tremove.fid = fid;
+	tc.id = TREMOVE;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_create - create a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to create
+ * @name: name of the file or directory to create
+ * @perm: permissions to create with
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ * Returns the result of v9fs_mux_rpc() for the TCREATE request.
+ *
+ */
+
+int
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+	      u32 perm, u8 mode, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
+		fid, name, perm, mode);
+
+	tc.id = TCREATE;
+	tc.params.tcreate.name = name;
+	tc.params.tcreate.fid = fid;
+	tc.params.tcreate.mode = mode;
+	tc.params.tcreate.perm = perm;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_read - read data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to read from
+ * @offset: offset to start read at
+ * @count: how many bytes to read
+ * @fcall: pointer to response fcall (with data); when NULL the response is
+ *	freed here
+ *
+ * Returns the count reported in the response when v9fs_mux_rpc() returns 0,
+ * otherwise the value v9fs_mux_rpc() returned.
+ *
+ */
+
+int
+v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+	    u32 count, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+	struct v9fs_fcall *rc = NULL;
+	long errorno = -1;
+
+	/* print all 64 bits of the offset, as v9fs_t_write() does */
+	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+		(unsigned long long)offset, count);
+	msg.id = TREAD;
+	msg.params.tread.fid = fid;
+	msg.params.tread.offset = offset;
+	msg.params.tread.count = count;
+	errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+	if (!errorno) {
+		errorno = rc->params.rread.count;
+		dump_data(rc->params.rread.data, rc->params.rread.count);
+	}
+
+	/* hand the response to the caller if it wants it, else drop it */
+	if (fcall)
+		*fcall = rc;
+	else
+		kfree(rc);
+
+	return errorno;
+}
+
+/**
+ * v9fs_t_write - write data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to write to
+ * @offset: offset to start write at
+ * @count: how many bytes to write
+ * @data: buffer holding the bytes to write
+ * @fcall: pointer to response fcall; when NULL the response is freed here
+ *
+ * Returns the count reported in the response when v9fs_mux_rpc() returns 0,
+ * otherwise the value v9fs_mux_rpc() returned.
+ *
+ */
+
+int
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
+	     u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+	struct v9fs_fcall *rsp = NULL;
+	long ret = -1;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+		(unsigned long long)offset, count);
+	dump_data(data, count);
+
+	tc.id = TWRITE;
+	tc.params.twrite.fid = fid;
+	tc.params.twrite.data = data;
+	tc.params.twrite.count = count;
+	tc.params.twrite.offset = offset;
+
+	ret = v9fs_mux_rpc(v9ses, &tc, &rsp);
+	if (ret == 0)
+		ret = rsp->params.rwrite.count;
+
+	if (fcall == NULL)
+		kfree(rsp);
+	else
+		*fcall = rsp;
+
+	return ret;
+}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 00000000000..f55424216be
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
+/*
+ * linux/fs/9p/9p.h
+ *
+ * 9P protocol definitions.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/*
+ * Message Types
+ *
+ * Values come in pairs: each T-message has an even value and the R-message
+ * that follows it in the list is that value plus one.  The T values handled
+ * by v9fs_serialize_fcall() are the ones sent; the R values handled by
+ * v9fs_deserialize_fcall() are the ones received.
+ */
+enum {
+	TVERSION = 100,
+	RVERSION,
+	TAUTH = 102,
+	RAUTH,
+	TATTACH = 104,
+	RATTACH,
+	TERROR = 106,
+	RERROR,
+	TFLUSH = 108,
+	RFLUSH,
+	TWALK = 110,
+	RWALK,
+	TOPEN = 112,
+	ROPEN,
+	TCREATE = 114,
+	RCREATE,
+	TREAD = 116,
+	RREAD,
+	TWRITE = 118,
+	RWRITE,
+	TCLUNK = 120,
+	RCLUNK,
+	TREMOVE = 122,
+	RREMOVE,
+	TSTAT = 124,
+	RSTAT,
+	TWSTAT = 126,
+	RWSTAT,
+};
+
+/* modes */
+enum {
+	V9FS_OREAD = 0x00,
+	V9FS_OWRITE = 0x01,
+	V9FS_ORDWR = 0x02,
+	V9FS_OEXEC = 0x03,
+	V9FS_OEXCL = 0x04,
+	V9FS_OTRUNC = 0x10,
+	V9FS_OREXEC = 0x20,
+	V9FS_ORCLOSE = 0x40,
+	V9FS_OAPPEND = 0x80,
+};
+
+/* permissions */
+enum {
+	V9FS_DMDIR = 0x80000000,
+	V9FS_DMAPPEND = 0x40000000,
+	V9FS_DMEXCL = 0x20000000,
+	V9FS_DMMOUNT = 0x10000000,
+	V9FS_DMAUTH = 0x08000000,
+	V9FS_DMTMP = 0x04000000,
+	V9FS_DMSYMLINK = 0x02000000,
+	V9FS_DMLINK = 0x01000000,
+	/* 9P2000.u extensions */
+	V9FS_DMDEVICE = 0x00800000,
+	V9FS_DMNAMEDPIPE = 0x00200000,
+	V9FS_DMSOCKET = 0x00100000,
+	V9FS_DMSETUID = 0x00080000,
+	V9FS_DMSETGID = 0x00040000,
+};
+
+/* qid.types */
+enum {
+	V9FS_QTDIR = 0x80,
+	V9FS_QTAPPEND = 0x40,
+	V9FS_QTEXCL = 0x20,
+	V9FS_QTMOUNT = 0x10,
+	V9FS_QTAUTH = 0x08,
+	V9FS_QTTMP = 0x04,
+	V9FS_QTSYMLINK = 0x02,
+	V9FS_QTLINK = 0x01,
+	V9FS_QTFILE = 0x00,
+};
+
+/* ample room for Twrite/Rread header (iounit) */
+#define V9FS_IOHDRSZ	24
+
+/* qids are the unique ID for a file (like an inode) */
+struct v9fs_qid {
+	u8 type;
+	u32 version;
+	u64 path;
+};
+
+/* Plan 9 file metadata (stat) structure */
+struct v9fs_stat {
+	u16 size;
+	u16 type;
+	u32 dev;
+	struct v9fs_qid qid;
+	u32 mode;
+	u32 atime;
+	u32 mtime;
+	u64 length;
+	char *name;
+	char *uid;
+	char *gid;
+	char *muid;
+	char *extension;	/* 9p2000.u extensions */
+	u32 n_uid;		/* 9p2000.u extensions */
+	u32 n_gid;		/* 9p2000.u extensions */
+	u32 n_muid;		/* 9p2000.u extensions */
+	char data[0];
+};
+
+/* Structures for Protocol Operations */
+
+struct Tversion {
+	u32 msize;
+	char *version;
+};
+
+struct Rversion {
+	u32 msize;
+	char *version;
+};
+
+struct Tauth {
+	u32 afid;
+	char *uname;
+	char *aname;
+};
+
+struct Rauth {
+	struct v9fs_qid qid;
+};
+
+struct Rerror {
+	char *error;
+	u32 errno;		/* 9p2000.u extension */
+};
+
+struct Tflush {
+	u32 oldtag;
+};
+
+struct Rflush {
+};
+
+struct Tattach {
+	u32 fid;
+	u32 afid;
+	char *uname;
+	char *aname;
+};
+
+struct Rattach {
+	struct v9fs_qid qid;
+};
+
+struct Twalk {
+	u32 fid;
+	u32 newfid;
+	u32 nwname;
+	char **wnames;
+};
+
+struct Rwalk {
+	u32 nwqid;
+	struct v9fs_qid *wqids;
+};
+
+struct Topen {
+	u32 fid;
+	u8 mode;
+};
+
+struct Ropen {
+	struct v9fs_qid qid;
+	u32 iounit;
+};
+
+struct Tcreate {
+	u32 fid;
+	char *name;
+	u32 perm;
+	u8 mode;
+};
+
+struct Rcreate {
+	struct v9fs_qid qid;
+	u32 iounit;
+};
+
+struct Tread {
+	u32 fid;
+	u64 offset;
+	u32 count;
+};
+
+struct Rread {
+	u32 count;
+	u8 *data;
+};
+
+struct Twrite {
+	u32 fid;
+	u64 offset;
+	u32 count;
+	u8 *data;
+};
+
+struct Rwrite {
+	u32 count;
+};
+
+struct Tclunk {
+	u32 fid;
+};
+
+struct Rclunk {
+};
+
+struct Tremove {
+	u32 fid;
+};
+
+struct Rremove {
+};
+
+struct Tstat {
+	u32 fid;
+};
+
+struct Rstat {
+	struct v9fs_stat *stat;
+};
+
+struct Twstat {
+	u32 fid;
+	struct v9fs_stat *stat;
+};
+
+struct Rwstat {
+};
+
+/*
+  * fcall is the primary packet structure
+  *
+  * size, id and tag are the message header; v9fs_serialize_fcall() emits
+  * them as size[4] id[1] tag[2].  params holds the body of the one message
+  * kind that id selects.
+  */
+
+struct v9fs_fcall {
+	u32 size;
+	u8 id;
+	u16 tag;
+
+	union {
+		struct Tversion tversion;
+		struct Rversion rversion;
+		struct Tauth tauth;
+		struct Rauth rauth;
+		struct Rerror rerror;
+		struct Tflush tflush;
+		struct Rflush rflush;
+		struct Tattach tattach;
+		struct Rattach rattach;
+		struct Twalk twalk;
+		struct Rwalk rwalk;
+		struct Topen topen;
+		struct Ropen ropen;
+		struct Tcreate tcreate;
+		struct Rcreate rcreate;
+		struct Tread tread;
+		struct Rread rread;
+		struct Twrite twrite;
+		struct Rwrite rwrite;
+		struct Tclunk tclunk;
+		struct Rclunk rclunk;
+		struct Tremove tremove;
+		struct Rremove rremove;
+		struct Tstat tstat;
+		struct Rstat rstat;
+		struct Twstat twstat;
+		struct Rwstat rwstat;
+	} params;
+};
+
+/* error string of a reply, or "" when there is no reply; argument is parenthesized so any pointer expression is safe */
+#define FCALL_ERROR(fcall) ((fcall) ? (fcall)->params.rerror.error : "")
+
+int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+		   char *version, struct v9fs_fcall **rcall);
+
+int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+		  u32 fid, u32 afid, struct v9fs_fcall **rcall);
+
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+		 struct v9fs_fcall **rcall);
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
+
+int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
+		struct v9fs_fcall **rcall);
+
+int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+		 struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+
+int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+		char *name, struct v9fs_fcall **rcall);
+
+int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+		struct v9fs_fcall **rcall);
+
+int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+		  struct v9fs_fcall **rcall);
+
+int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+		  u32 perm, u8 mode, struct v9fs_fcall **rcall);
+
+int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
+		u64 offset, u32 count, struct v9fs_fcall **rcall);
+
+int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+		 u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 00000000000..1554731bd65
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,693 @@
+/*
+ * linux/fs/9p/conv.c
+ *
+ * 9P protocol conversion functions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "conv.h"
+
+/*
+ * Buffer to help with string parsing
+ *
+ * A cursor over a flat byte region.  buf_init() points sp and p at the
+ * start of the region and ep at start + length; the buf_put and buf_get
+ * helpers then move p forward.
+ */
+struct cbuf {
+	unsigned char *sp;	/* start of the region */
+	unsigned char *p;	/* current position */
+	unsigned char *ep;	/* end of the region (start + length) */
+};
+
+/* point a cursor at the datalen bytes starting at data */
+static inline void buf_init(struct cbuf *buf, void *data, int datalen)
+{
+	buf->sp = data;
+	buf->p = buf->sp;
+	buf->ep = buf->sp + datalen;
+}
+
+/* true once an access has been refused for running past the buffer end */
+static inline int buf_check_overflow(struct cbuf *buf)
+{
+	return buf->p > buf->ep;
+}
+
+/*
+ * buf_check_size - check that len more bytes fit between p and ep
+ *
+ * Returns 1 when the access is in bounds.  Otherwise returns 0 and leaves
+ * p at ep + 1, the state buf_check_overflow() reports, so every later
+ * access is refused as well.  Callers must not touch the buffer when 0 is
+ * returned.  A negative len (e.g. a large u32 count converted to int) is
+ * refused too.
+ */
+static inline int buf_check_size(struct cbuf *buf, int len)
+{
+	if (buf->p <= buf->ep && len >= 0 && len <= buf->ep - buf->p)
+		return 1;
+
+	/* log and mark only the first overflow */
+	if (buf->p <= buf->ep) {
+		eprintk(KERN_ERR, "buffer overflow\n");
+		buf->p = buf->ep + 1;
+	}
+
+	return 0;
+}
+
+/* reserve len bytes at the cursor; returns NULL when they do not fit */
+static inline void *buf_alloc(struct cbuf *buf, int len)
+{
+	void *ret = NULL;
+
+	if (buf_check_size(buf, len)) {
+		ret = buf->p;
+		buf->p += len;
+	}
+
+	return ret;
+}
+
+/*
+ * buf_put_*: append a value in little-endian byte order.  Nothing is
+ * written when the value does not fit; the buffer is marked overflowed.
+ */
+static inline void buf_put_int8(struct cbuf *buf, u8 val)
+{
+	if (buf_check_size(buf, 1)) {
+		buf->p[0] = val;
+		buf->p++;
+	}
+}
+
+static inline void buf_put_int16(struct cbuf *buf, u16 val)
+{
+	if (buf_check_size(buf, 2)) {
+		*(__le16 *) buf->p = cpu_to_le16(val);
+		buf->p += 2;
+	}
+}
+
+static inline void buf_put_int32(struct cbuf *buf, u32 val)
+{
+	if (buf_check_size(buf, 4)) {
+		*(__le32 *)buf->p = cpu_to_le32(val);
+		buf->p += 4;
+	}
+}
+
+static inline void buf_put_int64(struct cbuf *buf, u64 val)
+{
+	if (buf_check_size(buf, 8)) {
+		*(__le64 *)buf->p = cpu_to_le64(val);
+		buf->p += 8;
+	}
+}
+
+/* append a 2-byte length followed by slen bytes of s (no terminator) */
+static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
+{
+	/* check prefix and payload together so neither is half-written */
+	if (buf_check_size(buf, slen + 2)) {
+		buf_put_int16(buf, slen);
+		memcpy(buf->p, s, slen);
+		buf->p += slen;
+	}
+}
+
+/* append a NUL-terminated string; NULL is sent as the empty string */
+static inline void buf_put_string(struct cbuf *buf, const char *s)
+{
+	/* v9fs_size_stat() counts a NULL string as zero bytes; match it */
+	if (s == NULL)
+		s = "";
+
+	buf_put_stringn(buf, s, strlen(s));
+}
+
+/* append datalen raw bytes */
+static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
+{
+	if (buf_check_size(buf, datalen)) {
+		memcpy(buf->p, data, datalen);
+		buf->p += datalen;
+	}
+}
+
+/*
+ * buf_get_*: consume a little-endian value.  Returns 0 without reading
+ * when the value is not fully inside the buffer; the buffer is marked
+ * overflowed.
+ */
+static inline u8 buf_get_int8(struct cbuf *buf)
+{
+	u8 ret = 0;
+
+	if (buf_check_size(buf, 1)) {
+		ret = buf->p[0];
+		buf->p++;
+	}
+
+	return ret;
+}
+
+static inline u16 buf_get_int16(struct cbuf *buf)
+{
+	u16 ret = 0;
+
+	if (buf_check_size(buf, 2)) {
+		ret = le16_to_cpu(*(__le16 *)buf->p);
+		buf->p += 2;
+	}
+
+	return ret;
+}
+
+static inline u32 buf_get_int32(struct cbuf *buf)
+{
+	u32 ret = 0;
+
+	if (buf_check_size(buf, 4)) {
+		ret = le32_to_cpu(*(__le32 *)buf->p);
+		buf->p += 4;
+	}
+
+	return ret;
+}
+
+static inline u64 buf_get_int64(struct cbuf *buf)
+{
+	u64 ret = 0;
+
+	if (buf_check_size(buf, 8)) {
+		ret = le64_to_cpu(*(__le64 *)buf->p);
+		buf->p += 8;
+	}
+
+	return ret;
+}
+
+/*
+ * buf_get_string - copy a length-prefixed string into data
+ *
+ * The copy is NUL-terminated.  Returns the number of bytes stored in data
+ * (string length + 1), or 0 when the string is not fully inside buf or
+ * does not fit in datalen bytes.
+ */
+static inline int
+buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
+{
+	u16 len = buf_get_int16(buf);
+
+	if (!buf_check_size(buf, len))
+		return 0;
+
+	if (len + 1 > datalen) {
+		/* skip the string so the cursor stays on a field boundary */
+		buf->p += len;
+		return 0;
+	}
+
+	memcpy(data, buf->p, len);
+	data[len] = 0;
+	buf->p += len;
+
+	return len + 1;
+}
+
+/*
+ * buf_get_stringb - copy a string from buf into the space left in sbuf
+ *
+ * Returns a pointer to the copy inside sbuf, or NULL on failure.  When
+ * the string does not fit in sbuf, sbuf is marked overflowed so callers
+ * that test buf_check_overflow() on it see the failure.
+ */
+static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
+{
+	char *ret = NULL;
+	unsigned int room = 0;
+	int n;
+
+	/* an overflowed destination has no room; ep - p would be negative */
+	if (!buf_check_overflow(sbuf))
+		room = sbuf->ep - sbuf->p;
+
+	n = buf_get_string(buf, (char *)sbuf->p, room);
+
+	if (n > 0) {
+		ret = (char *)sbuf->p;
+		sbuf->p += n;
+	} else if (!buf_check_overflow(buf)) {
+		/* source was fine, so the destination was too small */
+		sbuf->p = sbuf->ep + 1;
+	}
+
+	return ret;
+}
+
+/* copy datalen bytes out of buf; returns datalen, or 0 if out of bounds */
+static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+{
+	if (!buf_check_size(buf, datalen))
+		return 0;
+
+	memcpy(data, buf->p, datalen);
+	buf->p += datalen;
+
+	return datalen;
+}
+
+/*
+ * buf_get_datab - copy datalen bytes from buf into the space left in dbuf
+ *
+ * Returns a pointer to the copy inside dbuf.  Returns NULL when datalen
+ * is 0 or when either buffer is too small; in the latter case the short
+ * buffer is marked overflowed and nothing is copied.
+ */
+static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
+				  int datalen)
+{
+	char *ret = NULL;
+	int n = 0;
+
+	/* refuse before copying: datalen comes from the received message */
+	if (!buf_check_size(dbuf, datalen))
+		return NULL;
+
+	n = buf_get_data(buf, dbuf->p, datalen);
+
+	if (n > 0) {
+		ret = (char *)dbuf->p;
+		dbuf->p += n;
+	}
+
+	return ret;
+}
+
+/**
+ * v9fs_size_stat - calculate the size of a variable length stat struct
+ * @v9ses: session information
+ * @stat: metadata (stat) structure
+ *
+ */
+
+static int v9fs_size_stat(struct v9fs_session_info *v9ses,
+			  struct v9fs_stat *stat)
+{
+	int total;
+
+	if (!stat) {
+		eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
+		return 0;
+	}
+
+	/*
+	 * Fixed part, not counting the leading size[2] field itself:
+	 * type[2] dev[4] qid.type[1] qid.vers[4] qid.path[8] mode[4]
+	 * atime[4] mtime[4] length[8], plus a 2-byte length prefix for
+	 * each of name, uid, gid and muid.
+	 */
+	total = 2 + 4 + 1 + 4 + 8 + 4 + 4 + 4 + 8 + 4 * 2;
+
+	/* a missing string contributes only its length prefix */
+	if (stat->name != NULL)
+		total += strlen(stat->name);
+	if (stat->uid != NULL)
+		total += strlen(stat->uid);
+	if (stat->gid != NULL)
+		total += strlen(stat->gid);
+	if (stat->muid != NULL)
+		total += strlen(stat->muid);
+
+	if (!v9ses->extended)
+		return total;
+
+	/* n_uid[4] n_gid[4] n_muid[4] and the extension length prefix[2] */
+	total += 4 + 4 + 4 + 2;
+	if (stat->extension != NULL)
+		total += strlen(stat->extension);
+
+	return total;
+}
+
+/**
+ * serialize_stat - safely format a stat structure for transmission
+ * @v9ses: session info
+ * @stat: metadata (stat) structure
+ * @bufp: buffer to serialize structure into
+ *
+ */
+
+/*
+ * Writes the fields of stat into bufp in the order below; the order of
+ * the calls is the order of the fields in the encoded structure.
+ * Returns stat->size, or 0 if bufp was marked overflowed.
+ */
+static int
+serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
+	       struct cbuf *bufp)
+{
+	/* fixed-width fields */
+	buf_put_int16(bufp, stat->size);
+	buf_put_int16(bufp, stat->type);
+	buf_put_int32(bufp, stat->dev);
+	buf_put_int8(bufp, stat->qid.type);
+	buf_put_int32(bufp, stat->qid.version);
+	buf_put_int64(bufp, stat->qid.path);
+	buf_put_int32(bufp, stat->mode);
+	buf_put_int32(bufp, stat->atime);
+	buf_put_int32(bufp, stat->mtime);
+	buf_put_int64(bufp, stat->length);
+
+	/* length-prefixed strings */
+	buf_put_string(bufp, stat->name);
+	buf_put_string(bufp, stat->uid);
+	buf_put_string(bufp, stat->gid);
+	buf_put_string(bufp, stat->muid);
+
+	/* extra fields, written only when the session is flagged extended */
+	if (v9ses->extended) {
+		buf_put_string(bufp, stat->extension);
+		buf_put_int32(bufp, stat->n_uid);
+		buf_put_int32(bufp, stat->n_gid);
+		buf_put_int32(bufp, stat->n_muid);
+	}
+
+	if (buf_check_overflow(bufp))
+		return 0;
+
+	return stat->size;
+}
+
+/**
+ * deserialize_stat - safely decode a received metadata (stat) structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @stat: metadata (stat) structure
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+/*
+ * Fills stat from bufp; the strings are copied into dbufp and stat points
+ * at the copies.  Returns 0 if either buffer overflowed, otherwise
+ * stat->size + 2.
+ */
+static inline int
+deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
+		 struct v9fs_stat *stat, struct cbuf *dbufp)
+{
+
+	/* fixed-width fields, in the order they appear in the message */
+	stat->size = buf_get_int16(bufp);
+	stat->type = buf_get_int16(bufp);
+	stat->dev = buf_get_int32(bufp);
+	stat->qid.type = buf_get_int8(bufp);
+	stat->qid.version = buf_get_int32(bufp);
+	stat->qid.path = buf_get_int64(bufp);
+	stat->mode = buf_get_int32(bufp);
+	stat->atime = buf_get_int32(bufp);
+	stat->mtime = buf_get_int32(bufp);
+	stat->length = buf_get_int64(bufp);
+
+	/* length-prefixed strings */
+	stat->name = buf_get_stringb(bufp, dbufp);
+	stat->uid = buf_get_stringb(bufp, dbufp);
+	stat->gid = buf_get_stringb(bufp, dbufp);
+	stat->muid = buf_get_stringb(bufp, dbufp);
+
+	if (v9ses->extended) {
+		stat->extension = buf_get_stringb(bufp, dbufp);
+		stat->n_uid = buf_get_int32(bufp);
+		stat->n_gid = buf_get_int32(bufp);
+		stat->n_muid = buf_get_int32(bufp);
+	} else {
+		/*
+		 * The message carries no extended fields.  stat may be
+		 * uninitialized memory, so give them defined values rather
+		 * than leaving a stray pointer behind.
+		 */
+		stat->extension = NULL;
+		stat->n_uid = 0;
+		stat->n_gid = 0;
+		stat->n_muid = 0;
+	}
+
+	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+		return 0;
+
+	return stat->size + 2;
+}
+
+/**
+ * deserialize_statb - wrapper for decoding a received metadata structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
+						  *v9ses, struct cbuf *bufp,
+						  struct cbuf *dbufp)
+{
+	/* the structure itself is carved out of the destination buffer */
+	struct v9fs_stat *stat = buf_alloc(dbufp, sizeof(struct v9fs_stat));
+
+	if (stat == NULL)
+		return NULL;
+
+	/* deserialize_stat() returns 0 when a buffer overflowed */
+	if (deserialize_stat(v9ses, bufp, stat, dbufp) <= 0)
+		return NULL;
+
+	return stat;
+}
+
+/**
+ * v9fs_deserialize_stat - decode a received metadata structure
+ * @v9ses: session info
+ * @buf: buffer to deserialize
+ * @buflen: length of received buffer
+ * @stat: metadata structure to decode into
+ * @statlen: length of destination metadata structure
+ *
+ */
+
+int
+v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
+		      u32 buflen, struct v9fs_stat *stat, u32 statlen)
+{
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+
+	/*
+	 * statlen must cover at least the structure itself, otherwise the
+	 * subtraction below wraps around.  Fail with 0, the same value
+	 * deserialize_stat() returns on overflow.
+	 */
+	if (statlen < sizeof(struct v9fs_stat))
+		return 0;
+
+	buf_init(bufp, buf, buflen);
+
+	/* strings are stored in the space that follows the structure */
+	buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
+		 statlen - sizeof(struct v9fs_stat));
+
+	return deserialize_stat(v9ses, bufp, stat, dbufp);
+}
+
+/*
+ * Computes the encoded length of a T-message.  Returns 0 for a message
+ * id it does not know.  For TWSTAT the computed stat length is also
+ * stored in the stat structure's size field.
+ */
+static inline int
+v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
+{
+	/* every message starts with size[4] msg[1] tag[2] */
+	int total = 4 + 1 + 2;
+	int n;
+
+	switch (fcall->id) {
+	case TVERSION:		/* msize[4] version[s] */
+		total += 4 + 2 + strlen(fcall->params.tversion.version);
+		break;
+	case TAUTH:		/* afid[4] uname[s] aname[s] */
+		total += 4 + 2 + strlen(fcall->params.tauth.uname) +
+		    2 + strlen(fcall->params.tauth.aname);
+		break;
+	case TFLUSH:		/* oldtag[2] */
+		total += 2;
+		break;
+	case TATTACH:		/* fid[4] afid[4] uname[s] aname[s] */
+		total += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
+		    2 + strlen(fcall->params.tattach.aname);
+		break;
+	case TWALK:		/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+		total += 4 + 4 + 2;
+		/* one length-prefixed string per path element */
+		for (n = 0; n < fcall->params.twalk.nwname; n++)
+			total += 2 + strlen(fcall->params.twalk.wnames[n]);
+		break;
+	case TOPEN:		/* fid[4] mode[1] */
+		total += 4 + 1;
+		break;
+	case TCREATE:		/* fid[4] name[s] perm[4] mode[1] */
+		total += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
+		break;
+	case TREAD:		/* fid[4] offset[8] count[4] */
+		total += 4 + 8 + 4;
+		break;
+	case TWRITE:		/* fid[4] offset[8] count[4] data[count] */
+		total += 4 + 8 + 4 + fcall->params.twrite.count;
+		break;
+	case TCLUNK:		/* fid[4] */
+	case TREMOVE:		/* fid[4] */
+	case TSTAT:		/* fid[4] */
+		total += 4;
+		break;
+	case TWSTAT: {		/* fid[4] stat[n] */
+		struct v9fs_stat *st = fcall->params.twstat.stat;
+
+		st->size = v9fs_size_stat(v9ses, st);
+		total += 4 + 2 + 2 + st->size;
+		break;
+	}
+	default:
+		eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
+		return 0;
+	}
+
+	return total;
+}
+
+/**
+ * v9fs_serialize_fcall - marshall fcall struct into a packet
+ * @v9ses: session information
+ * @fcall: structure to convert
+ * @data: buffer to serialize fcall into
+ * @datalen: length of buffer to serialize fcall into
+ *
+ */
+
+/*
+ * Returns the encoded size on success, -EINVAL for a NULL fcall, -EPROTO
+ * for an unknown message id and -EIO when the message does not fit in
+ * datalen bytes.
+ */
+int
+v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
+		     void *data, u32 datalen)
+{
+	int i = 0;
+	struct v9fs_stat *stat = NULL;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	buf_init(bufp, data, datalen);
+
+	if (!fcall) {
+		eprintk(KERN_ERR, "no fcall\n");
+		return -EINVAL;
+	}
+
+	fcall->size = v9fs_size_fcall(v9ses, fcall);
+
+	/* 0 means v9fs_size_fcall() rejected (and logged) the message id */
+	if (fcall->size == 0)
+		return -EPROTO;
+
+	/*
+	 * Refuse an oversized message before anything is stored, so no
+	 * write can land past the end of data.
+	 */
+	if (fcall->size > datalen) {
+		eprintk(KERN_ERR, "buffer overflow\n");
+		return -EIO;
+	}
+
+	/* header: size[4] id[1] tag[2] */
+	buf_put_int32(bufp, fcall->size);
+	buf_put_int8(bufp, fcall->id);
+	buf_put_int16(bufp, fcall->tag);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
+		fcall->tag);
+
+	/* now encode it */
+	switch (fcall->id) {
+	default:
+		eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
+		return -EPROTO;
+	case TVERSION:
+		buf_put_int32(bufp, fcall->params.tversion.msize);
+		buf_put_string(bufp, fcall->params.tversion.version);
+		break;
+	case TAUTH:
+		buf_put_int32(bufp, fcall->params.tauth.afid);
+		buf_put_string(bufp, fcall->params.tauth.uname);
+		buf_put_string(bufp, fcall->params.tauth.aname);
+		break;
+	case TFLUSH:
+		buf_put_int16(bufp, fcall->params.tflush.oldtag);
+		break;
+	case TATTACH:
+		buf_put_int32(bufp, fcall->params.tattach.fid);
+		buf_put_int32(bufp, fcall->params.tattach.afid);
+		buf_put_string(bufp, fcall->params.tattach.uname);
+		buf_put_string(bufp, fcall->params.tattach.aname);
+		break;
+	case TWALK:
+		buf_put_int32(bufp, fcall->params.twalk.fid);
+		buf_put_int32(bufp, fcall->params.twalk.newfid);
+		buf_put_int16(bufp, fcall->params.twalk.nwname);
+		for (i = 0; i < fcall->params.twalk.nwname; i++)
+			buf_put_string(bufp, fcall->params.twalk.wnames[i]);
+		break;
+	case TOPEN:
+		buf_put_int32(bufp, fcall->params.topen.fid);
+		buf_put_int8(bufp, fcall->params.topen.mode);
+		break;
+	case TCREATE:
+		buf_put_int32(bufp, fcall->params.tcreate.fid);
+		buf_put_string(bufp, fcall->params.tcreate.name);
+		buf_put_int32(bufp, fcall->params.tcreate.perm);
+		buf_put_int8(bufp, fcall->params.tcreate.mode);
+		break;
+	case TREAD:
+		buf_put_int32(bufp, fcall->params.tread.fid);
+		buf_put_int64(bufp, fcall->params.tread.offset);
+		buf_put_int32(bufp, fcall->params.tread.count);
+		break;
+	case TWRITE:
+		buf_put_int32(bufp, fcall->params.twrite.fid);
+		buf_put_int64(bufp, fcall->params.twrite.offset);
+		buf_put_int32(bufp, fcall->params.twrite.count);
+		buf_put_data(bufp, fcall->params.twrite.data,
+			     fcall->params.twrite.count);
+		break;
+	case TCLUNK:
+		buf_put_int32(bufp, fcall->params.tclunk.fid);
+		break;
+	case TREMOVE:
+		buf_put_int32(bufp, fcall->params.tremove.fid);
+		break;
+	case TSTAT:
+		buf_put_int32(bufp, fcall->params.tstat.fid);
+		break;
+	case TWSTAT:
+		buf_put_int32(bufp, fcall->params.twstat.fid);
+		stat = fcall->params.twstat.stat;
+
+		/* outer length covers the stat plus its own size[2] field */
+		buf_put_int16(bufp, stat->size + 2);
+		serialize_stat(v9ses, stat, bufp);
+		break;
+	}
+
+	if (buf_check_overflow(bufp))
+		return -EIO;
+
+	return fcall->size;
+}
+
+/**
+ * v9fs_deserialize_fcall - unmarshal a response
+ * @v9ses: session information
+ * @msgsize: size of rcall message
+ * @buf: received buffer
+ * @buflen: length of received buffer
+ * @rcall: fcall structure to populate
+ * @rcalllen: length of fcall structure to populate
+ *
+ */
+
+/*
+ * Decoding starts at the id byte of buf; the size field is passed in as
+ * msgsize.  Variable-length parts of the reply (strings, qid array, read
+ * data, stat) are stored in the rcalllen - sizeof(struct v9fs_fcall)
+ * bytes that follow rcall.  Returns msgsize on success, -EPROTO for an
+ * unknown message id and -EIO when either buffer is too small.
+ */
+int
+v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
+		       void *buf, u32 buflen, struct v9fs_fcall *rcall,
+		       int rcalllen)
+{
+
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+	int i = 0;
+
+	/* rcall must at least hold the structure; avoids a wrapped length */
+	if (rcalllen < (int)sizeof(struct v9fs_fcall))
+		return -EIO;
+
+	buf_init(bufp, buf, buflen);
+	buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
+		 rcalllen - sizeof(struct v9fs_fcall));
+
+	rcall->size = msgsize;
+	rcall->id = buf_get_int8(bufp);
+	rcall->tag = buf_get_int16(bufp);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
+		rcall->tag);
+	switch (rcall->id) {
+	default:
+		eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
+		return -EPROTO;
+	case RVERSION:
+		rcall->params.rversion.msize = buf_get_int32(bufp);
+		rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+		break;
+	case RFLUSH:
+		break;
+	case RATTACH:
+		rcall->params.rattach.qid.type = buf_get_int8(bufp);
+		rcall->params.rattach.qid.version = buf_get_int32(bufp);
+		rcall->params.rattach.qid.path = buf_get_int64(bufp);
+		break;
+	case RWALK:
+		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
+		/*
+		 * The decoded array lives in the destination buffer; taking
+		 * it from bufp would skip over the encoded qids and
+		 * overwrite the received message.
+		 */
+		rcall->params.rwalk.wqids = buf_alloc(dbufp,
+		      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
+		if (rcall->params.rwalk.wqids)
+			for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
+				/* type[1] version[4] path[8], as in RATTACH */
+				rcall->params.rwalk.wqids[i].type =
+				    buf_get_int8(bufp);
+				rcall->params.rwalk.wqids[i].version =
+				    buf_get_int32(bufp);
+				rcall->params.rwalk.wqids[i].path =
+				    buf_get_int64(bufp);
+			}
+		break;
+	case ROPEN:
+		rcall->params.ropen.qid.type = buf_get_int8(bufp);
+		rcall->params.ropen.qid.version = buf_get_int32(bufp);
+		rcall->params.ropen.qid.path = buf_get_int64(bufp);
+		rcall->params.ropen.iounit = buf_get_int32(bufp);
+		break;
+	case RCREATE:
+		rcall->params.rcreate.qid.type = buf_get_int8(bufp);
+		rcall->params.rcreate.qid.version = buf_get_int32(bufp);
+		rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+		rcall->params.rcreate.iounit = buf_get_int32(bufp);
+		break;
+	case RREAD:
+		rcall->params.rread.count = buf_get_int32(bufp);
+		rcall->params.rread.data = buf_get_datab(bufp, dbufp,
+			rcall->params.rread.count);
+		break;
+	case RWRITE:
+		rcall->params.rwrite.count = buf_get_int32(bufp);
+		break;
+	case RCLUNK:
+		break;
+	case RREMOVE:
+		break;
+	case RSTAT:
+		/* skip the outer length that precedes the stat */
+		buf_get_int16(bufp);
+		rcall->params.rstat.stat =
+		    deserialize_statb(v9ses, bufp, dbufp);
+		break;
+	case RWSTAT:
+		break;
+	case RERROR:
+		rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
+		/*
+		 * NOTE(review): the errno field is u32 but only two bytes
+		 * are read here; confirm the width against the 9P2000.u
+		 * Rerror definition before changing it.
+		 */
+		if (v9ses->extended)
+			rcall->params.rerror.errno = buf_get_int16(bufp);
+		break;
+	}
+
+	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+		return -EIO;
+
+	return rcall->size;
+}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 00000000000..ee849613c61
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
+/*
+ * linux/fs/9p/conv.h
+ *
+ * 9P protocol conversion definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
+			  u32 buflen, struct v9fs_stat *stat, u32 statlen);
+int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
+			 void *buf, u32 buflen);
+int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
+			   void *buf, u32 buflen, struct v9fs_fcall *rcall,
+			   int rcalllen);
+
+/* this one is actually in error.c right now */
+int v9fs_errstr2errno(char *errstr);
-- 
cgit v1.2.3-18-g5258


From 426cc91aa651b50713d06d45e5c3c3e90cfd40d9 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:22 -0700
Subject: [PATCH] v9fs: transport modules

This part of the patch contains transport routines.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/mux.c        | 440 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/mux.h        |  39 +++++
 fs/9p/trans_fd.c   | 172 +++++++++++++++++++++
 fs/9p/trans_sock.c | 282 ++++++++++++++++++++++++++++++++++
 fs/9p/transport.h  |  46 ++++++
 fs/9p/v9fs.c       |   5 -
 6 files changed, 979 insertions(+), 5 deletions(-)
 create mode 100644 fs/9p/mux.c
 create mode 100644 fs/9p/mux.h
 create mode 100644 fs/9p/trans_fd.c
 create mode 100644 fs/9p/trans_sock.c
 create mode 100644 fs/9p/transport.h

(limited to 'fs')

diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 00000000000..8ebc1af2c24
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,440 @@
+/*
+ * linux/fs/9p/mux.c
+ *
+ * Protocol Multiplexer
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "transport.h"
+#include "conv.h"
+#include "mux.h"
+
+/**
+ * dprintcond - print condition of session info
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static inline int
+dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
+		req->rcall);
+	return 0;
+}
+
+/**
+ * xread - force read of a certain number of bytes
+ * @v9ses: session info structure
+ * @ptr: pointer to buffer
+ * @sz: number of bytes to read
+ *
+ * Chuck Cranor CS-533 project1
+ */
+
+static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+{
+	int rd = 0;		/* bytes received so far */
+	int ret = 0;
+	while (rd < sz) {
+		ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
+		if (ret <= 0) {	/* error or zero-length read: hand it back as-is */
+			dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
+			return ret;
+		}
+		rd += ret;
+		ptr += ret;	/* advance past the bytes just stored */
+	}
+	return (rd);
+}
+
+/**
+ * read_message - read a full 9P2000 fcall packet
+ * @v9ses: session info structure
+ * @rcall: fcall structure to read into
+ * @rcalllen: size of fcall buffer
+ *
+ */
+
+static int
+read_message(struct v9fs_session_info *v9ses,
+	     struct v9fs_fcall *rcall, int rcalllen)
+{
+	unsigned char buf[4];
+	void *data;
+	int size = 0;
+	int res = 0;
+
+	res = xread(v9ses, buf, sizeof(buf));
+	if (res < 0) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return res;
+	}
+
+	if (res < 4) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return -EIO;
+	}
+
+	size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
+	dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+
+	/* adjust for the four bytes of size */
+	size -= 4;
+	/* the count must cover its own four bytes plus a non-empty body */
+	if (size <= 0 || size > v9ses->maxdata) {
+		dprintk(DEBUG_ERROR, "bad packet size: %d\n", size);
+		return (size <= 0) ? -EIO : -E2BIG;
+	}
+
+	data = kmalloc(size, GFP_KERNEL);
+	if (!data) {
+		eprintk(KERN_WARNING, "out of memory\n");
+		return -ENOMEM;
+	}
+	/* xread() hands back 0 on a zero-length read; that is not success */
+	res = xread(v9ses, data, size);
+	if (res < size) {
+		dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
+			res);
+		kfree(data);
+		return (res < 0) ? res : -EIO;
+	}
+
+	/* we now have an in-memory string that is the reply.
+	 * deserialize it. There is very little to go wrong at this point
+	 * save for v9fs_alloc errors.
+	 */
+	res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
+				     rcall, rcalllen);
+
+	kfree(data);
+
+	if (res < 0)
+		return res;
+
+	return 0;
+}
+
+/**
+ * v9fs_recv - receive an RPC response for a particular tag
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	int ret = 0;
+
+	dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
+	ret = wait_event_interruptible(v9ses->read_wait,
+		       ((v9ses->transport->status != Connected) ||
+			(req->rcall != 0) || dprintcond(v9ses, req)));
+
+	dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+	if (v9ses->transport->status == Disconnected)
+		ret = -ECONNRESET;
+	/* only an interrupted request stays queued, so its Tflush can find it */
+	if (ret != -ERESTARTSYS) {
+		spin_lock(&v9ses->muxlock);
+		list_del(&req->next);
+		spin_unlock(&v9ses->muxlock);
+	}
+
+	return ret;
+}
+
+/**
+ * v9fs_send - send a 9P request
+ * @v9ses: session info structure
+ * @req: RPC request to send
+ *
+ */
+
+static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	int ret = -1;
+	void *data = NULL;
+	struct v9fs_fcall *tcall = req->tcall;
+
+	data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	tcall->size = 0;	/* enforce size recalculation */
+	ret =
+	    v9fs_serialize_fcall(v9ses, tcall, data,
+				 v9ses->maxdata + V9FS_IOHDRSZ);
+	if (ret < 0)
+		goto free_data;
+
+	spin_lock(&v9ses->muxlock);
+	list_add(&req->next, &v9ses->mux_fcalls);
+	spin_unlock(&v9ses->muxlock);
+
+	dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
+		tcall->size);
+	ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
+
+	if (ret != tcall->size) {
+		spin_lock(&v9ses->muxlock);
+		list_del(&req->next);
+		kfree(req->rcall);
+
+		spin_unlock(&v9ses->muxlock);
+		if (ret >= 0)
+			ret = -EREMOTEIO;
+	} else
+		ret = 0;
+
+      free_data:
+	kfree(data);
+	return ret;
+}
+
+/**
+ * v9fs_mux_rpc - send a request, receive a response
+ * @v9ses: session info structure
+ * @tcall: fcall to send
+ * @rcall: buffer to place response into
+ *
+ */
+
+long
+v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
+	     struct v9fs_fcall **rcall)
+{
+	int tid = -1;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_rpcreq req;
+	int ret = -1;
+
+	if (!v9ses)
+		return -EINVAL;
+
+	if (rcall)
+		*rcall = NULL;
+
+	if (tcall->id != TVERSION) {
+		tid = v9fs_get_idpool(&v9ses->tidpool);
+		if (tid < 0)
+			return -ENOMEM;
+	}
+
+	tcall->tag = tid;
+
+	req.tcall = tcall;
+	req.rcall = NULL;
+
+	ret = v9fs_send(v9ses, &req);
+
+	if (ret < 0) {
+		if (tcall->id != TVERSION)
+			v9fs_put_idpool(tid, &v9ses->tidpool);
+		dprintk(DEBUG_MUX, "error %d\n", ret);
+		return ret;
+	}
+
+	ret = v9fs_recv(v9ses, &req);
+
+	fcall = req.rcall;
+
+	dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
+	if (ret == -ERESTARTSYS) {
+		if (v9ses->transport->status != Disconnected
+		    && tcall->id != TFLUSH) {
+			unsigned long flags;
+
+			dprintk(DEBUG_MUX, "flushing the tag: %d\n",
+				tcall->tag);
+			clear_thread_flag(TIF_SIGPENDING);
+			v9fs_t_flush(v9ses, tcall->tag);
+			spin_lock_irqsave(&current->sighand->siglock, flags);
+			recalc_sigpending();
+			spin_unlock_irqrestore(&current->sighand->siglock,
+					       flags);
+			dprintk(DEBUG_MUX, "flushing done\n");
+		}
+
+		goto release_req;
+	} else if (ret < 0)
+		goto release_req;
+
+	if (!fcall)
+		ret = -EIO;
+	else {
+		if (fcall->id == RERROR) {
+			ret = v9fs_errstr2errno(fcall->params.rerror.error);
+			if (ret == 0) {	/* string match failed */
+				if (fcall->params.rerror.errno)
+					ret = -(fcall->params.rerror.errno);
+				else
+					ret = -ESERVERFAULT;
+			}
+		} else if (fcall->id != tcall->id + 1) {
+			dprintk(DEBUG_ERROR,
+				"fcall mismatch: expected %d, got %d\n",
+				tcall->id + 1, fcall->id);
+			ret = -EIO;
+		}
+	}
+
+      release_req:
+	if (tcall->id != TVERSION)
+		v9fs_put_idpool(tid, &v9ses->tidpool);
+	if (rcall)
+		*rcall = fcall;
+	else
+		kfree(fcall);
+
+	return ret;
+}
+
+/**
+ * v9fs_recvproc - kproc to handle demultiplexing responses
+ * @data: session info structure
+ *
+ */
+
+static int v9fs_recvproc(void *data)
+{
+	struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
+	struct v9fs_fcall *rcall = NULL;
+	struct v9fs_rpcreq *rptr;
+	struct v9fs_rpcreq *req;
+	struct v9fs_rpcreq *rreq;
+	int err = 0;
+
+	allow_signal(SIGKILL);
+	set_current_state(TASK_INTERRUPTIBLE);
+	complete(&v9ses->proccmpl);
+	while (!kthread_should_stop() && err >= 0) {
+		req = rptr = rreq = NULL;
+
+		rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+		if (!rcall) {
+			eprintk(KERN_ERR, "no memory for buffers\n");
+			break;
+		}
+
+		err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
+		if (err < 0) {
+			kfree(rcall);
+			break;
+		}
+		spin_lock(&v9ses->muxlock);
+		list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+			if (rreq->tcall->tag == rcall->tag) {
+				req = rreq;
+				req->rcall = rcall;
+				break;
+			}
+		}
+		/* a flush reply also retires the request it named in oldtag */
+		if (req && (req->tcall->id == TFLUSH)) {
+			struct v9fs_rpcreq *treq = NULL;
+			list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
+				if (treq->tcall->tag ==
+				    req->tcall->params.tflush.oldtag) {
+					list_del(&treq->next);	/* treq itself, not the look-ahead */
+					kfree(treq->rcall);
+					break;
+				}
+			}
+		}
+
+		spin_unlock(&v9ses->muxlock);
+
+		if (!req) {
+			dprintk(DEBUG_ERROR,
+				"unexpected response: id %d tag %d\n",
+				rcall->id, rcall->tag);
+
+			kfree(rcall);
+		}
+
+		wake_up_all(&v9ses->read_wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	/* Inform all pending processes about the failure */
+	wake_up_all(&v9ses->read_wait);
+
+	if (signal_pending(current))
+		complete(&v9ses->proccmpl);
+
+	dprintk(DEBUG_MUX, "recvproc: end\n");
+	v9ses->recvproc = NULL;
+
+	return err >= 0;	/* 1 unless read_message() reported an error */
+}
+
+/**
+ * v9fs_mux_init - initialize multiplexer (spawn kproc)
+ * @v9ses: session info structure
+ * @dev_name: mount device information (to create unique kproc)
+ *
+ */
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+{
+	char procname[60];
+
+	strncpy(procname, dev_name, sizeof(procname));
+	procname[sizeof(procname) - 1] = 0;
+
+	init_waitqueue_head(&v9ses->read_wait);
+	init_completion(&v9ses->fcread);
+	init_completion(&v9ses->proccmpl);
+	spin_lock_init(&v9ses->muxlock);
+	INIT_LIST_HEAD(&v9ses->mux_fcalls);
+	v9ses->recvproc = NULL;
+	v9ses->curfcall = NULL;
+
+	v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
+					 "v9fs_recvproc %s", procname);
+
+	if (IS_ERR(v9ses->recvproc)) {
+		eprintk(KERN_ERR, "cannot create receiving thread\n");
+		v9fs_session_close(v9ses);
+		return -ECONNABORTED;
+	}
+
+	wake_up_process(v9ses->recvproc);
+	wait_for_completion(&v9ses->proccmpl);
+
+	return 0;
+}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 00000000000..d7d8fa1c152
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,39 @@
+/*
+ * linux/fs/9p/mux.h
+ *
+ * Multiplexer Definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+/* structure to manage each RPC transaction */
+
+struct v9fs_rpcreq {
+	struct v9fs_fcall *tcall;
+	struct v9fs_fcall *rcall;
+
+	/* XXX - could we put scatter/gather buffers here? */
+
+	struct list_head next;
+};
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
+long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
+		  struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 00000000000..63b58ce98ff
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
+/*
+ * linux/fs/9p/trans_fd.c
+ *
+ * File Descriptor Transport Layer
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+struct v9fs_trans_fd {
+	struct file *in_file;
+	struct file *out_file;
+};
+
+/**
+ * v9fs_fd_recv - receive from the transport's input file
+ * @trans: transport whose priv holds the in/out file pair
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+
+	if (!trans || trans->status != Connected || !ts)
+		return -EIO;
+
+	return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
+}
+
+/**
+ * v9fs_fd_send - send to the transport's output file
+ * @trans: transport whose priv holds the in/out file pair
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+	mm_segment_t oldfs = get_fs();
+	int ret = 0;
+
+	if (!trans || trans->status != Connected || !ts)
+		return -EIO;
+
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
+	set_fs(oldfs);
+
+	return ret;
+}
+
+/**
+ * v9fs_fd_init - initialize file descriptor transport
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct v9fs_trans_fd *ts = NULL;
+	struct v9fs_transport *trans = v9ses->transport;
+
+	if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
+		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		return -ENOPROTOOPT;
+	}
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+
+	if (!ts)
+		return -ENOMEM;
+
+	ts->in_file = fget( v9ses->rfdno );
+	ts->out_file = fget( v9ses->wfdno );
+
+	if (!ts->in_file || !ts->out_file) {
+		if (ts->in_file)
+			fput(ts->in_file);
+
+		if (ts->out_file)
+			fput(ts->out_file);
+
+		kfree(ts);
+		return -EIO;
+	}
+
+	trans->priv = ts;
+	trans->status = Connected;
+
+	return 0;
+}
+
+
+/**
+ * v9fs_fd_close - shutdown file descriptor
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_fd *ts;
+
+	if (!trans)
+		return;
+
+	trans->status = Disconnected;
+	ts = trans->priv;
+
+	if (!ts)
+		return;
+
+	if (ts->in_file)
+		fput(ts->in_file);
+
+	if (ts->out_file)
+		fput(ts->out_file);
+
+	kfree(ts);
+}
+
+struct v9fs_transport v9fs_trans_fd = {
+	.init = v9fs_fd_init,
+	.write = v9fs_fd_send,
+	.read = v9fs_fd_recv,
+	.close = v9fs_fd_close,
+};
+
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 00000000000..081d1c84780
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,282 @@
+/*
+ * linux/fs/9p/trans_sock.c
+ *
+ * Socket Transport Layer
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+#define V9FS_PORT 564
+
+struct v9fs_trans_sock {
+	struct socket *s;
+};
+
+/**
+ * v9fs_sock_recv - receive from a socket
+ * @trans: transport whose priv wraps the socket
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct msghdr msg;
+	struct kvec iov;
+	int result;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	if (trans->status == Disconnected)
+		return -EREMOTEIO;
+
+	result = -EINVAL;
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+
+	result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+
+	dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
+	set_fs(oldfs);
+
+	if (result <= 0) {
+		if (result != -ERESTARTSYS)	/* an interrupted call is not fatal */
+			trans->status = Disconnected;
+	}
+
+	return result;
+}
+
+/**
+ * v9fs_sock_send - send to a socket
+ * @trans: transport whose priv wraps the socket
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct kvec iov;
+	struct msghdr msg;
+	int result = -1;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
+	dump_data(v, len);
+
+	down(&trans->writelock);	/* held until the up() below */
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+	result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+	set_fs(oldfs);
+
+	if (result < 0) {
+		if (result != -ERESTARTSYS)	/* an interrupted call is not fatal */
+			trans->status = Disconnected;
+	}
+
+	up(&trans->writelock);
+	return result;
+}
+
+/**
+ * v9fs_tcp_init - initialize TCP socket
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct socket *csocket = NULL;
+	struct sockaddr_in sin_server;
+	int rc = 0;
+	struct v9fs_trans_sock *ts = NULL;
+	struct v9fs_transport *trans = v9ses->transport;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;	/* stays attached on every later error return */
+	ts->s = NULL;
+
+	if (!addr)
+		return -EINVAL;
+
+	dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
+	sin_server.sin_family = AF_INET;
+	sin_server.sin_addr.s_addr = in_aton(addr);
+	sin_server.sin_port = htons(v9ses->port);
+	rc = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+	if (rc < 0)
+		return rc;	/* no socket was created; csocket must not be used */
+	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server,
+				   sizeof(struct sockaddr_in), 0);
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_tcp: problem connecting socket to %s\n",
+			addr);
+		sock_release(csocket);	/* not yet stored in ts->s */
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_unix_init - initialize UNIX domain socket
+ * @v9ses: session information
+ * @dev_name: path to named pipe
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
+	       char *data)
+{
+	int rc;
+	struct socket *csocket = NULL;
+	struct sockaddr_un sun_server;
+	struct v9fs_transport *trans = v9ses->transport;
+	struct v9fs_trans_sock *ts;
+
+	/* sun_path is UNIX_PATH_MAX bytes and strcpy() also writes the NUL */
+	if (strlen(dev_name) >= UNIX_PATH_MAX) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+			dev_name);
+		return -ENOMEM;
+	}
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;	/* stays attached on every later error return */
+	ts->s = NULL;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	sun_server.sun_family = PF_UNIX;
+	strcpy(sun_server.sun_path, dev_name);
+	rc = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	if (rc < 0)
+		return rc;	/* no socket was created; csocket must not be used */
+	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+		sizeof(struct sockaddr_un) - 1, 0);	/* -1 *is* important */
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
+			dev_name, rc);
+		sock_release(csocket);	/* not yet stored in ts->s */
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_sock_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	if ((ts) && (ts->s)) {
+		dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
+		sock_release(ts->s);
+		ts->s = NULL;
+		trans->status = Disconnected;
+		dprintk(DEBUG_TRANS, "socket closed\n");
+	}
+
+	kfree(ts);
+}
+
+struct v9fs_transport v9fs_trans_tcp = {
+	.init = v9fs_tcp_init,
+	.write = v9fs_sock_send,
+	.read = v9fs_sock_recv,
+	.close = v9fs_sock_close,
+};
+
+struct v9fs_transport v9fs_trans_unix = {
+	.init = v9fs_unix_init,
+	.write = v9fs_sock_send,
+	.read = v9fs_sock_recv,
+	.close = v9fs_sock_close,
+};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 00000000000..9e9cd418efd
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
+/*
+ * linux/fs/9p/transport.h
+ *
+ * Transport Definition
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+enum v9fs_transport_status {
+	Connected,
+	Disconnected,
+	Hung,
+};
+
+struct v9fs_transport {
+	enum v9fs_transport_status status;
+	struct semaphore writelock;
+	struct semaphore readlock;
+	void *priv;
+
+	int (*init) (struct v9fs_session_info *, const char *, char *);
+	int (*write) (struct v9fs_transport *, void *, int);
+	int (*read) (struct v9fs_transport *, void *, int);
+	void (*close) (struct v9fs_transport *);
+};
+
+extern struct v9fs_transport v9fs_trans_tcp;
+extern struct v9fs_transport v9fs_trans_unix;
+extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 14d663ebfcb..a573b751dd9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -296,11 +296,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 	case PROTO_FD:
 		trans_proto = &v9fs_trans_fd;
 		*v9ses->remotename = 0;
-		if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
-			printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
-			retval = -ENOPROTOOPT;
-			goto SessCleanUp;
-		}
 		break;
 	default:
 		printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
-- 
cgit v1.2.3-18-g5258


From 322b329ab787de5f45abca9c9eabfd33bc5927e8 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:23 -0700
Subject: [PATCH] v9fs: Support to force umount

Support for force umount

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/mux.c       | 20 ++++++++++++++++++++
 fs/9p/mux.h       |  1 +
 fs/9p/v9fs.c      |  9 +++++++++
 fs/9p/v9fs.h      |  4 +---
 fs/9p/vfs_super.c |  9 +++++++++
 5 files changed, 40 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8ebc1af2c24..0854bef58c1 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -323,6 +323,26 @@ v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
 	return ret;
 }
 
+/**
+ * v9fs_mux_cancel_requests - cancels all pending requests
+ *
+ * @v9ses: session info structure
+ * @err: error code to return to the requests
+ */
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+{
+	struct v9fs_rpcreq *rptr;
+	struct v9fs_rpcreq *rreq;
+
+	dprintk(DEBUG_MUX, " %d\n", err);
+	spin_lock(&v9ses->muxlock);
+	list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+		rreq->err = err;
+	}
+	spin_unlock(&v9ses->muxlock);
+	wake_up_all(&v9ses->read_wait);
+}
+
 /**
  * v9fs_recvproc - kproc to handle demultiplexing responses
  * @data: session info structure
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index d7d8fa1c152..82ce793af1b 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -37,3 +37,4 @@ struct v9fs_rpcreq {
 int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
 long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
 		  struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index a573b751dd9..13bdbbab438 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -409,6 +409,15 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 	putname(v9ses->remotename);
 }
 
+/**
+ * v9fs_session_cancel - mark transport as disconnected
+ * 	and cancel all pending requests.
+ */
+void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+	v9ses->transport->status = Disconnected;
+	v9fs_mux_cancel_requests(v9ses, -EIO);
+}
+
 extern int v9fs_error_init(void);
 
 /**
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 52203027b15..45dcef42bdd 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -89,9 +89,7 @@ struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
 int v9fs_get_idpool(struct v9fs_idpool *p);
 void v9fs_put_idpool(int id, struct v9fs_idpool *p);
-int v9fs_get_option(char *opts, char *name, char *buf, int buflen);
-long long v9fs_get_int_option(char *opts, char *name, long long dflt);
-int v9fs_parse_tcp_devname(const char *devname, char **addr, char **remotename);
+void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 
 #define V9FS_MAGIC 0x01021997
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index ce0778acc90..868f350b2c5 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -257,10 +257,19 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
+static void
+v9fs_umount_begin(struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	v9fs_session_cancel(v9ses);
+}
+
 static struct super_operations v9fs_super_ops = {
 	.statfs = simple_statfs,
 	.clear_inode = v9fs_clear_inode,
 	.show_options = v9fs_show_options,
+	.umount_begin = v9fs_umount_begin,
 };
 
 struct file_system_type v9fs_fs_type = {
-- 
cgit v1.2.3-18-g5258


From 3ed8491c8a75cefe95b57f7f428a3e2ddd421e97 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:24 -0700
Subject: [PATCH] v9fs: debug and support routines

This part of the patch contains debug and other misc routines.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/debug.h |  70 +++++++++++++++++
 fs/9p/error.c |  93 ++++++++++++++++++++++
 fs/9p/error.h | 181 +++++++++++++++++++++++++++++++++++++++++++
 fs/9p/fid.c   | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/fid.h   |  57 ++++++++++++++
 5 files changed, 642 insertions(+)
 create mode 100644 fs/9p/debug.h
 create mode 100644 fs/9p/error.c
 create mode 100644 fs/9p/error.h
 create mode 100644 fs/9p/fid.c
 create mode 100644 fs/9p/fid.h

(limited to 'fs')

diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 00000000000..4445f06919d
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
+/*
+ *  linux/fs/9p/debug.h - V9FS Debug Definitions
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#define DEBUG_ERROR		(1<<0)
+#define DEBUG_CURRENT		(1<<1)
+#define DEBUG_9P	        (1<<2)
+#define DEBUG_VFS	        (1<<3)
+#define DEBUG_CONV		(1<<4)
+#define DEBUG_MUX		(1<<5)
+#define DEBUG_TRANS		(1<<6)
+#define DEBUG_SLABS	      	(1<<7)
+
+#define DEBUG_DUMP_PKT		0
+
+extern int v9fs_debug_level;
+/* prints only when every bit of @level is set in v9fs_debug_level */
+#define dprintk(level, format, arg...) \
+do {  \
+	if((v9fs_debug_level & (level))==(level)) \
+		printk(KERN_NOTICE "-- %s (%d): " \
+		format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#define eprintk(level, format, arg...) \
+do { \
+	printk(level "v9fs: %s (%d): " \
+		format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#if DEBUG_DUMP_PKT
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+	int i, j;
+	int len = datalen;
+
+	printk(KERN_DEBUG "data ");
+	for (i = 0; i < len; i += 4) {
+		for (j = 0; (j < 4) && (i + j < len); j++)
+			printk(KERN_DEBUG "%02x", data[i + j]);
+		printk(KERN_DEBUG " ");
+	}
+	printk(KERN_DEBUG "\n");
+}
+#else				/* DEBUG_DUMP_PKT */
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+
+}
+#endif				/* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 00000000000..fee5d19179c
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
+/*
+ * linux/fs/9p/error.c
+ *
+ * Error string handling
+ *
+ * Plan 9 uses error strings, Unix uses error numbers.  These functions
+ * try to help manage that and provide for dynamically adding error
+ * mappings.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+#include "debug.h"
+#include "error.h"
+
+/**
+ * v9fs_error_init - preload the error string hash table from errmap
+ *
+ * Takes no arguments and always returns 1.
+ */
+
+int v9fs_error_init(void)
+{
+	struct errormap *c;
+	int bucket;
+
+	/* start with every hash bucket empty */
+	for (bucket = 0; bucket < ERRHASHSZ; bucket++)
+		INIT_HLIST_HEAD(&hash_errmap[bucket]);
+
+	/* hash every errmap entry by name; a NULL name ends the table */
+	for (c = errmap; c->name != NULL; c++) {
+		bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+		INIT_HLIST_NODE(&c->list);
+		hlist_add_head(&c->list, &hash_errmap[bucket]);
+	}
+
+	return 1;
+}
+
+/**
+ * v9fs_errstr2errno - convert error string to negated errno (-1 if unknown)
+ * @errstr: error string
+ *
+ */
+
+int v9fs_errstr2errno(char *errstr)
+{
+	int errno = 0;
+	struct hlist_node *p = NULL;
+	struct errormap *c = NULL;
+	int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+
+	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+		if (!strcmp(c->name, errstr)) {
+			errno = c->val;
+			break;
+		}
+	}
+
+	if (errno == 0) {	/* also reached when the matched val is 0 */
+		/* TODO: if error isn't found, add it dynamically */
+		printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
+		       errstr);
+		errno = 1;
+	}
+
+	return -errno;
+}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 00000000000..4bf2cf5aa1b
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,181 @@
+/*
+ * linux/fs/9p/error.h
+ *
+ * Huge Nasty Error Table
+ *
+ * Plan 9 uses error strings, Unix uses error numbers.  This table tries to
+ * match UNIX strings and Plan 9 strings to unix error numbers.  It is used
+ * to preload the dynamic error table which can also track user-specific error
+ * strings.
+ *
+ *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/errno.h>
+
+struct errormap {
+	char *name;
+	int val;
+
+	struct hlist_node list;
+};
+
+#define ERRHASHSZ		32
+static struct hlist_head hash_errmap[ERRHASHSZ];
+
+/* FixMe - reduce to a reasonable size */
+static struct errormap errmap[] = {
+	{"Operation not permitted", 1},
+	{"wstat prohibited", 1},
+	{"No such file or directory", 2},
+	{"file not found", 2},
+	{"Interrupted system call", 4},
+	{"Input/output error", 5},
+	{"No such device or address", 6},
+	{"Argument list too long", 7},
+	{"Bad file descriptor", 9},
+	{"Resource temporarily unavailable", 11},
+	{"Cannot allocate memory", 12},
+	{"Permission denied", 13},
+	{"Bad address", 14},
+	{"Block device required", 15},
+	{"Device or resource busy", 16},
+	{"File exists", 17},
+	{"Invalid cross-device link", 18},
+	{"No such device", 19},
+	{"Not a directory", 20},
+	{"Is a directory", 21},
+	{"Invalid argument", 22},
+	{"Too many open files in system", 23},
+	{"Too many open files", 24},
+	{"Text file busy", 26},
+	{"File too large", 27},
+	{"No space left on device", 28},
+	{"Illegal seek", 29},
+	{"Read-only file system", 30},
+	{"Too many links", 31},
+	{"Broken pipe", 32},
+	{"Numerical argument out of domain", 33},
+	{"Numerical result out of range", 34},
+	{"Resource deadlock avoided", 35},
+	{"File name too long", 36},
+	{"No locks available", 37},
+	{"Function not implemented", 38},
+	{"Directory not empty", 39},
+	{"Too many levels of symbolic links", 40},
+	{"Unknown error 41", 41},
+	{"No message of desired type", 42},
+	{"Identifier removed", 43},
+	{"File locking deadlock error", 58},
+	{"No data available", 61},
+	{"Machine is not on the network", 64},
+	{"Package not installed", 65},
+	{"Object is remote", 66},
+	{"Link has been severed", 67},
+	{"Communication error on send", 70},
+	{"Protocol error", 71},
+	{"Bad message", 74},
+	{"File descriptor in bad state", 77},
+	{"Streams pipe error", 86},
+	{"Too many users", 87},
+	{"Socket operation on non-socket", 88},
+	{"Message too long", 90},
+	{"Protocol not available", 92},
+	{"Protocol not supported", 93},
+	{"Socket type not supported", 94},
+	{"Operation not supported", 95},
+	{"Protocol family not supported", 96},
+	{"Network is down", 100},
+	{"Network is unreachable", 101},
+	{"Network dropped connection on reset", 102},
+	{"Software caused connection abort", 103},
+	{"Connection reset by peer", 104},
+	{"No buffer space available", 105},
+	{"Transport endpoint is already connected", 106},
+	{"Transport endpoint is not connected", 107},
+	{"Cannot send after transport endpoint shutdown", 108},
+	{"Connection timed out", 110},
+	{"Connection refused", 111},
+	{"Host is down", 112},
+	{"No route to host", 113},
+	{"Operation already in progress", 114},
+	{"Operation now in progress", 115},
+	{"Is a named type file", 120},
+	{"Remote I/O error", 121},
+	{"Disk quota exceeded", 122},
+	{"Operation canceled", 125},
+	{"Unknown error 126", 126},
+	{"Unknown error 127", 127},
+/* errors from fossil, vacfs, and u9fs */
+	{"fid unknown or out of range", EBADF},
+	{"permission denied", EACCES},
+	{"file does not exist", ENOENT},
+	{"authentication failed", ECONNREFUSED},
+	{"bad offset in directory read", ESPIPE},
+	{"bad use of fid", EBADF},
+	{"wstat can't convert between files and directories", EPERM},
+	{"directory is not empty", ENOTEMPTY},
+	{"file exists", EEXIST},
+	{"file already exists", EEXIST},
+	{"file or directory already exists", EEXIST},
+	{"fid already in use", EBADF},
+	{"file in use", ETXTBSY},
+	{"i/o error", EIO},
+	{"file already open for I/O", ETXTBSY},
+	{"illegal mode", EINVAL},
+	{"illegal name", ENAMETOOLONG},
+	{"not a directory", ENOTDIR},
+	{"not a member of proposed group", EINVAL},
+	{"not owner", EACCES},
+	{"only owner can change group in wstat", EACCES},
+	{"read only file system", EROFS},
+	{"no access to special file", EPERM},
+	{"i/o count too large", EIO},
+	{"unknown group", EINVAL},
+	{"unknown user", EINVAL},
+	{"bogus wstat buffer", EPROTO},
+	{"exclusive use file already open", EAGAIN},
+	{"corrupted directory entry", EIO},
+	{"corrupted file entry", EIO},
+	{"corrupted block label", EIO},
+	{"corrupted meta data", EIO},
+	{"illegal offset", EINVAL},
+	{"illegal path element", ENOENT},
+	{"root of file system is corrupted", EIO},
+	{"corrupted super block", EIO},
+	{"protocol botch", EPROTO},
+	{"file system is full", ENOSPC},
+	{"file is in use", EAGAIN},
+	{"directory entry is not allocated", ENOENT},
+	{"file is read only", EROFS},
+	{"file has been removed", EIDRM},
+	{"only support truncation to zero length", EPERM},
+	{"cannot remove root", EPERM},
+	{"file too big", EFBIG},
+	{"venti i/o error", EIO},
+	/* these are not errors */
+	{"u9fs rhostsauth: no authentication required", 0},
+	{"u9fs authnone: no authentication required", 0},
+	{NULL, -1}
+};
+
+extern int v9fs_error_init(void);
+extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 00000000000..821c9c4d76a
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,241 @@
+/*
+ * V9FS FID Management
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_fid_insert - add a fid to a dentry
+ * @fid: fid to add; it is stamped with the current task's uid and pid
+ * @dentry: dentry to add it to; its d_fsdata list is allocated on first use
+ *
+ */
+
+static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+{
+	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+	dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
+		dentry->d_iname, dentry);
+	if (dentry->d_fsdata == NULL) {
+		dentry->d_fsdata =
+		    kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		if (dentry->d_fsdata == NULL) {
+			dprintk(DEBUG_ERROR, "Out of memory\n");
+			return -ENOMEM;
+		}
+		fid_list = (struct list_head *)dentry->d_fsdata;
+		INIT_LIST_HEAD(fid_list);	/* Initialize list head */
+	}
+
+	fid->uid = current->uid;
+	fid->pid = current->pid;
+	list_add(&fid->list, fid_list);
+	return 0;
+}
+
+/**
+ * v9fs_fid_create - allocate a FID structure and link it to a dentry
+ * @dentry: dentry to link newly created fid to
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
+{
+	struct v9fs_fid *new;
+
+	new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+	if (new == NULL) {
+		dprintk(DEBUG_ERROR, "Out of Memory\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	new->fid = -1;
+	new->fidopen = 0;
+	new->fidcreate = 0;
+	new->fidclunked = 0;
+	new->iounit = 0;	/* remaining fields are not initialized here */
+
+	if (v9fs_fid_insert(new, dentry) == 0)
+		return new;
+	else {
+		dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
+		kfree(new);
+		return NULL;	/* NOTE(review): NULL here but ERR_PTR above */
+	}
+}
+
+/**
+ * v9fs_fid_destroy - unlink a FID structure from its list and free it
+ * @fid: fid to destroy
+ *
+ */
+
+void v9fs_fid_destroy(struct v9fs_fid *fid)
+{
+	list_del(&fid->list);
+	kfree(fid);
+}
+
+/**
+ * v9fs_fid_lookup - retrieve the right fid from a particular dentry
+ * @dentry: dentry to look for fid in
+ * @type: FID_OP, or FID_WALK to skip fids that are already open
+ *
+ * Search the dentry's fid list, preferring a fid owned by the current pid,
+ * then the parent's pid, then the current uid.  If none matches and @dentry
+ * is an ancestor of the cwd, walk ".." from the cwd's fid to build a new one.
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
+{
+	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+	struct v9fs_fid *current_fid = NULL;
+	struct v9fs_fid *temp = NULL;
+	struct v9fs_fid *return_fid = NULL;
+	int found_parent = 0;
+	int found_user = 0;
+
+	dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
+		type);
+
+	if (fid_list && !list_empty(fid_list)) {
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			if (current_fid->uid == current->uid) {
+				if (return_fid == NULL) {
+					if ((type == FID_OP)
+					    || (!current_fid->fidopen)) {
+						return_fid = current_fid;
+						found_user = 1;
+					}
+				}
+			}
+			if (current_fid->pid == current->real_parent->pid) {
+				if ((return_fid == NULL) || (found_parent)
+				    || (found_user)) {
+					if ((type == FID_OP)
+					    || (!current_fid->fidopen)) {
+						return_fid = current_fid;
+						found_parent = 1;
+						found_user = 0;
+					}
+				}
+			}
+			if (current_fid->pid == current->pid) {
+				if ((type == FID_OP) ||
+				    (!current_fid->fidopen)) {
+					return_fid = current_fid;
+					found_parent = 0;
+					found_user = 0;
+				}
+			}
+		}
+	}
+
+	/* we are at the root but didn't match */
+	if ((!return_fid) && (dentry->d_parent == dentry)) {
+		/* TODO: clone attach with new uid.  NOTE(review): current_fid is the spent loop cursor here, not a matched fid -- verify */
+		return_fid = current_fid;
+	}
+
+	if (!return_fid) {
+		struct dentry *par = current->fs->pwd->d_parent;
+		int count = 1;	/* NOTE(review): incremented below but never read */
+		while (par != NULL) {
+			if (par == dentry)
+				break;
+			count++;
+			if (par == par->d_parent) {
+				dprintk(DEBUG_ERROR,
+					"got to root without finding dentry\n");
+				break;
+			}
+			par = par->d_parent;
+		}
+
+/* XXX - there may be some duplication we can get rid of */
+		if (par == dentry) {
+			/* we need to fid_lookup the starting point */
+			int fidnum = -1;
+			int oldfid = -1;
+			int result = -1;
+			struct v9fs_session_info *v9ses =
+			    v9fs_inode2v9ses(current->fs->pwd->d_inode);
+
+			current_fid =
+			    v9fs_fid_lookup(current->fs->pwd, FID_WALK);
+			if (current_fid == NULL) {
+				dprintk(DEBUG_ERROR,
+					"process cwd doesn't have a fid\n");
+				return return_fid;
+			}
+			oldfid = current_fid->fid;
+			par = current->fs->pwd;
+			/* TODO: take advantage of multiwalk */
+
+			fidnum = v9fs_get_idpool(&v9ses->fidpool);
+			if (fidnum < 0) {
+				dprintk(DEBUG_ERROR,
+					"could not get a new fid num\n");
+				return return_fid;
+			}
+
+			while (par != dentry) {
+				result =
+				    v9fs_t_walk(v9ses, oldfid, fidnum, "..",
+						NULL);
+				if (result < 0) {
+					dprintk(DEBUG_ERROR,
+						"problem walking to parent\n");
+
+					break;	/* NOTE(review): fidnum is not put back on this path -- confirm */
+				}
+				oldfid = fidnum;
+				if (par == par->d_parent) {
+					dprintk(DEBUG_ERROR,
+						"can't find dentry\n");
+					break;
+				}
+				par = par->d_parent;
+			}
+			if (par == dentry) {
+				return_fid = v9fs_fid_create(dentry);
+				return_fid->fid = fidnum;	/* NOTE(review): create result is not checked (may be NULL or ERR_PTR) */
+			}
+		}
+	}
+
+	return return_fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 00000000000..7db478ccca3
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,57 @@
+/*
+ * V9FS FID Management
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/list.h>
+
+#define FID_OP   0
+#define FID_WALK 1
+
+struct v9fs_fid {
+	struct list_head list;	 /* list of fids associated with a dentry */
+	struct list_head active; /* XXX - debug */
+
+	u32 fid;
+	unsigned char fidopen;	  /* set when fid is opened */
+	unsigned char fidcreate;  /* set when fid was just created */
+	unsigned char fidclunked; /* set when fid has already been clunked */
+
+	struct v9fs_qid qid;
+	u32 iounit;
+
+	/* readdir stuff */
+	int rdir_fpos;
+	loff_t rdir_pos;
+	struct v9fs_fcall *rdir_fcall;
+
+	/* management stuff */
+	pid_t pid;		/* thread associated with this fid */
+	uid_t uid;		/* user associated with this fid */
+
+	/* private data */
+	struct file *filp;	/* backpointer to File struct for open files */
+	struct v9fs_session_info *v9ses;	/* session info for this FID */
+};
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
+void v9fs_fid_destroy(struct v9fs_fid *fid);
+struct v9fs_fid *v9fs_fid_create(struct dentry *);
-- 
cgit v1.2.3-18-g5258


From 1346f51ede71fc1e5021062898d150e192dc4dc8 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:25 -0700
Subject: [PATCH] v9fs: Change error magic numbers to defined constants

Change magic error numbers to system-defined constants in v9fs error.h, as
suggested by Jan-Benedict Glaw.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/error.h | 158 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 77 insertions(+), 81 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/error.h b/fs/9p/error.h
index 4bf2cf5aa1b..6dbb66f5b28 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -30,6 +30,7 @@
  */
 
 #include <linux/errno.h>
+#include <asm/errno.h>
 
 struct errormap {
 	char *name;
@@ -43,87 +44,82 @@ static struct hlist_head hash_errmap[ERRHASHSZ];
 
 /* FixMe - reduce to a reasonable size */
 static struct errormap errmap[] = {
-	{"Operation not permitted", 1},
-	{"wstat prohibited", 1},
-	{"No such file or directory", 2},
-	{"file not found", 2},
-	{"Interrupted system call", 4},
-	{"Input/output error", 5},
-	{"No such device or address", 6},
-	{"Argument list too long", 7},
-	{"Bad file descriptor", 9},
-	{"Resource temporarily unavailable", 11},
-	{"Cannot allocate memory", 12},
-	{"Permission denied", 13},
-	{"Bad address", 14},
-	{"Block device required", 15},
-	{"Device or resource busy", 16},
-	{"File exists", 17},
-	{"Invalid cross-device link", 18},
-	{"No such device", 19},
-	{"Not a directory", 20},
-	{"Is a directory", 21},
-	{"Invalid argument", 22},
-	{"Too many open files in system", 23},
-	{"Too many open files", 24},
-	{"Text file busy", 26},
-	{"File too large", 27},
-	{"No space left on device", 28},
-	{"Illegal seek", 29},
-	{"Read-only file system", 30},
-	{"Too many links", 31},
-	{"Broken pipe", 32},
-	{"Numerical argument out of domain", 33},
-	{"Numerical result out of range", 34},
-	{"Resource deadlock avoided", 35},
-	{"File name too long", 36},
-	{"No locks available", 37},
-	{"Function not implemented", 38},
-	{"Directory not empty", 39},
-	{"Too many levels of symbolic links", 40},
-	{"Unknown error 41", 41},
-	{"No message of desired type", 42},
-	{"Identifier removed", 43},
-	{"File locking deadlock error", 58},
-	{"No data available", 61},
-	{"Machine is not on the network", 64},
-	{"Package not installed", 65},
-	{"Object is remote", 66},
-	{"Link has been severed", 67},
-	{"Communication error on send", 70},
-	{"Protocol error", 71},
-	{"Bad message", 74},
-	{"File descriptor in bad state", 77},
-	{"Streams pipe error", 86},
-	{"Too many users", 87},
-	{"Socket operation on non-socket", 88},
-	{"Message too long", 90},
-	{"Protocol not available", 92},
-	{"Protocol not supported", 93},
-	{"Socket type not supported", 94},
-	{"Operation not supported", 95},
-	{"Protocol family not supported", 96},
-	{"Network is down", 100},
-	{"Network is unreachable", 101},
-	{"Network dropped connection on reset", 102},
-	{"Software caused connection abort", 103},
-	{"Connection reset by peer", 104},
-	{"No buffer space available", 105},
-	{"Transport endpoint is already connected", 106},
-	{"Transport endpoint is not connected", 107},
-	{"Cannot send after transport endpoint shutdown", 108},
-	{"Connection timed out", 110},
-	{"Connection refused", 111},
-	{"Host is down", 112},
-	{"No route to host", 113},
-	{"Operation already in progress", 114},
-	{"Operation now in progress", 115},
-	{"Is a named type file", 120},
-	{"Remote I/O error", 121},
-	{"Disk quota exceeded", 122},
-	{"Operation canceled", 125},
-	{"Unknown error 126", 126},
-	{"Unknown error 127", 127},
+	{"Operation not permitted", EPERM},
+	{"wstat prohibited", EPERM},
+	{"No such file or directory", ENOENT},
+	{"file not found", ENOENT},
+	{"Interrupted system call", EINTR},
+	{"Input/output error", EIO},
+	{"No such device or address", ENXIO},
+	{"Argument list too long", E2BIG},
+	{"Bad file descriptor", EBADF},
+	{"Resource temporarily unavailable", EAGAIN},
+	{"Cannot allocate memory", ENOMEM},
+	{"Permission denied", EACCES},
+	{"Bad address", EFAULT},
+	{"Block device required", ENOTBLK},
+	{"Device or resource busy", EBUSY},
+	{"File exists", EEXIST},
+	{"Invalid cross-device link", EXDEV},
+	{"No such device", ENODEV},
+	{"Not a directory", ENOTDIR},
+	{"Is a directory", EISDIR},
+	{"Invalid argument", EINVAL},
+	{"Too many open files in system", ENFILE},
+	{"Too many open files", EMFILE},
+	{"Text file busy", ETXTBSY},
+	{"File too large", EFBIG},
+	{"No space left on device", ENOSPC},
+	{"Illegal seek", ESPIPE},
+	{"Read-only file system", EROFS},
+	{"Too many links", EMLINK},
+	{"Broken pipe", EPIPE},
+	{"Numerical argument out of domain", EDOM},
+	{"Numerical result out of range", ERANGE},
+	{"Resource deadlock avoided", EDEADLK},
+	{"File name too long", ENAMETOOLONG},
+	{"No locks available", ENOLCK},
+	{"Function not implemented", ENOSYS},
+	{"Directory not empty", ENOTEMPTY},
+	{"Too many levels of symbolic links", ELOOP},
+	{"No message of desired type", ENOMSG},
+	{"Identifier removed", EIDRM},
+	{"No data available", ENODATA},
+	{"Machine is not on the network", ENONET},
+	{"Package not installed", ENOPKG},
+	{"Object is remote", EREMOTE},
+	{"Link has been severed", ENOLINK},
+	{"Communication error on send", ECOMM},
+	{"Protocol error", EPROTO},
+	{"Bad message", EBADMSG},
+	{"File descriptor in bad state", EBADFD},
+	{"Streams pipe error", ESTRPIPE},
+	{"Too many users", EUSERS},
+	{"Socket operation on non-socket", ENOTSOCK},
+	{"Message too long", EMSGSIZE},
+	{"Protocol not available", ENOPROTOOPT},
+	{"Protocol not supported", EPROTONOSUPPORT},
+	{"Socket type not supported", ESOCKTNOSUPPORT},
+	{"Operation not supported", EOPNOTSUPP},
+	{"Protocol family not supported", EPFNOSUPPORT},
+	{"Network is down", ENETDOWN},
+	{"Network is unreachable", ENETUNREACH},
+	{"Network dropped connection on reset", ENETRESET},
+	{"Software caused connection abort", ECONNABORTED},
+	{"Connection reset by peer", ECONNRESET},
+	{"No buffer space available", ENOBUFS},
+	{"Transport endpoint is already connected", EISCONN},
+	{"Transport endpoint is not connected", ENOTCONN},
+	{"Cannot send after transport endpoint shutdown", ESHUTDOWN},
+	{"Connection timed out", ETIMEDOUT},
+	{"Connection refused", ECONNREFUSED},
+	{"Host is down", EHOSTDOWN},
+	{"No route to host", EHOSTUNREACH},
+	{"Operation already in progress", EALREADY},
+	{"Operation now in progress", EINPROGRESS},
+	{"Is a named type file", EISNAM},
+	{"Remote I/O error", EREMOTEIO},
+	{"Disk quota exceeded", EDQUOT},
 /* errors from fossil, vacfs, and u9fs */
 	{"fid unknown or out of range", EBADF},
 	{"permission denied", EACCES},
-- 
cgit v1.2.3-18-g5258


From 73c592b9b844cc353bbaea690fb4aa652ac168a6 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:26 -0700
Subject: [PATCH] v9fs: Clean-up vfs_inode and setattr functions

Clean up code in v9fs vfs_inode as suggested by Alexey Dobriyan.  Did some
major revamping of the v9fs setattr code to remove unnecessary allocations and
clean up some dead code.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/error.h     |   2 +-
 fs/9p/vfs_inode.c | 131 +++++++++++++++---------------------------------------
 2 files changed, 36 insertions(+), 97 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/error.h b/fs/9p/error.h
index 6dbb66f5b28..2eb5927d589 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -139,7 +139,7 @@ static struct errormap errmap[] = {
 	{"illegal mode", EINVAL},
 	{"illegal name", ENAMETOOLONG},
 	{"not a directory", ENOTDIR},
-	{"not a member of proposed group", EINVAL},
+	{"not a member of proposed group", EPERM},
 	{"not owner", EACCES},
 	{"only owner can change group in wstat", EACCES},
 	{"read only file system", EROFS},
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ef78af7ef04..6d2357d1dac 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1,7 +1,7 @@
 /*
  *  linux/fs/9p/vfs_inode.c
  *
- * This file contians vfs inode ops for the 9P2000 protocol.
+ * This file contains vfs inode ops for the 9P2000 protocol.
  *
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
@@ -54,7 +54,7 @@ static struct inode_operations v9fs_symlink_inode_operations;
  *
  */
 
-static inline int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 {
 	int res;
 	res = mode & 0777;
@@ -92,7 +92,7 @@ static inline int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
  *
  */
 
-static inline int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 {
 	int res;
 
@@ -132,7 +132,7 @@ static inline int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
  *
  */
 
-static inline void
+static void
 v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
 {
 	mistat->type = ~0;
@@ -160,7 +160,7 @@ v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
 /**
  * v9fs_mistat2unix - convert mistat to unix stat
  * @mistat: Plan 9 metadata (mistat) structure
- * @stat: unix metadata (stat) structure to populate
+ * @buf: unix metadata (stat) structure to populate
  * @sb: superblock
  *
  */
@@ -177,22 +177,11 @@ v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
 	buf->st_mtime = mistat->mtime;
 	buf->st_ctime = mistat->mtime;
 
-	if (v9ses && v9ses->extended) {
-		/* TODO: string to uid mapping via user-space daemon */
-		buf->st_uid = mistat->n_uid;
-		buf->st_gid = mistat->n_gid;
-
-		sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
-		sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
-	} else {
-		buf->st_uid = v9ses->uid;
-		buf->st_gid = v9ses->gid;
-	}
-
 	buf->st_uid = (unsigned short)-1;
 	buf->st_gid = (unsigned short)-1;
 
 	if (v9ses && v9ses->extended) {
+		/* TODO: string to uid mapping via user-space daemon */
 		if (mistat->n_uid != -1)
 			sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
 
@@ -290,7 +279,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
  * @dir: directory inode file is being created in
  * @file_dentry: dentry file is being created in
  * @perm: permissions file is being created with
- * @open_mode: resulting open mode for file ???
+ * @open_mode: resulting open mode for file
  *
  */
 
@@ -434,9 +423,9 @@ v9fs_create(struct inode *dir,
 
 /**
  * v9fs_remove - helper function to remove files and directories
- * @inode: directory inode that is being deleted
- * @dentry:  dentry that is being deleted
- * @rmdir: where we are a file or a directory
+ * @dir: directory inode that is being deleted
+ * @file:  dentry that is being deleted
+ * @rmdir: removing a directory
  *
  */
 
@@ -502,7 +491,7 @@ v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
 
 /**
  * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
- * @i:  inode that is being unlinked
+ * @inode:  inode that is being unlinked
  * @dentry: dentry that is being unlinked
  * @mode: mode for new directory
  *
@@ -624,7 +613,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 /**
  * v9fs_vfs_unlink - VFS unlink hook to delete an inode
  * @i:  inode that is being unlinked
- * @dentry: dentry that is being unlinked
+ * @d: dentry that is being unlinked
  *
  */
 
@@ -636,7 +625,7 @@ static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
 /**
  * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
  * @i:  inode that is being unlinked
- * @dentry: dentry that is being unlinked
+ * @d: dentry that is being unlinked
  *
  */
 
@@ -674,6 +663,9 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	dprintk(DEBUG_VFS, "\n");
 
+	if (!mistat)
+		return -ENOMEM;
+
 	if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
 		dprintk(DEBUG_ERROR, "problem with arguments\n");
 		return -EBADF;
@@ -771,20 +763,21 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
 	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
-	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
 	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
 	int res = -EPERM;
 
 	dprintk(DEBUG_VFS, "\n");
+
+	if (!mistat)
+		return -ENOMEM;
+
 	if (!fid) {
 		dprintk(DEBUG_ERROR,
 			"Couldn't find fid associated with dentry\n");
 		return -EBADF;
 	}
 
-	if (!mistat)
-		return -ENOMEM;
-
 	v9fs_blank_mistat(v9ses, mistat);
 	if (iattr->ia_valid & ATTR_MODE)
 		mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
@@ -799,72 +792,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		mistat->length = iattr->ia_size;
 
 	if (v9ses->extended) {
-		char *uid = kmalloc(strlen(mistat->uid), GFP_KERNEL);
-		char *gid = kmalloc(strlen(mistat->gid), GFP_KERNEL);
-		char *muid = kmalloc(strlen(mistat->muid), GFP_KERNEL);
-		char *name = kmalloc(strlen(mistat->name), GFP_KERNEL);
-		char *extension = kmalloc(strlen(mistat->extension),
-					  GFP_KERNEL);
-
-		if ((!uid) || (!gid) || (!muid) || (!name) || (!extension)) {
-			kfree(uid);
-			kfree(gid);
-			kfree(muid);
-			kfree(name);
-			kfree(extension);
-
-			return -ENOMEM;
-		}
-
-		strcpy(uid, mistat->uid);
-		strcpy(gid, mistat->gid);
-		strcpy(muid, mistat->muid);
-		strcpy(name, mistat->name);
-		strcpy(extension, mistat->extension);
+		char *ptr = mistat->data+1;
 
 		if (iattr->ia_valid & ATTR_UID) {
-			if (strlen(uid) != 8) {
-				dprintk(DEBUG_ERROR, "uid strlen is %u not 8\n",
-					(unsigned int)strlen(uid));
-				sprintf(uid, "%08x", iattr->ia_uid);
-			} else {
-				kfree(uid);
-				uid = kmalloc(9, GFP_KERNEL);
-			}
-
-			sprintf(uid, "%08x", iattr->ia_uid);
+			mistat->uid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
 			mistat->n_uid = iattr->ia_uid;
 		}
 
 		if (iattr->ia_valid & ATTR_GID) {
-			if (strlen(gid) != 8)
-				dprintk(DEBUG_ERROR, "gid strlen is %u not 8\n",
-					(unsigned int)strlen(gid));
-			else {
-				kfree(gid);
-				gid = kmalloc(9, GFP_KERNEL);
-			}
-
-			sprintf(gid, "%08x", iattr->ia_gid);
+			mistat->gid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
 			mistat->n_gid = iattr->ia_gid;
 		}
-
-		mistat->uid = mistat->data;
-		strcpy(mistat->uid, uid);
-		mistat->gid = mistat->data + strlen(uid) + 1;
-		strcpy(mistat->gid, gid);
-		mistat->muid = mistat->gid + strlen(gid) + 1;
-		strcpy(mistat->muid, muid);
-		mistat->name = mistat->muid + strlen(muid) + 1;
-		strcpy(mistat->name, name);
-		mistat->extension = mistat->name + strlen(name) + 1;
-		strcpy(mistat->extension, extension);
-
-		kfree(uid);
-		kfree(gid);
-		kfree(muid);
-		kfree(name);
-		kfree(extension);
 	}
 
 	res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
@@ -985,17 +925,14 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	int retval = -EPERM;
 	struct v9fs_fid *newfid;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-	struct super_block *sb = dir ? dir->i_sb : NULL;
 	struct v9fs_fcall *fcall = NULL;
 	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
 
 	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
 		symname);
 
-	if ((!dentry) || (!sb) || (!v9ses)) {
-		dprintk(DEBUG_ERROR, "problem with arguments\n");
-		return -EBADF;
-	}
+	if (!mistat)
+		return -ENOMEM;
 
 	if (!v9ses->extended) {
 		dprintk(DEBUG_ERROR, "not extended\n");
@@ -1040,7 +977,7 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 /**
  * v9fs_readlink - read a symlink's location (internal version)
  * @dentry: dentry for symlink
- * @buf: buffer to load symlink location into
+ * @buffer: buffer to load symlink location into
  * @buflen: length of buffer
  *
  */
@@ -1179,7 +1116,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
  * v9fs_vfs_link - create a hardlink
  * @old_dentry: dentry for file to link to
  * @dir: inode destination for new link
- * @new_dentry: dentry for link
+ * @dentry: dentry for link
  *
  */
 
@@ -1274,6 +1211,9 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
 		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
 
+	if (!mistat)
+		return -ENOMEM;
+
 	if (!new_valid_dev(rdev)) {
 		retval = -EINVAL;
 		goto FreeMem;
@@ -1302,7 +1242,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 		sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISCHR(mode))
 		sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
-	else if (S_ISFIFO(mode)) ;	/* DO NOTHING */
+	else if (S_ISFIFO(mode))
+		;	/* DO NOTHING */
 	else {
 		retval = -EINVAL;
 		goto FreeMem;
@@ -1319,8 +1260,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 				FCALL_ERROR(fcall));
 			goto FreeMem;
 		}
-
-		kfree(fcall);
 	}
 
 	/* need to update dcache so we show up */
-- 
cgit v1.2.3-18-g5258


From 5d58bec5b7a8b8303df0a4dcb9a18feeefac6091 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:27 -0700
Subject: [PATCH] v9fs: Fix support for special files (devices, named pipes,
 etc.)

Fix v9fs special files (block, char devices) support.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6d2357d1dac..36fff087167 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -250,6 +250,9 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		case S_IFBLK:
 		case S_IFCHR:
 		case S_IFSOCK:
+			init_special_inode(inode, inode->i_mode,
+					   inode->i_rdev);
+			break;
 		case S_IFREG:
 			inode->i_op = &v9fs_file_inode_operations;
 			inode->i_fop = &v9fs_file_operations;
-- 
cgit v1.2.3-18-g5258


From b501611a6f78558eafcf09b228abd866d4ea5d9f Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:27 -0700
Subject: [PATCH] v9fs: readlink extended mode check

LANL reported some issues with random crashes during mount of legacy protocol
servers (9P2000 versus 9P2000.u) -- crash was always happening in readlink
(which should never happen in legacy mode).  Added some sanity conditionals to
the get_inode code which should prevent the errors LANL was seeing.  Code
tested benign through regression.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 36fff087167..0c13fc60004 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -44,6 +44,7 @@
 #include "fid.h"
 
 static struct inode_operations v9fs_dir_inode_operations;
+static struct inode_operations v9fs_dir_inode_operations_ext;
 static struct inode_operations v9fs_file_inode_operations;
 static struct inode_operations v9fs_symlink_inode_operations;
 
@@ -232,6 +233,7 @@ v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
 struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 {
 	struct inode *inode = NULL;
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
 	dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
 
@@ -250,6 +252,10 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		case S_IFBLK:
 		case S_IFCHR:
 		case S_IFSOCK:
+			if(!v9ses->extended) {
+				dprintk(DEBUG_ERROR, "special files without extended mode\n");
+				return ERR_PTR(-EINVAL);
+			}
 			init_special_inode(inode, inode->i_mode,
 					   inode->i_rdev);
 			break;
@@ -257,14 +263,21 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 			inode->i_op = &v9fs_file_inode_operations;
 			inode->i_fop = &v9fs_file_operations;
 			break;
+		case S_IFLNK:
+			if(!v9ses->extended) {
+				dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+				return ERR_PTR(-EINVAL);
+			}
+			inode->i_op = &v9fs_symlink_inode_operations;
+			break;
 		case S_IFDIR:
 			inode->i_nlink++;
-			inode->i_op = &v9fs_dir_inode_operations;
+			if(v9ses->extended)
+				inode->i_op = &v9fs_dir_inode_operations_ext;
+			else
+				inode->i_op = &v9fs_dir_inode_operations;
 			inode->i_fop = &v9fs_dir_operations;
 			break;
-		case S_IFLNK:
-			inode->i_op = &v9fs_symlink_inode_operations;
-			break;
 		default:
 			dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
 				mode, mode & S_IFMT);
@@ -1284,7 +1297,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	return retval;
 }
 
-static struct inode_operations v9fs_dir_inode_operations = {
+static struct inode_operations v9fs_dir_inode_operations_ext = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
 	.symlink = v9fs_vfs_symlink,
@@ -1299,6 +1312,18 @@ static struct inode_operations v9fs_dir_inode_operations = {
 	.setattr = v9fs_vfs_setattr,
 };
 
+static struct inode_operations v9fs_dir_inode_operations = {
+	.create = v9fs_vfs_create,
+	.lookup = v9fs_vfs_lookup,
+	.unlink = v9fs_vfs_unlink,
+	.mkdir = v9fs_vfs_mkdir,
+	.rmdir = v9fs_vfs_rmdir,
+	.mknod = v9fs_vfs_mknod,
+	.rename = v9fs_vfs_rename,
+	.getattr = v9fs_vfs_getattr,
+	.setattr = v9fs_vfs_setattr,
+};
+
 static struct inode_operations v9fs_file_inode_operations = {
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
-- 
cgit v1.2.3-18-g5258


From cb2e87a65d6cd735eb06fa595bf90497af28c37b Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 9 Sep 2005 13:04:28 -0700
Subject: [PATCH] v9fs: fix handling of malformed 9P messages

This patch attempts to do a better job of cleaning up after detecting errors
on the transport.  This should also improve error reporting on broken
connections to servers.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/error.h      |  1 +
 fs/9p/mux.c        | 53 ++++++++++++++++++++++++++++++++++-------------------
 fs/9p/mux.h        |  1 +
 fs/9p/trans_sock.c | 12 ++++++++++--
 4 files changed, 46 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/error.h b/fs/9p/error.h
index 2eb5927d589..78f89acf7c9 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -47,6 +47,7 @@ static struct errormap errmap[] = {
 	{"Operation not permitted", EPERM},
 	{"wstat prohibited", EPERM},
 	{"No such file or directory", ENOENT},
+	{"directory entry not found", ENOENT},
 	{"file not found", ENOENT},
 	{"Interrupted system call", EINTR},
 	{"Input/output error", EIO},
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 0854bef58c1..8835b576f74 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -162,18 +162,21 @@ static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
 	dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
 	ret = wait_event_interruptible(v9ses->read_wait,
 		       ((v9ses->transport->status != Connected) ||
-			(req->rcall != 0) || dprintcond(v9ses, req)));
+			(req->rcall != 0) || (req->err < 0) ||
+			dprintcond(v9ses, req)));
 
 	dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+
+	spin_lock(&v9ses->muxlock);
+	list_del(&req->next);
+	spin_unlock(&v9ses->muxlock);
+
+	if (req->err < 0)
+		return req->err;
+
 	if (v9ses->transport->status == Disconnected)
 		return -ECONNRESET;
 
-	if (ret == 0) {
-		spin_lock(&v9ses->muxlock);
-		list_del(&req->next);
-		spin_unlock(&v9ses->muxlock);
-	}
-
 	return ret;
 }
 
@@ -245,6 +248,9 @@ v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
 	if (!v9ses)
 		return -EINVAL;
 
+	if (!v9ses->transport || v9ses->transport->status != Connected)
+		return -EIO;
+
 	if (rcall)
 		*rcall = NULL;
 
@@ -257,6 +263,7 @@ v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
 	tcall->tag = tid;
 
 	req.tcall = tcall;
+	req.err = 0;
 	req.rcall = NULL;
 
 	ret = v9fs_send(v9ses, &req);
@@ -371,16 +378,21 @@ static int v9fs_recvproc(void *data)
 		}
 
 		err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
-		if (err < 0) {
-			kfree(rcall);
-			break;
-		}
 		spin_lock(&v9ses->muxlock);
-		list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-			if (rreq->tcall->tag == rcall->tag) {
-				req = rreq;
-				req->rcall = rcall;
-				break;
+		if (err < 0) {
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				rreq->err = err;
+			}
+			if(err != -ERESTARTSYS)
+				eprintk(KERN_ERR,
+					"Transport error while reading message %d\n", err);
+		} else {
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				if (rreq->tcall->tag == rcall->tag) {
+					req = rreq;
+					req->rcall = rcall;
+					break;
+				}
 			}
 		}
 
@@ -399,9 +411,10 @@ static int v9fs_recvproc(void *data)
 		spin_unlock(&v9ses->muxlock);
 
 		if (!req) {
-			dprintk(DEBUG_ERROR,
-				"unexpected response: id %d tag %d\n",
-				rcall->id, rcall->tag);
+			if (err >= 0)
+				dprintk(DEBUG_ERROR,
+					"unexpected response: id %d tag %d\n",
+					rcall->id, rcall->tag);
 
 			kfree(rcall);
 		}
@@ -410,6 +423,8 @@ static int v9fs_recvproc(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
+	v9ses->transport->close(v9ses->transport);
+
 	/* Inform all pending processes about the failure */
 	wake_up_all(&v9ses->read_wait);
 
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 82ce793af1b..4994cb10bad 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -28,6 +28,7 @@
 struct v9fs_rpcreq {
 	struct v9fs_fcall *tcall;
 	struct v9fs_fcall *rcall;
+	int err;	/* error code if response failed */
 
 	/* XXX - could we put scatter/gather buffers here? */
 
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index 081d1c84780..01e26f0013a 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -254,7 +254,12 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 
 static void v9fs_sock_close(struct v9fs_transport *trans)
 {
-	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+	struct v9fs_trans_sock *ts;
+
+	if (!trans)
+		return;
+
+	ts = trans->priv;
 
 	if ((ts) && (ts->s)) {
 		dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
@@ -264,7 +269,10 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
 		dprintk(DEBUG_TRANS, "socket closed\n");
 	}
 
-	kfree(ts);
+	if (ts)
+		kfree(ts);
+
+	trans->priv = NULL;
 }
 
 struct v9fs_transport v9fs_trans_tcp = {
-- 
cgit v1.2.3-18-g5258


From 04578f174f43d29b569500f01ba772afa4016330 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:22 -0700
Subject: [PATCH] FUSE - MAINTAINERS, Kconfig and Makefile changes

This patch adds FUSE filesystem to MAINTAINERS, fs/Kconfig and
fs/Makefile.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig  | 13 +++++++++++++
 fs/Makefile |  1 +
 2 files changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 443aed4e206..068ccea2f18 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -462,6 +462,19 @@ config AUTOFS4_FS
 	  local network, you probably do not need an automounter, and can say
 	  N here.
 
+config FUSE_FS
+	tristate "Filesystem in Userspace support"
+	help
+	  With FUSE it is possible to implement a fully functional filesystem
+	  in a userspace program.
+
+	  There's also companion library: libfuse.  This library along with
+	  utilities is available from the FUSE homepage:
+	  <http://fuse.sourceforge.net/>
+
+	  If you want to develop a userspace FS, or if you want to use
+	  a filesystem based on FUSE, answer Y or M.
+
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
diff --git a/fs/Makefile b/fs/Makefile
index d646502c1ef..1972da18627 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,6 +89,7 @@ obj-$(CONFIG_QNX4FS_FS)		+= qnx4/
 obj-$(CONFIG_AUTOFS_FS)		+= autofs/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
+obj-$(CONFIG_FUSE_FS)		+= fuse/
 obj-$(CONFIG_UDF_FS)		+= udf/
 obj-$(CONFIG_RELAYFS_FS)	+= relayfs/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
-- 
cgit v1.2.3-18-g5258


From d8a5ba45457e4a22aa39c939121efd7bb6c76672 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:26 -0700
Subject: [PATCH] FUSE - core

This patch adds FUSE core.

This contains the following files:

 o inode.c
    - superblock operations (alloc_inode, destroy_inode, read_inode,
      clear_inode, put_super, show_options)
    - registers FUSE filesystem

 o fuse_i.h
    - private header file

Requirements
============

 The most important difference between ordinary filesystems and FUSE is
 the fact, that the filesystem data/metadata is provided by a userspace
 process run with the privileges of the mount "owner" instead of the
 kernel, or some remote entity usually running with elevated
 privileges.

 The security implication of this is that a non-privileged user must
 not be able to use this capability to compromise the system.  Obvious
 requirements arising from this are:

  - mount owner should not be able to get elevated privileges with the
    help of the mounted filesystem

  - mount owner should not be able to induce undesired behavior in
    other users' or the super user's processes

  - mount owner should not get illegitimate access to information from
    other users' and the super user's processes

 These are currently ensured with the following constraints:

  1) mount is only allowed to directory or file which the mount owner
    can modify without limitation (write access + no sticky bit for
    directories)

  2) nosuid,nodev mount options are forced

  3) any process running with fsuid different from the owner is denied
     all access to the filesystem

 1) and 2) are ensured by the "fusermount" mount utility which is a
    setuid root application doing the actual mount operation.

 3) is ensured by a check in the permission() method in the kernel

 I started thinking about doing 3) in a different way because Christoph
 H. made a big deal out of it, saying that FUSE is unacceptable into
 mainline in this form.

 The suggested use of private namespaces would be OK, but in their
 current form they have many limitations that make their use impractical
 (as discussed in this thread).

 Suggested improvements that would address these limitations:

   - implement shared subtrees

   - allow a process to join an existing namespace (make namespaces
     first-class objects)

   - implement the namespace creation/joining in a PAM module

 With all that in place the check of owner against current->fsuid may
 be removed from the FUSE kernel module, without compromising the
 security requirements.

 Suid programs still raise interesting questions, since they get access even
 to the private namespace causing some information leak (exact
 order/timing of filesystem operations performed), giving some
 ptrace-like capabilities to unprivileged users.  BTW this problem is
 not strictly limited to the namespace approach, since suid programs
 setting fsuid and accessing users' files will succeed with the current
 approach too.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/Makefile |   7 +
 fs/fuse/fuse_i.h |  89 ++++++++++++
 fs/fuse/inode.c  | 428 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 524 insertions(+)
 create mode 100644 fs/fuse/Makefile
 create mode 100644 fs/fuse/fuse_i.h
 create mode 100644 fs/fuse/inode.c

(limited to 'fs')

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 00000000000..9c3e4cc7b1a
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the FUSE filesystem.
+#
+
+obj-$(CONFIG_FUSE_FS) += fuse.o
+
+fuse-objs := inode.o
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 00000000000..eed6e89ce01
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,89 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include <linux/fuse.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
+#include <asm/semaphore.h>
+
+/** FUSE inode */
+struct fuse_inode {
+	/** Inode data */
+	struct inode inode;
+
+	/** Unique ID, which identifies the inode between userspace
+	 * and kernel */
+	u64 nodeid;
+
+	/** Time in jiffies until the file attributes are valid */
+	unsigned long i_time;
+};
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ */
+struct fuse_conn {
+	/** The superblock of the mounted filesystem */
+	struct super_block *sb;
+
+	/** The user id for this mount */
+	uid_t user_id;
+
+	/** Backing dev info */
+	struct backing_dev_info bdi;
+};
+
+static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
+{
+	return (struct fuse_conn **) &sb->s_fs_info;
+}
+
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+	return *get_fuse_conn_super_p(sb);
+}
+
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+	return get_fuse_conn_super(inode->i_sb);
+}
+
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+	return container_of(inode, struct fuse_inode, inode);
+}
+
+static inline u64 get_node_id(struct inode *inode)
+{
+	return get_fuse_inode(inode)->nodeid;
+}
+
+/**
+ * This is the single global spinlock which protects FUSE's structures
+ *
+ * The following data is protected by this lock:
+ *
+ *  - the s_fs_info field of the super block
+ *  - the sb (super_block) field in fuse_conn
+ */
+extern spinlock_t fuse_lock;
+
+/**
+ * Check if the connection can be released, and if yes, then free the
+ * connection structure
+ */
+void fuse_release_conn(struct fuse_conn *fc);
+
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 00000000000..ea6339c2b6a
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,428 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/parser.h>
+#include <linux/statfs.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+spinlock_t fuse_lock;
+static kmem_cache_t *fuse_inode_cachep;
+static int mount_count;
+
+static int mount_max = 1000;
+module_param(mount_max, int, 0644);
+MODULE_PARM_DESC(mount_max, "Maximum number of FUSE mounts allowed, if -1 then unlimited (default: 1000)");
+
+#define FUSE_SUPER_MAGIC 0x65735546
+
+struct fuse_mount_data {
+	int fd;
+	unsigned rootmode;
+	unsigned user_id;
+};
+
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+	struct inode *inode;
+	struct fuse_inode *fi;
+
+	inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	if (!inode)
+		return NULL;
+
+	fi = get_fuse_inode(inode);
+	fi->i_time = jiffies - 1;
+	fi->nodeid = 0;
+
+	return inode;
+}
+
+static void fuse_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(fuse_inode_cachep, inode);
+}
+
+static void fuse_read_inode(struct inode *inode)
+{
+	/* No op */
+}
+
+static void fuse_clear_inode(struct inode *inode)
+{
+}
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+{
+	if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
+		invalidate_inode_pages(inode->i_mapping);
+
+	inode->i_ino     = attr->ino;
+	inode->i_mode    = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
+	inode->i_nlink   = attr->nlink;
+	inode->i_uid     = attr->uid;
+	inode->i_gid     = attr->gid;
+	i_size_write(inode, attr->size);
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_blocks  = attr->blocks;
+	inode->i_atime.tv_sec   = attr->atime;
+	inode->i_atime.tv_nsec  = attr->atimensec;
+	inode->i_mtime.tv_sec   = attr->mtime;
+	inode->i_mtime.tv_nsec  = attr->mtimensec;
+	inode->i_ctime.tv_sec   = attr->ctime;
+	inode->i_ctime.tv_nsec  = attr->ctimensec;
+}
+
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+	inode->i_mode = attr->mode & S_IFMT;
+	i_size_write(inode, attr->size);
+}
+
+static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+{
+	unsigned long nodeid = *(unsigned long *) _nodeidp;
+	if (get_node_id(inode) == nodeid)
+		return 1;
+	else
+		return 0;
+}
+
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+	unsigned long nodeid = *(unsigned long *) _nodeidp;
+	get_fuse_inode(inode)->nodeid = nodeid;
+	return 0;
+}
+
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+			int generation, struct fuse_attr *attr, int version)
+{
+	struct inode *inode;
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	int retried = 0;
+
+ retry:
+	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
+	if (!inode)
+		return NULL;
+
+	if ((inode->i_state & I_NEW)) {
+		inode->i_generation = generation;
+		inode->i_data.backing_dev_info = &fc->bdi;
+		fuse_init_inode(inode, attr);
+		unlock_new_inode(inode);
+	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+		BUG_ON(retried);
+		/* Inode has changed type, any I/O on the old should fail */
+		make_bad_inode(inode);
+		iput(inode);
+		retried = 1;
+		goto retry;
+	}
+
+	fuse_change_attributes(inode, attr);
+	inode->i_version = version;
+	return inode;
+}
+
+static void fuse_put_super(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	spin_lock(&fuse_lock);
+	mount_count --;
+	fc->sb = NULL;
+	fc->user_id = 0;
+	fuse_release_conn(fc);
+	*get_fuse_conn_super_p(sb) = NULL;
+	spin_unlock(&fuse_lock);
+}
+
+enum {
+	OPT_FD,
+	OPT_ROOTMODE,
+	OPT_USER_ID,
+	OPT_DEFAULT_PERMISSIONS,
+	OPT_ALLOW_OTHER,
+	OPT_ALLOW_ROOT,
+	OPT_KERNEL_CACHE,
+	OPT_ERR
+};
+
+static match_table_t tokens = {
+	{OPT_FD,			"fd=%u"},
+	{OPT_ROOTMODE,			"rootmode=%o"},
+	{OPT_USER_ID,			"user_id=%u"},
+	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
+	{OPT_ALLOW_OTHER,		"allow_other"},
+	{OPT_ALLOW_ROOT,		"allow_root"},
+	{OPT_KERNEL_CACHE,		"kernel_cache"},
+	{OPT_ERR,			NULL}
+};
+
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+{
+	char *p;
+	memset(d, 0, sizeof(struct fuse_mount_data));
+	d->fd = -1;
+
+	while ((p = strsep(&opt, ",")) != NULL) {
+		int token;
+		int value;
+		substring_t args[MAX_OPT_ARGS];
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case OPT_FD:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->fd = value;
+			break;
+
+		case OPT_ROOTMODE:
+			if (match_octal(&args[0], &value))
+				return 0;
+			d->rootmode = value;
+			break;
+
+		case OPT_USER_ID:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->user_id = value;
+			break;
+
+		default:
+			return 0;
+		}
+	}
+	if (d->fd == -1)
+		return 0;
+
+	return 1;
+}
+
+static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+
+	seq_printf(m, ",user_id=%u", fc->user_id);
+	return 0;
+}
+
+void fuse_release_conn(struct fuse_conn *fc)
+{
+	kfree(fc);
+}
+
+static struct fuse_conn *new_conn(void)
+{
+	struct fuse_conn *fc;
+
+	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+	if (fc != NULL) {
+		memset(fc, 0, sizeof(*fc));
+		fc->sb = NULL;
+		fc->user_id = 0;
+		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+		fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	}
+	return fc;
+}
+
+static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
+{
+	struct fuse_conn *fc;
+
+	fc = new_conn();
+	if (fc == NULL)
+		return NULL;
+	spin_lock(&fuse_lock);
+	fc->sb = sb;
+	spin_unlock(&fuse_lock);
+	return fc;
+}
+
+static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
+{
+	struct fuse_attr attr;
+	memset(&attr, 0, sizeof(attr));
+
+	attr.mode = mode;
+	attr.ino = FUSE_ROOT_ID;
+	return fuse_iget(sb, 1, 0, &attr, 0);
+}
+
+static struct super_operations fuse_super_operations = {
+	.alloc_inode    = fuse_alloc_inode,
+	.destroy_inode  = fuse_destroy_inode,
+	.read_inode	= fuse_read_inode,
+	.clear_inode	= fuse_clear_inode,
+	.put_super	= fuse_put_super,
+	.show_options	= fuse_show_options,
+};
+
+static int inc_mount_count(void)
+{
+	int success = 0;
+	spin_lock(&fuse_lock);
+	mount_count ++;
+	if (mount_max == -1 || mount_count <= mount_max)
+		success = 1;
+	spin_unlock(&fuse_lock);
+	return success;
+}
+
+static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct fuse_conn *fc;
+	struct inode *root;
+	struct fuse_mount_data d;
+	struct file *file;
+	int err;
+
+	if (!parse_fuse_opt((char *) data, &d))
+		return -EINVAL;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = FUSE_SUPER_MAGIC;
+	sb->s_op = &fuse_super_operations;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	file = fget(d.fd);
+	if (!file)
+		return -EINVAL;
+
+	fc = get_conn(file, sb);
+	fput(file);
+	if (fc == NULL)
+		return -EINVAL;
+
+	fc->user_id = d.user_id;
+
+	*get_fuse_conn_super_p(sb) = fc;
+
+	err = -ENFILE;
+	if (!inc_mount_count() && current->uid != 0)
+		goto err;
+
+	err = -ENOMEM;
+	root = get_root_inode(sb, d.rootmode);
+	if (root == NULL)
+		goto err;
+
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		iput(root);
+		goto err;
+	}
+	return 0;
+
+ err:
+	spin_lock(&fuse_lock);
+	mount_count --;
+	fc->sb = NULL;
+	fuse_release_conn(fc);
+	spin_unlock(&fuse_lock);
+	*get_fuse_conn_super_p(sb) = NULL;
+	return err;
+}
+
+static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
+				       int flags, const char *dev_name,
+				       void *raw_data)
+{
+	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+}
+
+static struct file_system_type fuse_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuse",
+	.get_sb		= fuse_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+				 unsigned long flags)
+{
+	struct inode * inode = foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(inode);
+}
+
+static int __init fuse_fs_init(void)
+{
+	int err;
+
+	err = register_filesystem(&fuse_fs_type);
+	if (err)
+		printk("fuse: failed to register filesystem\n");
+	else {
+		fuse_inode_cachep = kmem_cache_create("fuse_inode",
+						      sizeof(struct fuse_inode),
+						      0, SLAB_HWCACHE_ALIGN,
+						      fuse_inode_init_once, NULL);
+		if (!fuse_inode_cachep) {
+			unregister_filesystem(&fuse_fs_type);
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+static void fuse_fs_cleanup(void)
+{
+	unregister_filesystem(&fuse_fs_type);
+	kmem_cache_destroy(fuse_inode_cachep);
+}
+
+static int __init fuse_init(void)
+{
+	int res;
+
+	printk("fuse init (API version %i.%i)\n",
+	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+	spin_lock_init(&fuse_lock);
+	res = fuse_fs_init();
+	if (res)
+		goto err;
+
+	return 0;
+
+ err:
+	return res;
+}
+
+static void __exit fuse_exit(void)
+{
+	printk(KERN_DEBUG "fuse exit\n");
+
+	fuse_fs_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);
-- 
cgit v1.2.3-18-g5258


From 334f485df85ac7736ebe14940bf0a059c5f26d7d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:27 -0700
Subject: [PATCH] FUSE - device functions

This adds the FUSE device handling functions.

This contains the following files:

 o dev.c
    - fuse device operations (read, write, release, poll)
    - registers misc device
    - support for sending requests to userspace

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/Makefile |   2 +-
 fs/fuse/dev.c    | 884 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h | 223 ++++++++++++++
 fs/fuse/inode.c  |  58 +++-
 4 files changed, 1161 insertions(+), 6 deletions(-)
 create mode 100644 fs/fuse/dev.c

(limited to 'fs')

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 9c3e4cc7b1a..21021c35648 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 
-fuse-objs := inode.o
+fuse-objs := dev.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 00000000000..9aaf10a6588
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,884 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+
+static kmem_cache_t *fuse_req_cachep;
+
+static inline struct fuse_conn *fuse_get_conn(struct file *file)
+{
+	struct fuse_conn *fc;
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc && !fc->sb)
+		fc = NULL;
+	spin_unlock(&fuse_lock);
+	return fc;
+}
+
+static inline void fuse_request_init(struct fuse_req *req)
+{
+	memset(req, 0, sizeof(*req));
+	INIT_LIST_HEAD(&req->list);
+	init_waitqueue_head(&req->waitq);
+	atomic_set(&req->count, 1);
+}
+
+struct fuse_req *fuse_request_alloc(void)
+{
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	if (req)
+		fuse_request_init(req);
+	return req;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+	kmem_cache_free(fuse_req_cachep, req);
+}
+
+static inline void block_sigs(sigset_t *oldset)
+{
+	sigset_t mask;
+
+	siginitsetinv(&mask, sigmask(SIGKILL));
+	sigprocmask(SIG_BLOCK, &mask, oldset);
+}
+
+static inline void restore_sigs(sigset_t *oldset)
+{
+	sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+void fuse_reset_request(struct fuse_req *req)
+{
+	int preallocated = req->preallocated;
+	BUG_ON(atomic_read(&req->count) != 1);
+	fuse_request_init(req);
+	req->preallocated = preallocated;
+}
+
+static void __fuse_get_request(struct fuse_req *req)
+{
+	atomic_inc(&req->count);
+}
+
+/* Must be called with > 1 refcount */
+static void __fuse_put_request(struct fuse_req *req)
+{
+	BUG_ON(atomic_read(&req->count) < 2);
+	atomic_dec(&req->count);
+}
+
+static struct fuse_req *do_get_request(struct fuse_conn *fc)
+{
+	struct fuse_req *req;
+
+	spin_lock(&fuse_lock);
+	BUG_ON(list_empty(&fc->unused_list));
+	req = list_entry(fc->unused_list.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+	fuse_request_init(req);
+	req->preallocated = 1;
+	req->in.h.uid = current->fsuid;
+	req->in.h.gid = current->fsgid;
+	req->in.h.pid = current->pid;
+	return req;
+}
+
+struct fuse_req *fuse_get_request(struct fuse_conn *fc)
+{
+	if (down_interruptible(&fc->outstanding_sem))
+		return NULL;
+	return do_get_request(fc);
+}
+
+/*
+ * Non-interruptible version of the above function is for operations
+ * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
+ * return NULL, but only in case the signal is SIGKILL.
+ */
+struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc)
+{
+	int intr;
+	sigset_t oldset;
+
+	block_sigs(&oldset);
+	intr = down_interruptible(&fc->outstanding_sem);
+	restore_sigs(&oldset);
+	return intr ? NULL : do_get_request(fc);
+}
+
+static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (req->preallocated)
+		list_add(&req->list, &fc->unused_list);
+	else
+		fuse_request_free(req);
+
+	/* If we are in debt decrease that first */
+	if (fc->outstanding_debt)
+		fc->outstanding_debt--;
+	else
+		up(&fc->outstanding_sem);
+	spin_unlock(&fuse_lock);
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	if (atomic_dec_and_test(&req->count))
+		fuse_putback_request(fc, req);
+}
+
+/*
+ * This function is called when a request is finished.  Either a reply
+ * has arrived or it was interrupted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file was
+ * closed.  It decreases the reference count for the request.  In case
+ * of a background request the references to the stored objects are
+ * released.  The requester thread is woken up (if still waiting), and
+ * finally the request is either freed or put on the unused_list
+ *
+ * Called with fuse_lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int putback;
+	req->finished = 1;
+	putback = atomic_dec_and_test(&req->count);
+	spin_unlock(&fuse_lock);
+	if (req->background) {
+		if (req->inode)
+			iput(req->inode);
+		if (req->inode2)
+			iput(req->inode2);
+		if (req->file)
+			fput(req->file);
+	}
+	wake_up(&req->waitq);
+	if (req->in.h.opcode == FUSE_INIT) {
+		int i;
+
+		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
+			fc->conn_error = 1;
+
+		/* After INIT reply is received other requests can go
+		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+		   up()s on outstanding_sem.  The last up() is done in
+		   fuse_putback_request() */
+		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+			up(&fc->outstanding_sem);
+	}
+	if (putback)
+		fuse_putback_request(fc, req);
+}
+
+static void background_request(struct fuse_req *req)
+{
+	/* Need to get hold of the inode(s) and/or file used in the
+	   request, so FORGET and RELEASE are not sent too early */
+	req->background = 1;
+	if (req->inode)
+		req->inode = igrab(req->inode);
+	if (req->inode2)
+		req->inode2 = igrab(req->inode2);
+	if (req->file)
+		get_file(req->file);
+}
+
+static int request_wait_answer_nonint(struct fuse_req *req)
+{
+	int err;
+	sigset_t oldset;
+	block_sigs(&oldset);
+	err = wait_event_interruptible(req->waitq, req->finished);
+	restore_sigs(&oldset);
+	return err;
+}
+
+/* Called with fuse_lock held.  Releases, and then reacquires it. */
+static void request_wait_answer(struct fuse_req *req, int interruptible)
+{
+	int intr;
+
+	spin_unlock(&fuse_lock);
+	if (interruptible)
+		intr = wait_event_interruptible(req->waitq, req->finished);
+	else
+		intr = request_wait_answer_nonint(req);
+	spin_lock(&fuse_lock);
+	if (intr && interruptible && req->sent) {
+		/* If request is already in userspace, only allow KILL
+		   signal to interrupt */
+		spin_unlock(&fuse_lock);
+		intr = request_wait_answer_nonint(req);
+		spin_lock(&fuse_lock);
+	}
+	if (!intr)
+		return;
+
+	if (!interruptible || req->sent)
+		req->out.h.error = -EINTR;
+	else
+		req->out.h.error = -ERESTARTNOINTR;
+
+	req->interrupted = 1;
+	if (req->locked) {
+		/* This is uninterruptible sleep, because data is
+		   being copied to/from the buffers of req.  During
+		   locked state, there mustn't be any filesystem
+		   operation (e.g. page fault), since that could lead
+		   to deadlock */
+		spin_unlock(&fuse_lock);
+		wait_event(req->waitq, !req->locked);
+		spin_lock(&fuse_lock);
+	}
+	if (!req->sent && !list_empty(&req->list)) {
+		list_del(&req->list);
+		__fuse_put_request(req);
+	} else if (!req->finished && req->sent)
+		background_request(req);
+}
+
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	unsigned nbytes = 0;
+	unsigned i;
+
+	for (i = 0; i < numargs; i++)
+		nbytes += args[i].size;
+
+	return nbytes;
+}
+
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	fc->reqctr++;
+	/* zero is special */
+	if (fc->reqctr == 0)
+		fc->reqctr = 1;
+	req->in.h.unique = fc->reqctr;
+	req->in.h.len = sizeof(struct fuse_in_header) +
+		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+	if (!req->preallocated) {
+		/* If request is not preallocated (either FORGET or
+		   RELEASE), then still decrease outstanding_sem, so
+		   user can't open infinite number of files while not
+		   processing the RELEASE requests.  However for
+		   efficiency do it without blocking, so if down()
+		   would block, just increase the debt instead */
+		if (down_trylock(&fc->outstanding_sem))
+			fc->outstanding_debt++;
+	}
+	list_add_tail(&req->list, &fc->pending);
+	wake_up(&fc->waitq);
+}
+
+static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
+			      int interruptible)
+{
+	req->isreply = 1;
+	spin_lock(&fuse_lock);
+	if (!fc->file)
+		req->out.h.error = -ENOTCONN;
+	else if (fc->conn_error)
+		req->out.h.error = -ECONNREFUSED;
+	else {
+		queue_request(fc, req);
+		/* acquire extra reference, since request is still needed
+		   after request_end() */
+		__fuse_get_request(req);
+
+		request_wait_answer(req, interruptible);
+	}
+	spin_unlock(&fuse_lock);
+}
+
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+	request_send_wait(fc, req, 1);
+}
+
+/*
+ * Non-interruptible version of the above function is for operations
+ * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
+ * be interrupted but only with SIGKILL.
+ */
+void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
+{
+	request_send_wait(fc, req, 0);
+}
+
+static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (fc->file) {
+		queue_request(fc, req);
+		spin_unlock(&fuse_lock);
+	} else {
+		req->out.h.error = -ENOTCONN;
+		request_end(fc, req);
+	}
+}
+
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 0;
+	request_send_nowait(fc, req);
+}
+
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 1;
+	background_request(req);
+	request_send_nowait(fc, req);
+}
+
+void fuse_send_init(struct fuse_conn *fc)
+{
+	/* This is called from fuse_read_super() so there's guaranteed
+	   to be a request available */
+	struct fuse_req *req = do_get_request(fc);
+	struct fuse_init_in_out *arg = &req->misc.init_in_out;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	req->in.h.opcode = FUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(*arg);
+	req->out.args[0].value = arg;
+	request_send_background(fc, req);
+}
+
+/*
+ * Lock the request.  Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault.  If the request was already
+ * interrupted bail out.
+ */
+static inline int lock_request(struct fuse_req *req)
+{
+	int err = 0;
+	if (req) {
+		spin_lock(&fuse_lock);
+		if (req->interrupted)
+			err = -ENOENT;
+		else
+			req->locked = 1;
+		spin_unlock(&fuse_lock);
+	}
+	return err;
+}
+
+/*
+ * Unlock request.  If it was interrupted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.
+ */
+static inline void unlock_request(struct fuse_req *req)
+{
+	if (req) {
+		spin_lock(&fuse_lock);
+		req->locked = 0;
+		if (req->interrupted)
+			wake_up(&req->waitq);
+		spin_unlock(&fuse_lock);
+	}
+}
+
+struct fuse_copy_state {
+	int write;
+	struct fuse_req *req;
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	unsigned long seglen;
+	unsigned long addr;
+	struct page *pg;
+	void *mapaddr;
+	void *buf;
+	unsigned len;
+};
+
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
+			   struct fuse_req *req, const struct iovec *iov,
+			   unsigned long nr_segs)
+{
+	memset(cs, 0, sizeof(*cs));
+	cs->write = write;
+	cs->req = req;
+	cs->iov = iov;
+	cs->nr_segs = nr_segs;
+}
+
+/* Unmap and put previous page of userspace buffer */
+static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+	if (cs->mapaddr) {
+		kunmap_atomic(cs->mapaddr, KM_USER0);
+		if (cs->write) {
+			flush_dcache_page(cs->pg);
+			set_page_dirty_lock(cs->pg);
+		}
+		put_page(cs->pg);
+		cs->mapaddr = NULL;
+	}
+}
+
+/*
+ * Get another pageful of userspace buffer, and map it to kernel
+ * address space, and lock request
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+	unsigned long offset;
+	int err;
+
+	unlock_request(cs->req);
+	fuse_copy_finish(cs);
+	if (!cs->seglen) {
+		BUG_ON(!cs->nr_segs);
+		cs->seglen = cs->iov[0].iov_len;
+		cs->addr = (unsigned long) cs->iov[0].iov_base;
+		cs->iov ++;
+		cs->nr_segs --;
+	}
+	down_read(&current->mm->mmap_sem);
+	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
+			     &cs->pg, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (err < 0)
+		return err;
+	BUG_ON(err != 1);
+	offset = cs->addr % PAGE_SIZE;
+	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+	cs->buf = cs->mapaddr + offset;
+	cs->len = min(PAGE_SIZE - offset, cs->seglen);
+	cs->seglen -= cs->len;
+	cs->addr += cs->len;
+
+	return lock_request(cs->req);
+}
+
+/* Do as much copy to/from userspace buffer as we can */
+static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+			       unsigned *size)
+{
+	unsigned ncpy = min(*size, cs->len);
+	if (val) {
+		if (cs->write)
+			memcpy(cs->buf, *val, ncpy);
+		else
+			memcpy(*val, cs->buf, ncpy);
+		*val += ncpy;
+	}
+	*size -= ncpy;
+	cs->len -= ncpy;
+	cs->buf += ncpy;
+	return ncpy;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer.  Must be
+ * done atomically
+ */
+static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+				 unsigned offset, unsigned count, int zeroing)
+{
+	if (page && zeroing && count < PAGE_SIZE) {
+		void *mapaddr = kmap_atomic(page, KM_USER1);
+		memset(mapaddr, 0, PAGE_SIZE);
+		kunmap_atomic(mapaddr, KM_USER1);
+	}
+	while (count) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		if (page) {
+			void *mapaddr = kmap_atomic(page, KM_USER1);
+			void *buf = mapaddr + offset;
+			offset += fuse_copy_do(cs, &buf, &count);
+			kunmap_atomic(mapaddr, KM_USER1);
+		} else
+			offset += fuse_copy_do(cs, NULL, &count);
+	}
+	if (page && !cs->write)
+		flush_dcache_page(page);
+	return 0;
+}
+
+/* Copy pages in the request to/from userspace buffer */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+			   int zeroing)
+{
+	unsigned i;
+	struct fuse_req *req = cs->req;
+	unsigned offset = req->page_offset;
+	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+		struct page *page = req->pages[i];
+		int err = fuse_copy_page(cs, page, offset, count, zeroing);
+		if (err)
+			return err;
+
+		nbytes -= count;
+		count = min(nbytes, (unsigned) PAGE_SIZE);
+		offset = 0;
+	}
+	return 0;
+}
+
+/* Copy a single argument in the request to/from userspace buffer */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+	while (size) {
+		int err;
+		if (!cs->len && (err = fuse_copy_fill(cs)))
+			return err;
+		fuse_copy_do(cs, &val, &size);
+	}
+	return 0;
+}
+
+/* Copy request arguments to/from userspace buffer */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+			  unsigned argpages, struct fuse_arg *args,
+			  int zeroing)
+{
+	int err = 0;
+	unsigned i;
+
+	for (i = 0; !err && i < numargs; i++)  {
+		struct fuse_arg *arg = &args[i];
+		if (i == numargs - 1 && argpages)
+			err = fuse_copy_pages(cs, arg->size, zeroing);
+		else
+			err = fuse_copy_one(cs, arg->value, arg->size);
+	}
+	return err;
+}
+
+/* Wait until a request is available on the pending list */
+static void request_wait(struct fuse_conn *fc)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue_exclusive(&fc->waitq, &wait);
+	while (fc->sb && list_empty(&fc->pending)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (signal_pending(current))
+			break;
+
+		spin_unlock(&fuse_lock);
+		schedule();
+		spin_lock(&fuse_lock);
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer.  This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer.  If
+ * no reply is needed (FORGET) or request has been interrupted or
+ * there was an error during the copying then it's finished by calling
+ * request_end().  Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ */
+static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	struct fuse_conn *fc;
+	struct fuse_req *req;
+	struct fuse_in *in;
+	struct fuse_copy_state cs;
+	unsigned reqsize;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	err = -EPERM;
+	if (!fc)
+		goto err_unlock;
+	request_wait(fc);
+	err = -ENODEV;
+	if (!fc->sb)
+		goto err_unlock;
+	err = -ERESTARTSYS;
+	if (list_empty(&fc->pending))
+		goto err_unlock;
+
+	req = list_entry(fc->pending.next, struct fuse_req, list);
+	list_del_init(&req->list);
+	spin_unlock(&fuse_lock);
+
+	in = &req->in;
+	reqsize = req->in.h.len;
+	fuse_copy_init(&cs, 1, req, iov, nr_segs);
+	err = -EINVAL;
+	if (iov_length(iov, nr_segs) >= reqsize) {
+		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+		if (!err)
+			err = fuse_copy_args(&cs, in->numargs, in->argpages,
+					     (struct fuse_arg *) in->args, 0);
+	}
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err && req->interrupted)
+		err = -ENOENT;
+	if (err) {
+		if (!req->interrupted)
+			req->out.h.error = -EIO;
+		request_end(fc, req);
+		return err;
+	}
+	if (!req->isreply)
+		request_end(fc, req);
+	else {
+		req->sent = 1;
+		list_add_tail(&req->list, &fc->processing);
+		spin_unlock(&fuse_lock);
+	}
+	return reqsize;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+	return err;
+}
+
+static ssize_t fuse_dev_read(struct file *file, char __user *buf,
+			     size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = buf;
+	return fuse_dev_readv(file, &iov, 1, off);
+}
+
+/* Look up request on processing list by unique ID */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+	struct list_head *entry;
+
+	list_for_each(entry, &fc->processing) {
+		struct fuse_req *req;
+		req = list_entry(entry, struct fuse_req, list);
+		if (req->in.h.unique == unique)
+			return req;
+	}
+	return NULL;
+}
+
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+			 unsigned nbytes)
+{
+	unsigned reqsize = sizeof(struct fuse_out_header);
+
+	if (out->h.error)
+		return nbytes != reqsize ? -EINVAL : 0;
+
+	reqsize += len_args(out->numargs, out->args);
+
+	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+		return -EINVAL;
+	else if (reqsize > nbytes) {
+		struct fuse_arg *lastarg = &out->args[out->numargs-1];
+		unsigned diffsize = reqsize - nbytes;
+		if (diffsize > lastarg->size)
+			return -EINVAL;
+		lastarg->size -= diffsize;
+	}
+	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+			      out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request.  First the header is copied from
+ * the write buffer.  The request is then searched on the processing
+ * list by the unique ID found in the header.  If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ */
+static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t *off)
+{
+	int err;
+	unsigned nbytes = iov_length(iov, nr_segs);
+	struct fuse_req *req;
+	struct fuse_out_header oh;
+	struct fuse_copy_state cs;
+	struct fuse_conn *fc = fuse_get_conn(file);
+	if (!fc)
+		return -ENODEV;
+
+	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+	if (nbytes < sizeof(struct fuse_out_header))
+		return -EINVAL;
+
+	err = fuse_copy_one(&cs, &oh, sizeof(oh));
+	if (err)
+		goto err_finish;
+	err = -EINVAL;
+	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
+	    oh.len != nbytes)
+		goto err_finish;
+
+	spin_lock(&fuse_lock);
+	req = request_find(fc, oh.unique);
+	err = -EINVAL;
+	if (!req)
+		goto err_unlock;
+
+	list_del_init(&req->list);
+	if (req->interrupted) {
+		request_end(fc, req);
+		fuse_copy_finish(&cs);
+		return -ENOENT;
+	}
+	req->out.h = oh;
+	req->locked = 1;
+	cs.req = req;
+	spin_unlock(&fuse_lock);
+
+	err = copy_out_args(&cs, &req->out, nbytes);
+	fuse_copy_finish(&cs);
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (!err) {
+		if (req->interrupted)
+			err = -ENOENT;
+	} else if (!req->interrupted)
+		req->out.h.error = -EIO;
+	request_end(fc, req);
+
+	return err ? err : nbytes;
+
+ err_unlock:
+	spin_unlock(&fuse_lock);
+ err_finish:
+	fuse_copy_finish(&cs);
+	return err;
+}
+
+static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
+			      size_t nbytes, loff_t *off)
+{
+	struct iovec iov;
+	iov.iov_len = nbytes;
+	iov.iov_base = (char __user *) buf;
+	return fuse_dev_writev(file, &iov, 1, off);
+}
+
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct fuse_conn *fc = fuse_get_conn(file);
+	unsigned mask = POLLOUT | POLLWRNORM;
+
+	/* The return value is an event mask, not an errno: report POLLERR */
+	if (!fc)
+		return POLLERR;
+
+	poll_wait(file, &fc->waitq, wait);
+	spin_lock(&fuse_lock);
+	if (!list_empty(&fc->pending))
+		mask |= POLLIN | POLLRDNORM;
+	spin_unlock(&fuse_lock);
+
+	return mask;
+}
+
+/* Abort all requests on the given list (pending or processing) */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+{
+	while (!list_empty(head)) {
+		struct fuse_req *req;
+		req = list_entry(head->next, struct fuse_req, list);
+		list_del_init(&req->list);
+		req->out.h.error = -ECONNABORTED;
+		request_end(fc, req);
+		spin_lock(&fuse_lock);
+	}
+}
+
+static int fuse_dev_release(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc;
+
+	spin_lock(&fuse_lock);
+	fc = file->private_data;
+	if (fc) {
+		fc->file = NULL;
+		end_requests(fc, &fc->pending);
+		end_requests(fc, &fc->processing);
+		fuse_release_conn(fc);
+	}
+	spin_unlock(&fuse_lock);
+	return 0;
+}
+
+struct file_operations fuse_dev_operations = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= fuse_dev_read,
+	.readv		= fuse_dev_readv,
+	.write		= fuse_dev_write,
+	.writev		= fuse_dev_writev,
+	.poll		= fuse_dev_poll,
+	.release	= fuse_dev_release,
+};
+
+static struct miscdevice fuse_miscdevice = {
+	.minor = FUSE_MINOR,
+	.name  = "fuse",
+	.fops = &fuse_dev_operations,
+};
+
+int __init fuse_dev_init(void)
+{
+	int err = -ENOMEM;
+	fuse_req_cachep = kmem_cache_create("fuse_request",
+					    sizeof(struct fuse_req),
+					    0, 0, NULL, NULL);
+	if (!fuse_req_cachep)
+		goto out;
+
+	err = misc_register(&fuse_miscdevice);
+	if (err)
+		goto out_cache_clean;
+
+	return 0;
+
+ out_cache_clean:
+	kmem_cache_destroy(fuse_req_cachep);
+ out:
+	return err;
+}
+
+void fuse_dev_cleanup(void)
+{
+	misc_deregister(&fuse_miscdevice);
+	kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eed6e89ce01..50ad6a0c39b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,12 @@
 #include <linux/backing-dev.h>
 #include <asm/semaphore.h>
 
+/** Max number of pages that can be used in a single read request */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** If more requests are outstanding, then the operation will block */
+#define FUSE_MAX_OUTSTANDING 10
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -28,6 +34,123 @@ struct fuse_inode {
 	unsigned long i_time;
 };
 
+/** One input argument of a request */
+struct fuse_in_arg {
+	unsigned size;
+	const void *value;
+};
+
+/** The request input */
+struct fuse_in {
+	/** The request header */
+	struct fuse_in_header h;
+
+	/** True if the data for the last argument is in req->pages */
+	unsigned argpages:1;
+
+	/** Number of arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+	unsigned size;
+	void *value;
+};
+
+/** The request output */
+struct fuse_out {
+	/** Header returned from userspace */
+	struct fuse_out_header h;
+
+	/** Last argument is variable length (can be shorter than
+	    arg->size) */
+	unsigned argvar:1;
+
+	/** Last argument is a list of pages to copy data to */
+	unsigned argpages:1;
+
+	/** Zero partially or not copied pages */
+	unsigned page_zeroing:1;
+
+	/** Number of arguments */
+	unsigned numargs;
+
+	/** Array of arguments */
+	struct fuse_arg args[3];
+};
+
+struct fuse_req;
+struct fuse_conn;
+
+/**
+ * A request to the client
+ */
+struct fuse_req {
+	/** This can be on either unused_list, pending or processing
+	    lists in fuse_conn */
+	struct list_head list;
+
+	/** refcount */
+	atomic_t count;
+
+	/** True if the request has reply */
+	unsigned isreply:1;
+
+	/** The request is preallocated */
+	unsigned preallocated:1;
+
+	/** The request was interrupted */
+	unsigned interrupted:1;
+
+	/** Request is sent in the background */
+	unsigned background:1;
+
+	/** Data is being copied to/from the request */
+	unsigned locked:1;
+
+	/** Request has been sent to userspace */
+	unsigned sent:1;
+
+	/** The request is finished */
+	unsigned finished:1;
+
+	/** The request input */
+	struct fuse_in in;
+
+	/** The request output */
+	struct fuse_out out;
+
+	/** Used to wake up the task waiting for completion of request*/
+	wait_queue_head_t waitq;
+
+	/** Data for asynchronous requests */
+	union {
+		struct fuse_init_in_out init_in_out;
+	} misc;
+
+	/** page vector */
+	struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+	/** number of pages in vector */
+	unsigned num_pages;
+
+	/** offset of data on first page */
+	unsigned page_offset;
+
+	/** Inode used in the request */
+	struct inode *inode;
+
+	/** Second inode used in the request (or NULL) */
+	struct inode *inode2;
+
+	/** File used in the request (or NULL) */
+	struct file *file;
+};
+
 /**
  * A Fuse connection.
  *
@@ -39,9 +162,37 @@ struct fuse_conn {
 	/** The superblock of the mounted filesystem */
 	struct super_block *sb;
 
+	/** The opened client device */
+	struct file *file;
+
 	/** The user id for this mount */
 	uid_t user_id;
 
+	/** Readers of the connection are waiting on this */
+	wait_queue_head_t waitq;
+
+	/** The list of pending requests */
+	struct list_head pending;
+
+	/** The list of requests being processed */
+	struct list_head processing;
+
+	/** Controls the maximum number of outstanding requests */
+	struct semaphore outstanding_sem;
+
+	/** This counts the number of outstanding requests if
+	    outstanding_sem would go negative */
+	unsigned outstanding_debt;
+
+	/** The list of unused requests */
+	struct list_head unused_list;
+
+	/** The next unique request id */
+	u64 reqctr;
+
+	/** Connection failed (version mismatch) */
+	unsigned conn_error : 1;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
@@ -71,13 +222,20 @@ static inline u64 get_node_id(struct inode *inode)
 	return get_fuse_inode(inode)->nodeid;
 }
 
+/** Device operations */
+extern struct file_operations fuse_dev_operations;
+
 /**
  * This is the single global spinlock which protects FUSE's structures
  *
  * The following data is protected by this lock:
  *
+ *  - the private_data field of the device file
  *  - the s_fs_info field of the super block
+ *  - unused_list, pending, processing lists in fuse_conn
+ *  - the unique request ID counter reqctr in fuse_conn
  *  - the sb (super_block) field in fuse_conn
+ *  - the file (device file) field in fuse_conn
  */
 extern spinlock_t fuse_lock;
 
@@ -87,3 +245,68 @@ extern spinlock_t fuse_lock;
  */
 void fuse_release_conn(struct fuse_conn *fc);
 
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Reinitialize a request, the preallocated flag is left unmodified
+ */
+void fuse_reset_request(struct fuse_req *req);
+
+/**
+ * Reserve a preallocated request
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+
+/**
+ * Reserve a preallocated request, only interruptible by SIGKILL
+ */
+struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc);
+
+/**
+ * Decrement reference count of a request.  If count goes to zero put
+ * on unused list (preallocated) or free request (not preallocated).
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous, interruptible)
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous, non-interruptible except by SIGKILL)
+ */
+void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request with no reply
+ */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send the INIT message
+ */
+void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ea6339c2b6a..33fad334ba7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -151,6 +151,8 @@ static void fuse_put_super(struct super_block *sb)
 	mount_count --;
 	fc->sb = NULL;
 	fc->user_id = 0;
+	/* Flush all readers on this fs */
+	wake_up_all(&fc->waitq);
 	fuse_release_conn(fc);
 	*get_fuse_conn_super_p(sb) = NULL;
 	spin_unlock(&fuse_lock);
@@ -229,22 +231,51 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
-void fuse_release_conn(struct fuse_conn *fc)
+static void free_conn(struct fuse_conn *fc)
 {
+	while (!list_empty(&fc->unused_list)) {
+		struct fuse_req *req;
+		req = list_entry(fc->unused_list.next, struct fuse_req, list);
+		list_del(&req->list);
+		fuse_request_free(req);
+	}
 	kfree(fc);
 }
 
+/* Must be called with the fuse lock held */
+void fuse_release_conn(struct fuse_conn *fc)
+{
+	if (!fc->sb && !fc->file)
+		free_conn(fc);
+}
+
 static struct fuse_conn *new_conn(void)
 {
 	struct fuse_conn *fc;
 
 	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
 	if (fc != NULL) {
+		int i;
 		memset(fc, 0, sizeof(*fc));
 		fc->sb = NULL;
+		fc->file = NULL;
 		fc->user_id = 0;
+		init_waitqueue_head(&fc->waitq);
+		INIT_LIST_HEAD(&fc->pending);
+		INIT_LIST_HEAD(&fc->processing);
+		INIT_LIST_HEAD(&fc->unused_list);
+		sema_init(&fc->outstanding_sem, 0);
+		for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
+			struct fuse_req *req = fuse_request_alloc();
+			if (!req) {
+				free_conn(fc);
+				return NULL;
+			}
+			list_add(&req->list, &fc->unused_list);
+		}
 		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 		fc->bdi.unplug_io_fn = default_unplug_io_fn;
+		fc->reqctr = 0;
 	}
 	return fc;
 }
@@ -253,11 +284,20 @@ static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
 {
 	struct fuse_conn *fc;
 
+	if (file->f_op != &fuse_dev_operations)
+		return ERR_PTR(-EINVAL);
 	fc = new_conn();
 	if (fc == NULL)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	spin_lock(&fuse_lock);
-	fc->sb = sb;
+	if (file->private_data) {
+		free_conn(fc);
+		fc = ERR_PTR(-EINVAL);
+	} else {
+		file->private_data = fc;
+		fc->sb = sb;
+		fc->file = file;
+	}
 	spin_unlock(&fuse_lock);
 	return fc;
 }
@@ -315,8 +355,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	fc = get_conn(file, sb);
 	fput(file);
-	if (fc == NULL)
-		return -EINVAL;
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
 
 	fc->user_id = d.user_id;
 
@@ -336,6 +376,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		iput(root);
 		goto err;
 	}
+	fuse_send_init(fc);
 	return 0;
 
  err:
@@ -411,8 +452,14 @@ static int __init fuse_init(void)
 	if (res)
 		goto err;
 
+	res = fuse_dev_init();
+	if (res)
+		goto err_fs_cleanup;
+
 	return 0;
 
+ err_fs_cleanup:
+	fuse_fs_cleanup();
  err:
 	return res;
 }
@@ -422,6 +469,7 @@ static void __exit fuse_exit(void)
 	printk(KERN_DEBUG "fuse exit\n");
 
 	fuse_fs_cleanup();
+	fuse_dev_cleanup();
 }
 
 module_init(fuse_init);
-- 
cgit v1.2.3-18-g5258


From e5e5558e923f35839108a12718494ecb73fb782f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:28 -0700
Subject: [PATCH] FUSE - read-only operations

This patch adds the read-only filesystem operations of FUSE.

This contains the following files:

 o dir.c
    - directory, symlink and file-inode operations

The following operations are added:

 o lookup
 o getattr
 o readlink
 o follow_link
 o directory open
 o readdir
 o directory release
 o permission
 o dentry revalidate
 o statfs

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/Makefile |   2 +-
 fs/fuse/dev.c    |   9 ++
 fs/fuse/dir.c    | 413 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  51 +++++++
 fs/fuse/inode.c  |  81 +++++++++++
 5 files changed, 555 insertions(+), 1 deletion(-)
 create mode 100644 fs/fuse/dir.c

(limited to 'fs')

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 21021c35648..c34e268a0ed 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 
-fuse-objs := dev.o inode.o
+fuse-objs := dev.o dir.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9aaf10a6588..e8f3170946f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -691,6 +691,13 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
 	return NULL;
 }
 
+/* fget() needs to be done in this context */
+static void process_getdir(struct fuse_req *req)
+{	/* Resolve the fd carried in a FUSE_GETDIR reply into a struct file */
+	struct fuse_getdir_out_i *arg = req->out.args[0].value;
+	arg->file = fget(arg->fd);	/* not checked here; fuse_checkdir() tests for NULL */
+}
+
 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
 			 unsigned nbytes)
 {
@@ -770,6 +777,8 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
 	if (!err) {
 		if (req->interrupted)
 			err = -ENOENT;
+		else if (req->in.h.opcode == FUSE_GETDIR && !oh.error)
+			process_getdir(req);
 	} else if (!req->interrupted)
 		req->out.h.error = -EIO;
 	request_end(fc, req);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 00000000000..a89730e70c5
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,413 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+static inline unsigned long time_to_jiffies(unsigned long sec,
+					    unsigned long nsec)
+{	/* Turn a relative (sec, nsec) timeout into an absolute jiffies value */
+	struct timespec ts = {sec, nsec};
+	return jiffies + timespec_to_jiffies(&ts);	/* now + timeout */
+}
+
+static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
+			     struct dentry *entry,
+			     struct fuse_entry_out *outarg)
+{	/* Fill req as a FUSE_LOOKUP of entry's name in dir, replying into outarg */
+	req->in.h.opcode = FUSE_LOOKUP;
+	req->in.h.nodeid = get_node_id(dir);	/* node id of the parent directory */
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;	/* +1: presumably the NUL */
+	req->in.args[0].value = entry->d_name.name;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_entry_out);
+	req->out.args[0].value = outarg;	/* caller-owned reply buffer */
+}
+
+static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+{
+	if (!entry->d_inode || is_bad_inode(entry->d_inode))
+		return 0;
+	else if (time_after(jiffies, entry->d_time)) {
+		int err;
+		int version;
+		struct fuse_entry_out outarg;
+		struct inode *inode = entry->d_inode;
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		struct fuse_conn *fc = get_fuse_conn(inode);
+		struct fuse_req *req = fuse_get_request_nonint(fc);
+		if (!req)
+			return 0;
+
+		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+		request_send_nonint(fc, req);
+		version = req->out.h.unique;
+		err = req->out.h.error;
+		fuse_put_request(fc, req);
+		if (err || outarg.nodeid != get_node_id(inode) ||
+		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+			return 0;
+
+		fuse_change_attributes(inode, &outarg.attr);
+		inode->i_version = version;
+		entry->d_time = time_to_jiffies(outarg.entry_valid,
+						outarg.entry_valid_nsec);
+		fi->i_time = time_to_jiffies(outarg.attr_valid,
+					     outarg.attr_valid_nsec);
+	}
+	return 1;
+}
+
+static struct dentry_operations fuse_dentry_operations = {
+	.d_revalidate	= fuse_dentry_revalidate,
+};
+
+static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
+			    struct inode **inodep)
+{
+	int err;
+	int version;
+	struct fuse_entry_out outarg;
+	struct inode *inode = NULL;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req;
+
+	if (entry->d_name.len > FUSE_NAME_MAX)
+		return -ENAMETOOLONG;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	fuse_lookup_init(req, dir, entry, &outarg);
+	request_send(fc, req);
+	version = req->out.h.unique;
+	err = req->out.h.error;
+	if (!err) {
+		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+				  &outarg.attr, version);
+		if (!inode) {
+			fuse_send_forget(fc, req, outarg.nodeid, version);
+			return -ENOMEM;
+		}
+	}
+	fuse_put_request(fc, req);
+	if (err && err != -ENOENT)
+		return err;
+
+	if (inode) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		entry->d_time =	time_to_jiffies(outarg.entry_valid,
+						outarg.entry_valid_nsec);
+		fi->i_time = time_to_jiffies(outarg.attr_valid,
+					     outarg.attr_valid_nsec);
+	}
+
+	entry->d_op = &fuse_dentry_operations;
+	*inodep = inode;
+	return 0;
+}
+
+int fuse_do_getattr(struct inode *inode)
+{
+	int err;
+	struct fuse_attr_out arg;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_GETATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(arg);
+	req->out.args[0].value = &arg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
+			make_bad_inode(inode);
+			err = -EIO;
+		} else {
+			struct fuse_inode *fi = get_fuse_inode(inode);
+			fuse_change_attributes(inode, &arg.attr);
+			fi->i_time = time_to_jiffies(arg.attr_valid,
+						     arg.attr_valid_nsec);
+		}
+	}
+	return err;
+}
+
+static int fuse_revalidate(struct dentry *entry)
+{	/* Refetch the inode's attributes unless the cached ones are still valid */
+	struct inode *inode = entry->d_inode;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (get_node_id(inode) == FUSE_ROOT_ID) {	/* root: i_time is not consulted */
+		if (current->fsuid != fc->user_id)	/* only fc->user_id may proceed */
+			return -EACCES;
+	} else if (time_before_eq(jiffies, fi->i_time))	/* not yet expired */
+		return 0;
+
+	return fuse_do_getattr(inode);
+}
+
+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+{	/* Owner-only access check; returns 0, -EACCES or -EROFS (nd is unused) */
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (current->fsuid != fc->user_id)	/* only the connection's user_id */
+		return -EACCES;
+	else {
+		int mode = inode->i_mode;
+		if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
+		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+			return -EROFS;	/* other file types are not refused here */
+		if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
+			return -EACCES;	/* non-directory without any execute bit */
+		return 0;
+	}
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+			 void *dstbuf, filldir_t filldir)
+{	/* Feed the fuse_dirent records in buf to filldir; returns 0 or -EIO */
+	while (nbytes >= FUSE_NAME_OFFSET) {	/* presumably: a full fixed header is left */
+		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+		size_t reclen = FUSE_DIRENT_SIZE(dirent);
+		int over;
+		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+			return -EIO;	/* empty or over-long name in the record */
+		if (reclen > nbytes)	/* record extends past the data read */
+			break;
+
+		over = filldir(dstbuf, dirent->name, dirent->namelen,
+			       file->f_pos, dirent->ino, dirent->type);
+		if (over)	/* filldir asked to stop; f_pos stays at this entry */
+			break;
+
+		buf += reclen;
+		nbytes -= reclen;
+		file->f_pos = dirent->off;	/* offset supplied in the record */
+	}
+
+	return 0;
+}
+
+static int fuse_checkdir(struct file *cfile, struct file *file)
+{	/* Validate cfile and store it in file->private_data; returns 0 or -EIO */
+	struct inode *inode;
+	if (!cfile)
+		return -EIO;
+	inode = cfile->f_dentry->d_inode;
+	if (!S_ISREG(inode->i_mode)) {	/* only a regular file is accepted */
+		fput(cfile);	/* drop the reference on rejection */
+		return -EIO;
+	}
+
+	file->private_data = cfile;	/* reference kept; fuse_dir_release() puts it */
+	return 0;
+}
+
+static int fuse_getdir(struct file *file)
+{	/* Send FUSE_GETDIR and attach the file it yields to this open directory */
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	struct fuse_getdir_out_i outarg;
+	int err;
+
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_GETDIR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_getdir_out);	/* not the _i struct */
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err)	/* outarg.file was filled in by process_getdir() in dev.c */
+		err = fuse_checkdir(outarg.file, file);
+	return err;
+}
+
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{	/* Read up to one page of dirents from the backing file and emit them */
+	struct file *cfile = file->private_data;
+	char *buf;
+	int ret;
+
+	if (!cfile) {	/* no backing file yet on this open: request one */
+		ret = fuse_getdir(file);
+		if (ret)
+			return ret;
+
+		cfile = file->private_data;	/* set by fuse_checkdir() */
+	}
+
+	buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = kernel_read(cfile, file->f_pos, buf, PAGE_SIZE);
+	if (ret > 0)	/* ret <= 0 is returned to the caller unchanged */
+		ret = parse_dirfile(buf, ret, file, dstbuf, filldir);
+
+	free_page((unsigned long) buf);
+	return ret;
+}
+
+static char *read_link(struct dentry *dentry)
+{	/* Fetch the symlink target into a new page; returns it or an ERR_PTR */
+	struct inode *inode = dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	char *link;
+
+	if (!req)
+		return ERR_PTR(-ERESTARTNOINTR);
+
+	link = (char *) __get_free_page(GFP_KERNEL);
+	if (!link) {
+		link = ERR_PTR(-ENOMEM);
+		goto out;	/* req still has to be released */
+	}
+	req->in.h.opcode = FUSE_READLINK;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->out.argvar = 1;	/* presumably: reply length may vary -- confirm */
+	req->out.numargs = 1;
+	req->out.args[0].size = PAGE_SIZE - 1;	/* keep one byte for the NUL */
+	req->out.args[0].value = link;
+	request_send(fc, req);
+	if (req->out.h.error) {
+		free_page((unsigned long) link);
+		link = ERR_PTR(req->out.h.error);
+	} else	/* assumes size now holds the reply length -- TODO confirm */
+		link[req->out.args[0].size] = '\0';
+ out:
+	fuse_put_request(fc, req);
+	return link;
+}
+
+static void free_link(char *link)
+{	/* Release a page returned by read_link(); error pointers are ignored */
+	if (!IS_ERR(link))
+		free_page((unsigned long) link);
+}
+
+static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
+{	/* Store the link target (or an ERR_PTR from read_link()) in nd */
+	nd_set_link(nd, read_link(dentry));
+	return NULL;	/* no cookie; fuse_put_link() re-reads the link from nd */
+}
+
+static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{	/* Free the page stored in nd by fuse_follow_link(); dentry and c unused */
+	free_link(nd_get_link(nd));
+}
+
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{	/* Nothing is sent at open time; always returns 0 */
+	file->private_data = NULL;	/* fuse_readdir() treats NULL as "no backing file yet" */
+	return 0;
+}
+
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+	struct file *cfile = file->private_data;
+
+	if (cfile)
+		fput(cfile);
+
+	return 0;
+}
+
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+			struct kstat *stat)
+{
+	struct inode *inode = entry->d_inode;
+	int err = fuse_revalidate(entry);
+	if (!err)
+		generic_fillattr(inode, stat);
+
+	return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  struct nameidata *nd)
+{	/* Look up entry in dir; -EIO if a directory would gain a second name */
+	struct inode *inode;
+	int err = fuse_lookup_iget(dir, entry, &inode);
+	if (err)
+		return ERR_PTR(err);
+	if (inode && S_ISDIR(inode->i_mode)) {	/* don't alias a directory */
+		struct dentry *alias = d_find_alias(inode);	/* holds a ref */
+		if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
+			dput(alias);
+			iput(inode);
+			return ERR_PTR(-EIO);
+		}
+		dput(alias);	/* disconnected alias: drop our ref (NULL is ok) */
+	}
+	return d_splice_alias(inode, entry);
+}
+
+static struct inode_operations fuse_dir_inode_operations = {
+	.lookup		= fuse_lookup,
+	.permission	= fuse_permission,
+	.getattr	= fuse_getattr,
+};
+
+static struct file_operations fuse_dir_operations = {
+	.read		= generic_read_dir,
+	.readdir	= fuse_readdir,
+	.open		= fuse_dir_open,
+	.release	= fuse_dir_release,
+};
+
+static struct inode_operations fuse_common_inode_operations = {
+	.permission	= fuse_permission,
+	.getattr	= fuse_getattr,
+};
+
+static struct inode_operations fuse_symlink_inode_operations = {
+	.follow_link	= fuse_follow_link,
+	.put_link	= fuse_put_link,
+	.readlink	= generic_readlink,
+	.getattr	= fuse_getattr,
+};
+
+void fuse_init_common(struct inode *inode)
+{	/* Install the inode operations shared by regular and special files */
+	inode->i_op = &fuse_common_inode_operations;
+}
+
+void fuse_init_dir(struct inode *inode)
+{	/* Install the directory inode operations and file operations */
+	inode->i_op = &fuse_dir_inode_operations;
+	inode->i_fop = &fuse_dir_operations;
+}
+
+void fuse_init_symlink(struct inode *inode)
+{	/* Install the symlink inode operations */
+	inode->i_op = &fuse_symlink_inode_operations;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 50ad6a0c39b..8d91e1492f9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -30,6 +30,9 @@ struct fuse_inode {
 	 * and kernel */
 	u64 nodeid;
 
+	/** The request used for sending the FORGET message */
+	struct fuse_req *forget_req;
+
 	/** Time in jiffies until the file attributes are valid */
 	unsigned long i_time;
 };
@@ -129,6 +132,7 @@ struct fuse_req {
 
 	/** Data for asynchronous requests */
 	union {
+		struct fuse_forget_in forget_in;
 		struct fuse_init_in_out init_in_out;
 	} misc;
 
@@ -197,6 +201,11 @@ struct fuse_conn {
 	struct backing_dev_info bdi;
 };
 
+struct fuse_getdir_out_i {	/* kernel-side reply buffer for FUSE_GETDIR */
+	int fd;	/* descriptor from the reply; process_getdir() passes it to fget() */
+	void *file; /* Used by kernel only */
+};
+
 static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
 {
 	return (struct fuse_conn **) &sb->s_fs_info;
@@ -239,6 +248,38 @@ extern struct file_operations fuse_dev_operations;
  */
 extern spinlock_t fuse_lock;
 
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+			int generation, struct fuse_attr *attr, int version);
+
+/**
+ * Send FORGET command
+ */
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+		      unsigned long nodeid, int version);
+
+/**
+ * Initialise inode operations on regular files and special files
+ */
+void fuse_init_common(struct inode *inode);
+
+/**
+ * Initialise inode and file operations on a directory
+ */
+void fuse_init_dir(struct inode *inode);
+
+/**
+ * Initialise inode operations on a symlink
+ */
+void fuse_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+
 /**
  * Check if the connection can be released, and if yes, then free the
  * connection structure
@@ -306,6 +347,16 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
  */
 void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 
+/**
+ * Get the attributes of a file
+ */
+int fuse_do_getattr(struct inode *inode);
+
+/**
+ * Invalidate inode attributes
+ */
+void fuse_invalidate_attr(struct inode *inode);
+
 /**
  * Send the INIT message
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 33fad334ba7..41498a1952a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -51,12 +51,20 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi = get_fuse_inode(inode);
 	fi->i_time = jiffies - 1;
 	fi->nodeid = 0;
+	fi->forget_req = fuse_request_alloc();
+	if (!fi->forget_req) {
+		kmem_cache_free(fuse_inode_cachep, inode);
+		return NULL;
+	}
 
 	return inode;
 }
 
 static void fuse_destroy_inode(struct inode *inode)
 {
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	if (fi->forget_req)
+		fuse_request_free(fi->forget_req);
 	kmem_cache_free(fuse_inode_cachep, inode);
 }
 
@@ -65,8 +73,27 @@ static void fuse_read_inode(struct inode *inode)
 	/* No op */
 }
 
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+		      unsigned long nodeid, int version)
+{
+	struct fuse_forget_in *inarg = &req->misc.forget_in;
+	inarg->version = version;
+	req->in.h.opcode = FUSE_FORGET;
+	req->in.h.nodeid = nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_forget_in);
+	req->in.args[0].value = inarg;
+	request_send_noreply(fc, req);
+}
+
 static void fuse_clear_inode(struct inode *inode)
 {
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	if (fc) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+		fuse_send_forget(fc, fi->forget_req, fi->nodeid, inode->i_version);
+		fi->forget_req = NULL;
+	}
 }
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
@@ -94,6 +121,22 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 {
 	inode->i_mode = attr->mode & S_IFMT;
 	i_size_write(inode, attr->size);
+	if (S_ISREG(inode->i_mode)) {
+		fuse_init_common(inode);
+	} else if (S_ISDIR(inode->i_mode))
+		fuse_init_dir(inode);
+	else if (S_ISLNK(inode->i_mode))
+		fuse_init_symlink(inode);
+	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+		fuse_init_common(inode);
+		init_special_inode(inode, inode->i_mode,
+				   new_decode_dev(attr->rdev));
+	} else {
+		/* Don't let user create weird files */
+		inode->i_mode = S_IFREG;
+		fuse_init_common(inode);
+	}
 }
 
 static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
@@ -158,6 +201,43 @@ static void fuse_put_super(struct super_block *sb)
 	spin_unlock(&fuse_lock);
 }
 
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{	/* Copy a FUSE statfs reply into the kernel's kstatfs, field by field */
+	stbuf->f_type    = FUSE_SUPER_MAGIC;	/* constant, not taken from the reply */
+	stbuf->f_bsize   = attr->bsize;
+	stbuf->f_blocks  = attr->blocks;
+	stbuf->f_bfree   = attr->bfree;
+	stbuf->f_bavail  = attr->bavail;
+	stbuf->f_files   = attr->files;
+	stbuf->f_ffree   = attr->ffree;
+	stbuf->f_namelen = attr->namelen;
+	/* fsid is left zero */
+}
+
+static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+{	/* Query userspace with FUSE_STATFS and fill buf on success */
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	struct fuse_req *req;
+	struct fuse_statfs_out outarg;
+	int err;
+
+	req = fuse_get_request(fc);
+	if (!req)	/* NOTE(review): dir.c callers return -ERESTARTNOINTR here */
+		return -ERESTARTSYS;
+
+	req->in.numargs = 0;
+	req->in.h.opcode = FUSE_STATFS;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (!err)	/* buf is left untouched on error */
+		convert_fuse_statfs(buf, &outarg.st);
+	fuse_put_request(fc, req);
+	return err;
+}
+
 enum {
 	OPT_FD,
 	OPT_ROOTMODE,
@@ -318,6 +398,7 @@ static struct super_operations fuse_super_operations = {
 	.read_inode	= fuse_read_inode,
 	.clear_inode	= fuse_clear_inode,
 	.put_super	= fuse_put_super,
+	.statfs		= fuse_statfs,
 	.show_options	= fuse_show_options,
 };
 
-- 
cgit v1.2.3-18-g5258


From 9e6268db496a2592e89457537ea54a496feabb77 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:29 -0700
Subject: [PATCH] FUSE - read-write operations

This patch adds the write filesystem operations of FUSE.

The following operations are added:

 o setattr
 o symlink
 o mknod
 o mkdir
 o create
 o unlink
 o rmdir
 o rename
 o link

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    | 373 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h |   7 +-
 fs/fuse/inode.c  |  15 ++-
 3 files changed, 378 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a89730e70c5..92c7188ccd1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -42,7 +42,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		return 0;
 	else if (time_after(jiffies, entry->d_time)) {
 		int err;
-		int version;
 		struct fuse_entry_out outarg;
 		struct inode *inode = entry->d_inode;
 		struct fuse_inode *fi = get_fuse_inode(inode);
@@ -53,15 +52,19 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 
 		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
 		request_send_nonint(fc, req);
-		version = req->out.h.unique;
 		err = req->out.h.error;
+		if (!err) {
+			if (outarg.nodeid != get_node_id(inode)) {
+				fuse_send_forget(fc, req, outarg.nodeid, 1);
+				return 0;
+			}
+			fi->nlookup ++;
+		}
 		fuse_put_request(fc, req);
-		if (err || outarg.nodeid != get_node_id(inode) ||
-		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
 			return 0;
 
 		fuse_change_attributes(inode, &outarg.attr);
-		inode->i_version = version;
 		entry->d_time = time_to_jiffies(outarg.entry_valid,
 						outarg.entry_valid_nsec);
 		fi->i_time = time_to_jiffies(outarg.attr_valid,
@@ -78,7 +81,6 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
 			    struct inode **inodep)
 {
 	int err;
-	int version;
 	struct fuse_entry_out outarg;
 	struct inode *inode = NULL;
 	struct fuse_conn *fc = get_fuse_conn(dir);
@@ -93,13 +95,12 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
 
 	fuse_lookup_init(req, dir, entry, &outarg);
 	request_send(fc, req);
-	version = req->out.h.unique;
 	err = req->out.h.error;
 	if (!err) {
 		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-				  &outarg.attr, version);
+				  &outarg.attr);
 		if (!inode) {
-			fuse_send_forget(fc, req, outarg.nodeid, version);
+			fuse_send_forget(fc, req, outarg.nodeid, 1);
 			return -ENOMEM;
 		}
 	}
@@ -120,6 +121,264 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
 	return 0;
 }
 
+void fuse_invalidate_attr(struct inode *inode)
+{	/* Expire the cached attributes so the i_time check in fuse_revalidate() fails */
+	get_fuse_inode(inode)->i_time = jiffies - 1;
+}
+
+static void fuse_invalidate_entry(struct dentry *entry)
+{	/* Invalidate the dentry and mark its validity time as already passed */
+	d_invalidate(entry);
+	entry->d_time = jiffies - 1;	/* fuse_dentry_revalidate() compares against jiffies */
+}
+
+static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+			    struct inode *dir, struct dentry *entry,
+			    int mode)
+{	/* Send a prepared create-type request and instantiate entry from the reply */
+	struct fuse_entry_out outarg;
+	struct inode *inode;
+	struct fuse_inode *fi;
+	int err;
+
+	req->in.h.nodeid = get_node_id(dir);	/* opcode and in-args are set by the caller */
+	req->inode = dir;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		fuse_put_request(fc, req);
+		return err;
+	}
+	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+			  &outarg.attr);
+	if (!inode) {	/* reuse req to send a FORGET for the new node */
+		fuse_send_forget(fc, req, outarg.nodeid, 1);	/* req is not put on this path */
+		return -ENOMEM;
+	}
+	fuse_put_request(fc, req);
+
+	/* Don't allow userspace to do really stupid things... */
+	if ((inode->i_mode ^ mode) & S_IFMT) {	/* reply's file type differs from mode's */
+		iput(inode);
+		return -EIO;
+	}
+
+	entry->d_time = time_to_jiffies(outarg.entry_valid,
+					outarg.entry_valid_nsec);
+
+	fi = get_fuse_inode(inode);
+	fi->i_time = time_to_jiffies(outarg.attr_valid,
+				     outarg.attr_valid_nsec);
+
+	d_instantiate(entry, inode);
+	fuse_invalidate_attr(dir);	/* parent's cached attributes are now stale */
+	return 0;
+}
+
+static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+		      dev_t rdev)
+{	/* Build a FUSE_MKNOD request: fuse_mknod_in followed by the name */
+	struct fuse_mknod_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.mode = mode;
+	inarg.rdev = new_encode_dev(rdev);
+	req->in.h.opcode = FUSE_MKNOD;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = entry->d_name.len + 1;
+	req->in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, req, dir, entry, mode);	/* sends req and disposes of it */
+}
+
+static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+		       struct nameidata *nd)
+{	/* create is implemented as fuse_mknod() with rdev 0; nd is unused */
+	return fuse_mknod(dir, entry, mode, 0);
+}
+
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+{	/* Build a FUSE_MKDIR request: fuse_mkdir_in followed by the name */
+	struct fuse_mkdir_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.mode = mode;
+	req->in.h.opcode = FUSE_MKDIR;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = entry->d_name.len + 1;
+	req->in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, req, dir, entry, S_IFDIR);	/* reply must be a directory */
+}
+
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+			const char *link)
+{	/* Build a FUSE_SYMLINK request: the new name followed by the target */
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	unsigned len = strlen(link) + 1;	/* target length including the NUL */
+	struct fuse_req *req;
+
+	if (len > FUSE_SYMLINK_MAX)	/* checked before a request is taken */
+		return -ENAMETOOLONG;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_SYMLINK;
+	req->in.numargs = 2;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	req->in.args[1].size = len;
+	req->in.args[1].value = link;
+	return create_new_entry(fc, req, dir, entry, S_IFLNK);	/* reply must be a symlink */
+}
+
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{	/* Send FUSE_UNLINK for entry's name in dir and update cached state */
+	int err;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_UNLINK;
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		struct inode *inode = entry->d_inode;
+
+		/* Set nlink to zero so the inode can be cleared, if
+		   the inode does have more links this will be
+		   discovered at the next lookup/getattr */
+		inode->i_nlink = 0;
+		fuse_invalidate_attr(inode);
+		fuse_invalidate_attr(dir);
+	} else if (err == -EINTR)	/* interrupted: stop trusting the cached dentry */
+		fuse_invalidate_entry(entry);
+	return err;
+}
+
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{	/* Send FUSE_RMDIR for entry's name in dir and update cached state */
+	int err;
+	struct fuse_conn *fc = get_fuse_conn(dir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_RMDIR;
+	req->in.h.nodeid = get_node_id(dir);
+	req->inode = dir;
+	req->in.numargs = 1;
+	req->in.args[0].size = entry->d_name.len + 1;
+	req->in.args[0].value = entry->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		entry->d_inode->i_nlink = 0;	/* same zero-nlink step as fuse_unlink() */
+		fuse_invalidate_attr(dir);
+	} else if (err == -EINTR)	/* interrupted: stop trusting the cached dentry */
+		fuse_invalidate_entry(entry);
+	return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+		       struct inode *newdir, struct dentry *newent)
+{	/* Send FUSE_RENAME: fuse_rename_in, then the old name, then the new name */
+	int err;
+	struct fuse_rename_in inarg;
+	struct fuse_conn *fc = get_fuse_conn(olddir);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.newdir = get_node_id(newdir);	/* target directory goes in the payload */
+	req->in.h.opcode = FUSE_RENAME;
+	req->in.h.nodeid = get_node_id(olddir);	/* source directory goes in the header */
+	req->inode = olddir;
+	req->inode2 = newdir;
+	req->in.numargs = 3;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = oldent->d_name.len + 1;
+	req->in.args[1].value = oldent->d_name.name;
+	req->in.args[2].size = newent->d_name.len + 1;
+	req->in.args[2].value = newent->d_name.name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		fuse_invalidate_attr(olddir);
+		if (olddir != newdir)	/* avoid invalidating the same inode twice */
+			fuse_invalidate_attr(newdir);
+	} else if (err == -EINTR) {
+		/* If request was interrupted, DEITY only knows if the
+		   rename actually took place.  If the invalidation
+		   fails (e.g. some process has CWD under the renamed
+		   directory), then there can be inconsistency between
+		   the dcache and the real filesystem.  Tough luck. */
+		fuse_invalidate_entry(oldent);
+		if (newent->d_inode)	/* skip a negative target dentry */
+			fuse_invalidate_entry(newent);
+	}
+
+	return err;
+}
+
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+		     struct dentry *newent)
+{	/* Send FUSE_LINK: fuse_link_in (old node id) followed by the new name */
+	int err;
+	struct fuse_link_in inarg;
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.oldnodeid = get_node_id(inode);
+	req->in.h.opcode = FUSE_LINK;
+	req->inode2 = inode;	/* req->inode and the header nodeid are set in create_new_entry() */
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = newent->d_name.len + 1;
+	req->in.args[1].value = newent->d_name.name;
+	err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+	/* Contrary to "normal" filesystems it can happen that link
+	   makes two "logical" inodes point to the same "physical"
+	   inode.  We invalidate the attributes of the old one, so it
+	   will reflect changes in the backing inode (link count,
+	   etc.)
+	*/
+	if (!err || err == -EINTR)	/* also when interrupted: the link may exist */
+		fuse_invalidate_attr(inode);
+	return err;
+}
+
 int fuse_do_getattr(struct inode *inode)
 {
 	int err;
@@ -341,6 +600,91 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
+{	/* Translate iattr into fattr; returns the FATTR_* mask of fields copied */
+	unsigned ivalid = iattr->ia_valid;
+	unsigned fvalid = 0;
+
+	memset(fattr, 0, sizeof(*fattr));	/* fields not copied below stay zero */
+
+	if (ivalid & ATTR_MODE)	/* comma operator: both parts run under the if */
+		fvalid |= FATTR_MODE,   fattr->mode = iattr->ia_mode;
+	if (ivalid & ATTR_UID)
+		fvalid |= FATTR_UID,    fattr->uid = iattr->ia_uid;
+	if (ivalid & ATTR_GID)
+		fvalid |= FATTR_GID,    fattr->gid = iattr->ia_gid;
+	if (ivalid & ATTR_SIZE)
+		fvalid |= FATTR_SIZE,   fattr->size = iattr->ia_size;
+	/* You can only _set_ these together (they may change by themselves) */
+	if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
+		fvalid |= FATTR_ATIME | FATTR_MTIME;
+		fattr->atime = iattr->ia_atime.tv_sec;	/* only tv_sec is copied */
+		fattr->mtime = iattr->ia_mtime.tv_sec;
+	}
+
+	return fvalid;
+}
+
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{	/* Send FUSE_SETATTR and apply the attributes userspace reports back */
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_req *req;
+	struct fuse_setattr_in inarg;
+	struct fuse_attr_out outarg;
+	int err;
+	int is_truncate = 0;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		unsigned long limit;
+		is_truncate = 1;	/* set for any size change, growing or shrinking */
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
+			send_sig(SIGXFSZ, current, 0);	/* new size exceeds RLIMIT_FSIZE */
+			return -EFBIG;
+		}
+	}
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.valid = iattr_to_fattr(attr, &inarg.attr);
+	req->in.h.opcode = FUSE_SETATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err) {
+		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {	/* file type changed */
+			make_bad_inode(inode);
+			err = -EIO;
+		} else {
+			if (is_truncate) {
+				loff_t origsize = i_size_read(inode);
+				i_size_write(inode, outarg.attr.size);	/* size from the reply */
+				if (origsize > outarg.attr.size)	/* file shrank */
+					vmtruncate(inode, outarg.attr.size);
+			}
+			fuse_change_attributes(inode, &outarg.attr);
+			fi->i_time = time_to_jiffies(outarg.attr_valid,
+						     outarg.attr_valid_nsec);
+		}
+	} else if (err == -EINTR)	/* interrupted: cached attributes may be stale */
+		fuse_invalidate_attr(inode);
+
+	return err;
+}
+
 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
 			struct kstat *stat)
 {
@@ -373,6 +717,15 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 
 static struct inode_operations fuse_dir_inode_operations = {
 	.lookup		= fuse_lookup,
+	.mkdir		= fuse_mkdir,
+	.symlink	= fuse_symlink,
+	.unlink		= fuse_unlink,
+	.rmdir		= fuse_rmdir,
+	.rename		= fuse_rename,
+	.link		= fuse_link,
+	.setattr	= fuse_setattr,
+	.create		= fuse_create,
+	.mknod		= fuse_mknod,
 	.permission	= fuse_permission,
 	.getattr	= fuse_getattr,
 };
@@ -385,11 +738,13 @@ static struct file_operations fuse_dir_operations = {
 };
 
 static struct inode_operations fuse_common_inode_operations = {
+	.setattr	= fuse_setattr,
 	.permission	= fuse_permission,
 	.getattr	= fuse_getattr,
 };
 
 static struct inode_operations fuse_symlink_inode_operations = {
+	.setattr	= fuse_setattr,
 	.follow_link	= fuse_follow_link,
 	.put_link	= fuse_put_link,
 	.readlink	= generic_readlink,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8d91e1492f9..87d25b8f2dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -30,6 +30,9 @@ struct fuse_inode {
 	 * and kernel */
 	u64 nodeid;
 
+	/** Number of lookups on this inode */
+	u64 nlookup;
+
 	/** The request used for sending the FORGET message */
 	struct fuse_req *forget_req;
 
@@ -252,13 +255,13 @@ extern spinlock_t fuse_lock;
  * Get a filled in inode
  */
 struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
-			int generation, struct fuse_attr *attr, int version);
+			int generation, struct fuse_attr *attr);
 
 /**
  * Send FORGET command
  */
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
-		      unsigned long nodeid, int version);
+		      unsigned long nodeid, u64 nlookup);
 
 /**
  * Initialise inode operations on regular files and special files
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 41498a1952a..fa03f80806e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -51,6 +51,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi = get_fuse_inode(inode);
 	fi->i_time = jiffies - 1;
 	fi->nodeid = 0;
+	fi->nlookup = 0;
 	fi->forget_req = fuse_request_alloc();
 	if (!fi->forget_req) {
 		kmem_cache_free(fuse_inode_cachep, inode);
@@ -74,10 +75,10 @@ static void fuse_read_inode(struct inode *inode)
 }
 
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
-		      unsigned long nodeid, int version)
+		      unsigned long nodeid, u64 nlookup)
 {
 	struct fuse_forget_in *inarg = &req->misc.forget_in;
-	inarg->version = version;
+	inarg->nlookup = nlookup;
 	req->in.h.opcode = FUSE_FORGET;
 	req->in.h.nodeid = nodeid;
 	req->in.numargs = 1;
@@ -91,7 +92,7 @@ static void fuse_clear_inode(struct inode *inode)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	if (fc) {
 		struct fuse_inode *fi = get_fuse_inode(inode);
-		fuse_send_forget(fc, fi->forget_req, fi->nodeid, inode->i_version);
+		fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
 		fi->forget_req = NULL;
 	}
 }
@@ -156,9 +157,10 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
 }
 
 struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
-			int generation, struct fuse_attr *attr, int version)
+			int generation, struct fuse_attr *attr)
 {
 	struct inode *inode;
+	struct fuse_inode *fi;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	int retried = 0;
 
@@ -181,8 +183,9 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 		goto retry;
 	}
 
+	fi = get_fuse_inode(inode);
+	fi->nlookup ++;
 	fuse_change_attributes(inode, attr);
-	inode->i_version = version;
 	return inode;
 }
 
@@ -389,7 +392,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
 
 	attr.mode = mode;
 	attr.ino = FUSE_ROOT_ID;
-	return fuse_iget(sb, 1, 0, &attr, 0);
+	return fuse_iget(sb, 1, 0, &attr);
 }
 
 static struct super_operations fuse_super_operations = {
-- 
cgit v1.2.3-18-g5258


From b6aeadeda22a9aa322fdfcd3f4c69ccf0da5cbdd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:30 -0700
Subject: [PATCH] FUSE - file operations

This patch adds the file operations of FUSE.

The following operations are added:

 o open
 o flush
 o release
 o fsync
 o readpage
 o commit_write

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/Makefile |   2 +-
 fs/fuse/dir.c    |   1 +
 fs/fuse/file.c   | 341 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  21 ++++
 fs/fuse/inode.c  |   2 +
 5 files changed, 366 insertions(+), 1 deletion(-)
 create mode 100644 fs/fuse/file.c

(limited to 'fs')

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index c34e268a0ed..c3e1f760cac 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 
-fuse-objs := dev.o dir.o inode.o
+fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 92c7188ccd1..8adc1eed164 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -731,6 +731,7 @@ static struct inode_operations fuse_dir_inode_operations = {
 };
 
 static struct file_operations fuse_dir_operations = {
+	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= fuse_readdir,
 	.open		= fuse_dir_open,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 00000000000..de8c9c70246
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,341 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static int fuse_open(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_open_in inarg;
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
+	int err;
+	/* Restarting the syscall is not allowed if O_CREAT and O_EXCL
+	   are both set, because creation will fail on the restart */
+	int excl = (file->f_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL);
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	/* If opening the root node, no lookup has been performed on
+	   it, so the attributes must be refreshed */
+	if (get_node_id(inode) == FUSE_ROOT_ID) {
+		int err = fuse_do_getattr(inode);
+		if (err)
+		 	return err;
+	}
+
+	if (excl)
+		req = fuse_get_request_nonint(fc);
+	else
+		req = fuse_get_request(fc);
+	if (!req)
+		return excl ? -EINTR : -ERESTARTSYS;
+
+	err = -ENOMEM;
+	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+	if (!ff)
+		goto out_put_request;
+
+	ff->release_req = fuse_request_alloc();
+	if (!ff->release_req) {
+		kfree(ff);
+		goto out_put_request;
+	}
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+	req->in.h.opcode = FUSE_OPEN;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	if (excl)
+		request_send_nonint(fc, req);
+	else
+		request_send(fc, req);
+	err = req->out.h.error;
+	if (!err)
+		invalidate_inode_pages(inode->i_mapping);
+	if (err) {
+		fuse_request_free(ff->release_req);
+		kfree(ff);
+	} else {
+		ff->fh = outarg.fh;
+		file->private_data = ff;
+	}
+
+ out_put_request:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+static int fuse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req = ff->release_req;
+	struct fuse_release_in *inarg = &req->misc.release_in;
+
+	inarg->fh = ff->fh;
+	inarg->flags = file->f_flags & ~O_EXCL;
+	req->in.h.opcode = FUSE_RELEASE;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_release_in);
+	req->in.args[0].value = inarg;
+	request_send_background(fc, req);
+	kfree(ff);
+
+	/* Return value is ignored by VFS */
+	return 0;
+}
+
+static int fuse_flush(struct file *file)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req;
+	struct fuse_flush_in inarg;
+	int err;
+
+	if (fc->no_flush)
+		return 0;
+
+	req = fuse_get_request_nonint(fc);
+	if (!req)
+		return -EINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	req->in.h.opcode = FUSE_FLUSH;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	request_send_nonint(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_flush = 1;
+		err = 0;
+	}
+	return err;
+}
+
+static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	struct inode *inode = de->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req;
+	struct fuse_fsync_in inarg;
+	int err;
+
+	if (fc->no_fsync)
+		return 0;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTSYS;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.fsync_flags = datasync ? 1 : 0;
+	req->in.h.opcode = FUSE_FSYNC;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_fsync = 1;
+		err = 0;
+	}
+	return err;
+}
+
+static ssize_t fuse_send_read(struct fuse_req *req, struct file *file,
+			      struct inode *inode, loff_t pos,  size_t count)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in inarg;
+
+	memset(&inarg, 0, sizeof(struct fuse_read_in));
+	inarg.fh = ff->fh;
+	inarg.offset = pos;
+	inarg.size = count;
+	req->in.h.opcode = FUSE_READ;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct fuse_read_in);
+	req->in.args[0].value = &inarg;
+	req->out.argpages = 1;
+	req->out.argvar = 1;
+	req->out.numargs = 1;
+	req->out.args[0].size = count;
+	request_send_nonint(fc, req);
+	return req->out.args[0].size;
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
+	struct fuse_req *req = fuse_get_request_nonint(fc);
+	int err = -EINTR;
+	if (!req)
+		goto out;
+
+	req->out.page_zeroing = 1;
+	req->num_pages = 1;
+	req->pages[0] = page;
+	fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err)
+		SetPageUptodate(page);
+ out:
+	unlock_page(page);
+	return err;
+}
+
+static ssize_t fuse_send_write(struct fuse_req *req, struct file *file,
+			       struct inode *inode, loff_t pos, size_t count)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_write_in inarg;
+	struct fuse_write_out outarg;
+
+	memset(&inarg, 0, sizeof(struct fuse_write_in));
+	inarg.fh = ff->fh;
+	inarg.offset = pos;
+	inarg.size = count;
+	req->in.h.opcode = FUSE_WRITE;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	req->in.argpages = 1;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(struct fuse_write_in);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = count;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_write_out);
+	req->out.args[0].value = &outarg;
+	request_send_nonint(fc, req);
+	return outarg.size;
+}
+
+static int fuse_prepare_write(struct file *file, struct page *page,
+			      unsigned offset, unsigned to)
+{
+	/* No op */
+	return 0;
+}
+
+static int fuse_commit_write(struct file *file, struct page *page,
+			     unsigned offset, unsigned to)
+{
+	int err;
+	ssize_t nres;
+	unsigned count = to - offset;
+	struct inode *inode = page->mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
+	struct fuse_req *req = fuse_get_request_nonint(fc);
+	if (!req)
+		return -EINTR;
+
+	req->num_pages = 1;
+	req->pages[0] = page;
+	req->page_offset = offset;
+	nres = fuse_send_write(req, file, inode, pos, count);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (!err && nres != count)
+		err = -EIO;
+	if (!err) {
+		pos += count;
+		if (pos > i_size_read(inode))
+			i_size_write(inode, pos);
+
+		if (offset == 0 && to == PAGE_CACHE_SIZE) {
+			clear_page_dirty(page);
+			SetPageUptodate(page);
+		}
+	} else if (err == -EINTR || err == -EIO)
+		fuse_invalidate_attr(inode);
+	return err;
+}
+
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED)) {
+		if ((vma->vm_flags & VM_WRITE))
+			return -ENODEV;
+		else
+			vma->vm_flags &= ~VM_MAYWRITE;
+	}
+	return generic_file_mmap(file, vma);
+}
+
+static int fuse_set_page_dirty(struct page *page)
+{
+	printk("fuse_set_page_dirty: should not happen\n");
+	dump_stack();
+	return 0;
+}
+
+static struct file_operations fuse_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+	.mmap		= fuse_file_mmap,
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	.fsync		= fuse_fsync,
+	.sendfile	= generic_file_sendfile,
+};
+
+static struct address_space_operations fuse_file_aops  = {
+	.readpage	= fuse_readpage,
+	.prepare_write	= fuse_prepare_write,
+	.commit_write	= fuse_commit_write,
+	.set_page_dirty	= fuse_set_page_dirty,
+};
+
+void fuse_init_file_inode(struct inode *inode)
+{
+	inode->i_fop = &fuse_file_operations;
+	inode->i_data.a_ops = &fuse_file_aops;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 87d25b8f2dc..b4aa8f7bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -40,6 +40,15 @@ struct fuse_inode {
 	unsigned long i_time;
 };
 
+/** FUSE specific file data */
+struct fuse_file {
+	/** Request reserved for flush and release */
+	struct fuse_req *release_req;
+
+	/** File handle used by userspace */
+	u64 fh;
+};
+
 /** One input argument of a request */
 struct fuse_in_arg {
 	unsigned size;
@@ -136,6 +145,7 @@ struct fuse_req {
 	/** Data for asynchronous requests */
 	union {
 		struct fuse_forget_in forget_in;
+		struct fuse_release_in release_in;
 		struct fuse_init_in_out init_in_out;
 	} misc;
 
@@ -200,6 +210,12 @@ struct fuse_conn {
 	/** Connection failed (version mismatch) */
 	unsigned conn_error : 1;
 
+	/** Is fsync not implemented by fs? */
+	unsigned no_fsync : 1;
+
+	/** Is flush not implemented by fs? */
+	unsigned no_flush : 1;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
@@ -263,6 +279,11 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
 		      unsigned long nodeid, u64 nlookup);
 
+/**
+ * Initialise file operations on a regular file
+ */
+void fuse_init_file_inode(struct inode *inode);
+
 /**
  * Initialise inode operations on regular files and special files
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fa03f80806e..f229d696264 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -124,6 +124,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 	i_size_write(inode, attr->size);
 	if (S_ISREG(inode->i_mode)) {
 		fuse_init_common(inode);
+		fuse_init_file_inode(inode);
 	} else if (S_ISDIR(inode->i_mode))
 		fuse_init_dir(inode);
 	else if (S_ISLNK(inode->i_mode))
@@ -137,6 +138,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 		/* Don't let user create weird files */
 		inode->i_mode = S_IFREG;
 		fuse_init_common(inode);
+		fuse_init_file_inode(inode);
 	}
 }
 
-- 
cgit v1.2.3-18-g5258


From 1e9a4ed9396e9c31139721b639550ffb1df17065 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:31 -0700
Subject: [PATCH] FUSE - mount options

This patch adds miscellaneous mount options to the FUSE filesystem.

The following mount options are added:

 o default_permissions:  check permissions with generic_permission()
 o allow_other:          allow other users to access files
 o allow_root:           allow root to access files
 o kernel_cache:         don't invalidate page cache on open

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c    | 78 ++++++++++++++++++++++++++++++++++++++++++--------------
 fs/fuse/dir.c    | 35 ++++++++++++++++++++++---
 fs/fuse/file.c   |  2 +-
 fs/fuse/fuse_i.h | 45 ++++++++++++++++++++++++++++----
 fs/fuse/inode.c  | 77 +++++++++++++++++++++++++++----------------------------
 5 files changed, 170 insertions(+), 67 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e8f3170946f..ca6fc0e96d7 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -26,7 +26,7 @@ static inline struct fuse_conn *fuse_get_conn(struct file *file)
 	struct fuse_conn *fc;
 	spin_lock(&fuse_lock);
 	fc = file->private_data;
-	if (fc && !fc->sb)
+	if (fc && !fc->mounted)
 		fc = NULL;
 	spin_unlock(&fuse_lock);
 	return fc;
@@ -148,6 +148,17 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 		fuse_putback_request(fc, req);
 }
 
+void fuse_release_background(struct fuse_req *req)
+{
+	iput(req->inode);
+	iput(req->inode2);
+	if (req->file)
+		fput(req->file);
+	spin_lock(&fuse_lock);
+	list_del(&req->bg_entry);
+	spin_unlock(&fuse_lock);
+}
+
 /*
  * This function is called when a request is finished.  Either a reply
  * has arrived or it was interrupted (and not yet sent) or some error
@@ -166,12 +177,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	putback = atomic_dec_and_test(&req->count);
 	spin_unlock(&fuse_lock);
 	if (req->background) {
-		if (req->inode)
-			iput(req->inode);
-		if (req->inode2)
-			iput(req->inode2);
-		if (req->file)
-			fput(req->file);
+		down_read(&fc->sbput_sem);
+		if (fc->mounted)
+			fuse_release_background(req);
+		up_read(&fc->sbput_sem);
 	}
 	wake_up(&req->waitq);
 	if (req->in.h.opcode == FUSE_INIT) {
@@ -191,11 +200,39 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		fuse_putback_request(fc, req);
 }
 
-static void background_request(struct fuse_req *req)
+/*
+ * Unfortunately, request interruption not only solves the deadlock
+ * problem, it causes problems too.  These stem from the fact that an
+ * interrupted request continues to be processed in userspace,
+ * while all the locks and object references (inode and file) held
+ * during the operation are released.
+ *
+ * To release the locks is exactly why there's a need to interrupt the
+ * request, so there's not a lot that can be done about this, except
+ * introduce additional locking in userspace.
+ *
+ * More important is to keep inode and file references until userspace
+ * has replied, otherwise FORGET and RELEASE could be sent while the
+ * inode/file is still used by the filesystem.
+ *
+ * For this reason the concept of "background" request is introduced.
+ * An interrupted request is backgrounded if it has been already sent
+ * to userspace.  Backgrounding involves getting an extra reference to
+ * inode(s) or file used in the request, and adding the request to
+ * fc->background list.  When a reply is received for a background
+ * request, the object references are released, and the request is
+ * removed from the list.  If the filesystem is unmounted while there
+ * are still background requests, the list is walked and references
+ * are released as if a reply was received.
+ *
+ * There's one more use for a background request.  The RELEASE message is
+ * always sent as background, since it doesn't return an error or
+ * data.
+ */
+static void background_request(struct fuse_conn *fc, struct fuse_req *req)
 {
-	/* Need to get hold of the inode(s) and/or file used in the
-	   request, so FORGET and RELEASE are not sent too early */
 	req->background = 1;
+	list_add(&req->bg_entry, &fc->background);
 	if (req->inode)
 		req->inode = igrab(req->inode);
 	if (req->inode2)
@@ -215,7 +252,8 @@ static int request_wait_answer_nonint(struct fuse_req *req)
 }
 
 /* Called with fuse_lock held.  Releases, and then reacquires it. */
-static void request_wait_answer(struct fuse_req *req, int interruptible)
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req,
+				int interruptible)
 {
 	int intr;
 
@@ -255,7 +293,7 @@ static void request_wait_answer(struct fuse_req *req, int interruptible)
 		list_del(&req->list);
 		__fuse_put_request(req);
 	} else if (!req->finished && req->sent)
-		background_request(req);
+		background_request(fc, req);
 }
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
@@ -297,7 +335,7 @@ static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
 {
 	req->isreply = 1;
 	spin_lock(&fuse_lock);
-	if (!fc->file)
+	if (!fc->connected)
 		req->out.h.error = -ENOTCONN;
 	else if (fc->conn_error)
 		req->out.h.error = -ECONNREFUSED;
@@ -307,7 +345,7 @@ static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
 		   after request_end() */
 		__fuse_get_request(req);
 
-		request_wait_answer(req, interruptible);
+		request_wait_answer(fc, req, interruptible);
 	}
 	spin_unlock(&fuse_lock);
 }
@@ -330,7 +368,7 @@ void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 {
 	spin_lock(&fuse_lock);
-	if (fc->file) {
+	if (fc->connected) {
 		queue_request(fc, req);
 		spin_unlock(&fuse_lock);
 	} else {
@@ -348,7 +386,9 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 1;
-	background_request(req);
+	spin_lock(&fuse_lock);
+	background_request(fc, req);
+	spin_unlock(&fuse_lock);
 	request_send_nowait(fc, req);
 }
 
@@ -583,7 +623,7 @@ static void request_wait(struct fuse_conn *fc)
 	DECLARE_WAITQUEUE(wait, current);
 
 	add_wait_queue_exclusive(&fc->waitq, &wait);
-	while (fc->sb && list_empty(&fc->pending)) {
+	while (fc->mounted && list_empty(&fc->pending)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (signal_pending(current))
 			break;
@@ -622,7 +662,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
 		goto err_unlock;
 	request_wait(fc);
 	err = -ENODEV;
-	if (!fc->sb)
+	if (!fc->mounted)
 		goto err_unlock;
 	err = -ERESTARTSYS;
 	if (list_empty(&fc->pending))
@@ -839,7 +879,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
 	spin_lock(&fuse_lock);
 	fc = file->private_data;
 	if (fc) {
-		fc->file = NULL;
+		fc->connected = 0;
 		end_requests(fc, &fc->pending);
 		end_requests(fc, &fc->processing);
 		fuse_release_conn(fc);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8adc1eed164..0950455914d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -418,7 +418,8 @@ static int fuse_revalidate(struct dentry *entry)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (get_node_id(inode) == FUSE_ROOT_ID) {
-		if (current->fsuid != fc->user_id)
+		if (!(fc->flags & FUSE_ALLOW_OTHER) &&
+		    current->fsuid != fc->user_id)
 			return -EACCES;
 	} else if (time_before_eq(jiffies, fi->i_time))
 		return 0;
@@ -430,9 +431,31 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	if (current->fsuid != fc->user_id)
+	if (!(fc->flags & FUSE_ALLOW_OTHER) && current->fsuid != fc->user_id)
 		return -EACCES;
-	else {
+	else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+		int err = generic_permission(inode, mask, NULL);
+
+		/* If permission is denied, try to refresh file
+		   attributes.  This is also needed, because the root
+		   node will at first have no permissions */
+		if (err == -EACCES) {
+		 	err = fuse_do_getattr(inode);
+			if (!err)
+				err = generic_permission(inode, mask, NULL);
+		}
+
+		/* FIXME: Need some mechanism to revoke permissions:
+		   currently if the filesystem suddenly changes the
+		   file mode, we will not be informed about it, and
+		   continue to allow access to the file/directory.
+
+		   This is actually not so grave, since the user can
+		   simply keep access to the file/directory anyway by
+		   keeping it open... */
+
+		return err;
+	} else {
 		int mode = inode->i_mode;
 		if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
@@ -636,6 +659,12 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 	int err;
 	int is_truncate = 0;
 
+	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+		err = inode_change_ok(inode, attr);
+		if (err)
+			return err;
+	}
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		unsigned long limit;
 		is_truncate = 1;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index de8c9c70246..96ea302db18 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -70,7 +70,7 @@ static int fuse_open(struct inode *inode, struct file *file)
 	else
 		request_send(fc, req);
 	err = req->out.h.error;
-	if (!err)
+	if (!err && !(fc->flags & FUSE_KERNEL_CACHE))
 		invalidate_inode_pages(inode->i_mapping);
 	if (err) {
 		fuse_request_free(ff->release_req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b4aa8f7bc2c..c8e6c87496e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,19 @@
 /** If more requests are outstanding, then the operation will block */
 #define FUSE_MAX_OUTSTANDING 10
 
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+    module will check permissions based on the file mode.  Otherwise no
+    permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+    doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER         (1 << 1)
+
+/** If the FUSE_KERNEL_CACHE flag is given, then cached data will not
+    be flushed on open */
+#define FUSE_KERNEL_CACHE        (1 << 2)
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -109,6 +122,9 @@ struct fuse_req {
 	    lists in fuse_conn */
 	struct list_head list;
 
+	/** Entry on the background list */
+	struct list_head bg_entry;
+
 	/** refcount */
 	atomic_t count;
 
@@ -176,15 +192,15 @@ struct fuse_req {
  * unmounted.
  */
 struct fuse_conn {
-	/** The superblock of the mounted filesystem */
-	struct super_block *sb;
-
-	/** The opened client device */
-	struct file *file;
+	/** Reference count */
+	int count;
 
 	/** The user id for this mount */
 	uid_t user_id;
 
+	/** The fuse mount flags for this mount */
+	unsigned flags;
+
 	/** Readers of the connection are waiting on this */
 	wait_queue_head_t waitq;
 
@@ -194,6 +210,10 @@ struct fuse_conn {
 	/** The list of requests being processed */
 	struct list_head processing;
 
+	/** Requests put in the background (RELEASE or any other
+	    interrupted request) */
+	struct list_head background;
+
 	/** Controls the maximum number of outstanding requests */
 	struct semaphore outstanding_sem;
 
@@ -201,12 +221,21 @@ struct fuse_conn {
 	    outstanding_sem would go negative */
 	unsigned outstanding_debt;
 
+	/** RW semaphore for exclusion with fuse_put_super() */
+	struct rw_semaphore sbput_sem;
+
 	/** The list of unused requests */
 	struct list_head unused_list;
 
 	/** The next unique request id */
 	u64 reqctr;
 
+	/** Mount is active */
+	unsigned mounted : 1;
+
+	/** Connection established */
+	unsigned connected : 1;
+
 	/** Connection failed (version mismatch) */
 	unsigned conn_error : 1;
 
@@ -261,6 +290,7 @@ extern struct file_operations fuse_dev_operations;
  *  - the private_data field of the device file
  *  - the s_fs_info field of the super block
  *  - unused_list, pending, processing lists in fuse_conn
+ *  - background list in fuse_conn
  *  - the unique request ID counter reqctr in fuse_conn
  *  - the sb (super_block) field in fuse_conn
  *  - the file (device file) field in fuse_conn
@@ -371,6 +401,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
  */
 void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 
+/**
+ * Release inodes and file associated with background request
+ */
+void fuse_release_background(struct fuse_req *req);
+
 /**
  * Get the attributes of a file
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f229d696264..458c62ca0fe 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -15,7 +15,6 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/parser.h>
 #include <linux/statfs.h>
 
@@ -25,11 +24,6 @@ MODULE_LICENSE("GPL");
 
 spinlock_t fuse_lock;
 static kmem_cache_t *fuse_inode_cachep;
-static int mount_count;
-
-static int mount_max = 1000;
-module_param(mount_max, int, 0644);
-MODULE_PARM_DESC(mount_max, "Maximum number of FUSE mounts allowed, if -1 then unlimited (default: 1000)");
 
 #define FUSE_SUPER_MAGIC 0x65735546
 
@@ -37,6 +31,7 @@ struct fuse_mount_data {
 	int fd;
 	unsigned rootmode;
 	unsigned user_id;
+	unsigned flags;
 };
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -89,8 +84,8 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
 
 static void fuse_clear_inode(struct inode *inode)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
-	if (fc) {
+	if (inode->i_sb->s_flags & MS_ACTIVE) {
+		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
 		fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
 		fi->forget_req = NULL;
@@ -195,14 +190,19 @@ static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+	down_write(&fc->sbput_sem);
+	while (!list_empty(&fc->background))
+		fuse_release_background(list_entry(fc->background.next,
+						   struct fuse_req, bg_entry));
+
 	spin_lock(&fuse_lock);
-	mount_count --;
-	fc->sb = NULL;
+	fc->mounted = 0;
 	fc->user_id = 0;
+	fc->flags = 0;
 	/* Flush all readers on this fs */
 	wake_up_all(&fc->waitq);
+	up_write(&fc->sbput_sem);
 	fuse_release_conn(fc);
-	*get_fuse_conn_super_p(sb) = NULL;
 	spin_unlock(&fuse_lock);
 }
 
@@ -249,7 +249,6 @@ enum {
 	OPT_USER_ID,
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
-	OPT_ALLOW_ROOT,
 	OPT_KERNEL_CACHE,
 	OPT_ERR
 };
@@ -260,7 +259,6 @@ static match_table_t tokens = {
 	{OPT_USER_ID,			"user_id=%u"},
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
-	{OPT_ALLOW_ROOT,		"allow_root"},
 	{OPT_KERNEL_CACHE,		"kernel_cache"},
 	{OPT_ERR,			NULL}
 };
@@ -298,6 +296,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->user_id = value;
 			break;
 
+		case OPT_DEFAULT_PERMISSIONS:
+			d->flags |= FUSE_DEFAULT_PERMISSIONS;
+			break;
+
+		case OPT_ALLOW_OTHER:
+			d->flags |= FUSE_ALLOW_OTHER;
+			break;
+
+		case OPT_KERNEL_CACHE:
+			d->flags |= FUSE_KERNEL_CACHE;
+			break;
+
 		default:
 			return 0;
 		}
@@ -313,6 +323,12 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
 
 	seq_printf(m, ",user_id=%u", fc->user_id);
+	if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+		seq_puts(m, ",default_permissions");
+	if (fc->flags & FUSE_ALLOW_OTHER)
+		seq_puts(m, ",allow_other");
+	if (fc->flags & FUSE_KERNEL_CACHE)
+		seq_puts(m, ",kernel_cache");
 	return 0;
 }
 
@@ -330,7 +346,8 @@ static void free_conn(struct fuse_conn *fc)
 /* Must be called with the fuse lock held */
 void fuse_release_conn(struct fuse_conn *fc)
 {
-	if (!fc->sb && !fc->file)
+	fc->count--;
+	if (!fc->count)
 		free_conn(fc);
 }
 
@@ -342,14 +359,13 @@ static struct fuse_conn *new_conn(void)
 	if (fc != NULL) {
 		int i;
 		memset(fc, 0, sizeof(*fc));
-		fc->sb = NULL;
-		fc->file = NULL;
-		fc->user_id = 0;
 		init_waitqueue_head(&fc->waitq);
 		INIT_LIST_HEAD(&fc->pending);
 		INIT_LIST_HEAD(&fc->processing);
 		INIT_LIST_HEAD(&fc->unused_list);
+		INIT_LIST_HEAD(&fc->background);
 		sema_init(&fc->outstanding_sem, 0);
+		init_rwsem(&fc->sbput_sem);
 		for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
 			struct fuse_req *req = fuse_request_alloc();
 			if (!req) {
@@ -380,8 +396,10 @@ static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
 		fc = ERR_PTR(-EINVAL);
 	} else {
 		file->private_data = fc;
-		fc->sb = sb;
-		fc->file = file;
+		*get_fuse_conn_super_p(sb) = fc;
+		fc->mounted = 1;
+		fc->connected = 1;
+		fc->count = 2;
 	}
 	spin_unlock(&fuse_lock);
 	return fc;
@@ -407,17 +425,6 @@ static struct super_operations fuse_super_operations = {
 	.show_options	= fuse_show_options,
 };
 
-static int inc_mount_count(void)
-{
-	int success = 0;
-	spin_lock(&fuse_lock);
-	mount_count ++;
-	if (mount_max == -1 || mount_count <= mount_max)
-		success = 1;
-	spin_unlock(&fuse_lock);
-	return success;
-}
-
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct fuse_conn *fc;
@@ -444,14 +451,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (IS_ERR(fc))
 		return PTR_ERR(fc);
 
+	fc->flags = d.flags;
 	fc->user_id = d.user_id;
 
-	*get_fuse_conn_super_p(sb) = fc;
-
-	err = -ENFILE;
-	if (!inc_mount_count() && current->uid != 0)
-		goto err;
-
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
 	if (root == NULL)
@@ -467,11 +469,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err:
 	spin_lock(&fuse_lock);
-	mount_count --;
-	fc->sb = NULL;
 	fuse_release_conn(fc);
 	spin_unlock(&fuse_lock);
-	*get_fuse_conn_super_p(sb) = NULL;
 	return err;
 }
 
-- 
cgit v1.2.3-18-g5258


From 92a8780e1136c5ca0c7ed940000d399943d1576e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:31 -0700
Subject: [PATCH] FUSE - extended attribute operations

This patch adds the extended attribute operations to FUSE.

The following operations are added:

 o getxattr
 o setxattr
 o listxattr
 o removexattr

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  12 ++++
 2 files changed, 195 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0950455914d..f127625543b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -744,6 +744,177 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	return d_splice_alias(inode, entry);
 }
 
+static int fuse_setxattr(struct dentry *entry, const char *name,
+			 const void *value, size_t size, int flags)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_setxattr_in inarg;
+	int err;
+
+	if (size > FUSE_XATTR_SIZE_MAX)
+		return -E2BIG;
+
+	if (fc->no_setxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	inarg.flags = flags;
+	req->in.h.opcode = FUSE_SETXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 3;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = strlen(name) + 1;
+	req->in.args[1].value = name;
+	req->in.args[2].size = size;
+	req->in.args[2].value = value;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_setxattr = 1;
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
+			     void *value, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_getxattr_in inarg;
+	struct fuse_getxattr_out outarg;
+	ssize_t ret;
+
+	if (fc->no_getxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	req->in.h.opcode = FUSE_GETXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = strlen(name) + 1;
+	req->in.args[1].value = name;
+	/* This is really two different operations rolled into one */
+	req->out.numargs = 1;
+	if (size) {
+		req->out.argvar = 1;
+		req->out.args[0].size = size;
+		req->out.args[0].value = value;
+	} else {
+		req->out.args[0].size = sizeof(outarg);
+		req->out.args[0].value = &outarg;
+	}
+	request_send(fc, req);
+	ret = req->out.h.error;
+	if (!ret)
+		ret = size ? req->out.args[0].size : outarg.size;
+	else {
+		if (ret == -ENOSYS) {
+			fc->no_getxattr = 1;
+			ret = -EOPNOTSUPP;
+		}
+	}
+	fuse_put_request(fc, req);
+	return ret;
+}
+
+static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_getxattr_in inarg;
+	struct fuse_getxattr_out outarg;
+	ssize_t ret;
+
+	if (fc->no_listxattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.size = size;
+	req->in.h.opcode = FUSE_LISTXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	/* This is really two different operations rolled into one */
+	req->out.numargs = 1;
+	if (size) {
+		req->out.argvar = 1;
+		req->out.args[0].size = size;
+		req->out.args[0].value = list;
+	} else {
+		req->out.args[0].size = sizeof(outarg);
+		req->out.args[0].value = &outarg;
+	}
+	request_send(fc, req);
+	ret = req->out.h.error;
+	if (!ret)
+		ret = size ? req->out.args[0].size : outarg.size;
+	else {
+		if (ret == -ENOSYS) {
+			fc->no_listxattr = 1;
+			ret = -EOPNOTSUPP;
+		}
+	}
+	fuse_put_request(fc, req);
+	return ret;
+}
+
+static int fuse_removexattr(struct dentry *entry, const char *name)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	int err;
+
+	if (fc->no_removexattr)
+		return -EOPNOTSUPP;
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTNOINTR;
+
+	req->in.h.opcode = FUSE_REMOVEXATTR;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = strlen(name) + 1;
+	req->in.args[0].value = name;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS) {
+		fc->no_removexattr = 1;
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
 static struct inode_operations fuse_dir_inode_operations = {
 	.lookup		= fuse_lookup,
 	.mkdir		= fuse_mkdir,
@@ -757,6 +928,10 @@ static struct inode_operations fuse_dir_inode_operations = {
 	.mknod		= fuse_mknod,
 	.permission	= fuse_permission,
 	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
 };
 
 static struct file_operations fuse_dir_operations = {
@@ -771,6 +946,10 @@ static struct inode_operations fuse_common_inode_operations = {
 	.setattr	= fuse_setattr,
 	.permission	= fuse_permission,
 	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
 };
 
 static struct inode_operations fuse_symlink_inode_operations = {
@@ -779,6 +958,10 @@ static struct inode_operations fuse_symlink_inode_operations = {
 	.put_link	= fuse_put_link,
 	.readlink	= generic_readlink,
 	.getattr	= fuse_getattr,
+	.setxattr	= fuse_setxattr,
+	.getxattr	= fuse_getxattr,
+	.listxattr	= fuse_listxattr,
+	.removexattr	= fuse_removexattr,
 };
 
 void fuse_init_common(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c8e6c87496e..86183c56210 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -245,6 +245,18 @@ struct fuse_conn {
 	/** Is flush not implemented by fs? */
 	unsigned no_flush : 1;
 
+	/** Is setxattr not implemented by fs? */
+	unsigned no_setxattr : 1;
+
+	/** Is getxattr not implemented by fs? */
+	unsigned no_getxattr : 1;
+
+	/** Is listxattr not implemented by fs? */
+	unsigned no_listxattr : 1;
+
+	/** Is removexattr not implemented by fs? */
+	unsigned no_removexattr : 1;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
-- 
cgit v1.2.3-18-g5258


From db50b96c0f28a21c5a4a19ecaba12d0972aab06a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:33 -0700
Subject: [PATCH] FUSE - readpages operation

This patch adds readpages support to FUSE.

With the help of the readpages() operation multiple reads are bundled
together and sent as a single request to userspace.  This can improve
reading performance.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c   | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  3 +++
 fs/fuse/inode.c  | 15 +++++++++++++
 3 files changed, 85 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 96ea302db18..86ffb6db5fe 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -227,6 +227,72 @@ static int fuse_readpage(struct file *file, struct page *page)
 	return err;
 }
 
+static int fuse_send_readpages(struct fuse_req *req, struct file *file,
+			       struct inode *inode)
+{
+	loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
+	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+	unsigned i;
+	req->out.page_zeroing = 1;
+	fuse_send_read(req, file, inode, pos, count);
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		if (!req->out.h.error)
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	return req->out.h.error;
+}
+
+struct fuse_readpages_data {
+	struct fuse_req *req;
+	struct file *file;
+	struct inode *inode;
+};
+
+static int fuse_readpages_fill(void *_data, struct page *page)
+{
+	struct fuse_readpages_data *data = _data;
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (req->num_pages &&
+	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+		int err = fuse_send_readpages(req, data->file, inode);
+		if (err) {
+			unlock_page(page);
+			return err;
+		}
+		fuse_reset_request(req);
+	}
+	req->pages[req->num_pages] = page;
+	req->num_pages ++;
+	return 0;
+}
+
+static int fuse_readpages(struct file *file, struct address_space *mapping,
+			  struct list_head *pages, unsigned nr_pages)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_readpages_data data;
+	int err;
+	data.file = file;
+	data.inode = inode;
+	data.req = fuse_get_request_nonint(fc);
+	if (!data.req)
+		return -EINTR;
+
+	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
+	if (!err && data.req->num_pages)
+		err = fuse_send_readpages(data.req, file, inode);
+	fuse_put_request(fc, data.req);
+	return err;
+}
+
 static ssize_t fuse_send_write(struct fuse_req *req, struct file *file,
 			       struct inode *inode, loff_t pos, size_t count)
 {
@@ -331,6 +397,7 @@ static struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.prepare_write	= fuse_prepare_write,
 	.commit_write	= fuse_commit_write,
+	.readpages	= fuse_readpages,
 	.set_page_dirty	= fuse_set_page_dirty,
 };
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 86183c56210..aff3a01ea02 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -201,6 +201,9 @@ struct fuse_conn {
 	/** The fuse mount flags for this mount */
 	unsigned flags;
 
+	/** Maximum read size */
+	unsigned max_read;
+
 	/** Readers of the connection are waiting on this */
 	wait_queue_head_t waitq;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 458c62ca0fe..0b75c73386e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -32,6 +32,7 @@ struct fuse_mount_data {
 	unsigned rootmode;
 	unsigned user_id;
 	unsigned flags;
+	unsigned max_read;
 };
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -250,6 +251,7 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_KERNEL_CACHE,
+	OPT_MAX_READ,
 	OPT_ERR
 };
 
@@ -260,6 +262,7 @@ static match_table_t tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_KERNEL_CACHE,		"kernel_cache"},
+	{OPT_MAX_READ,			"max_read=%u"},
 	{OPT_ERR,			NULL}
 };
 
@@ -268,6 +271,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
 	d->fd = -1;
+	d->max_read = ~0;
 
 	while ((p = strsep(&opt, ",")) != NULL) {
 		int token;
@@ -308,6 +312,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->flags |= FUSE_KERNEL_CACHE;
 			break;
 
+		case OPT_MAX_READ:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->max_read = value;
+			break;
+
 		default:
 			return 0;
 		}
@@ -329,6 +339,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 		seq_puts(m, ",allow_other");
 	if (fc->flags & FUSE_KERNEL_CACHE)
 		seq_puts(m, ",kernel_cache");
+	if (fc->max_read != ~0)
+		seq_printf(m, ",max_read=%u", fc->max_read);
 	return 0;
 }
 
@@ -453,6 +465,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	fc->flags = d.flags;
 	fc->user_id = d.user_id;
+	fc->max_read = d.max_read;
+	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
+		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
-- 
cgit v1.2.3-18-g5258


From 87729a5514e855ce2c71e3e33833a106b8caf2ae Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:34 -0700
Subject: [PATCH] FUSE: tighten check for processes allowed access

This patch tightens the check for allowing processes to access non-privileged
mounts.  The rationale is that the filesystem implementation can control the
behavior or get otherwise unavailable information of the filesystem user.  If
the filesystem user process has the same uid, gid, and is not a suid or sgid
application, then access is safe.  Otherwise access is not allowed unless the
"allow_other" mount option is given (for which policy is controlled by the
userspace mount utility).

Thanks to everyone on linux-fsdevel, especially Martin Mares who helped uncover
problems with the previous approach.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    | 40 ++++++++++++++++++++++++++++++++++------
 fs/fuse/fuse_i.h |  3 +++
 fs/fuse/inode.c  | 12 ++++++++++++
 3 files changed, 49 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f127625543b..65da6e1b6de 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -411,17 +411,45 @@ int fuse_do_getattr(struct inode *inode)
 	return err;
 }
 
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the requester process.  This
+ * means, that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways.  For example
+ * it can delay the operation for arbitrary length of time allowing
+ * DoS against the requester.
+ *
+ * For this reason only those processes can call into the filesystem,
+ * for which the owner of the mount has ptrace privilege.  This
+ * excludes processes started by other users, suid or sgid processes.
+ */
+static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+{
+	if (fc->flags & FUSE_ALLOW_OTHER)
+		return 1;
+
+	if (task->euid == fc->user_id &&
+	    task->suid == fc->user_id &&
+	    task->uid == fc->user_id &&
+	    task->egid == fc->group_id &&
+	    task->sgid == fc->group_id &&
+	    task->gid == fc->group_id)
+		return 1;
+
+	return 0;
+}
+
 static int fuse_revalidate(struct dentry *entry)
 {
 	struct inode *inode = entry->d_inode;
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	if (get_node_id(inode) == FUSE_ROOT_ID) {
-		if (!(fc->flags & FUSE_ALLOW_OTHER) &&
-		    current->fsuid != fc->user_id)
-			return -EACCES;
-	} else if (time_before_eq(jiffies, fi->i_time))
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+	if (get_node_id(inode) != FUSE_ROOT_ID &&
+	    time_before_eq(jiffies, fi->i_time))
 		return 0;
 
 	return fuse_do_getattr(inode);
@@ -431,7 +459,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	if (!(fc->flags & FUSE_ALLOW_OTHER) && current->fsuid != fc->user_id)
+	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 	else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
 		int err = generic_permission(inode, mask, NULL);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aff3a01ea02..3ec2aff3fdb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -198,6 +198,9 @@ struct fuse_conn {
 	/** The user id for this mount */
 	uid_t user_id;
 
+	/** The group id for this mount */
+	gid_t group_id;
+
 	/** The fuse mount flags for this mount */
 	unsigned flags;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0b75c73386e..c8e54c0658f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -31,6 +31,7 @@ struct fuse_mount_data {
 	int fd;
 	unsigned rootmode;
 	unsigned user_id;
+	unsigned group_id;
 	unsigned flags;
 	unsigned max_read;
 };
@@ -199,6 +200,7 @@ static void fuse_put_super(struct super_block *sb)
 	spin_lock(&fuse_lock);
 	fc->mounted = 0;
 	fc->user_id = 0;
+	fc->group_id = 0;
 	fc->flags = 0;
 	/* Flush all readers on this fs */
 	wake_up_all(&fc->waitq);
@@ -248,6 +250,7 @@ enum {
 	OPT_FD,
 	OPT_ROOTMODE,
 	OPT_USER_ID,
+	OPT_GROUP_ID,
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_KERNEL_CACHE,
@@ -259,6 +262,7 @@ static match_table_t tokens = {
 	{OPT_FD,			"fd=%u"},
 	{OPT_ROOTMODE,			"rootmode=%o"},
 	{OPT_USER_ID,			"user_id=%u"},
+	{OPT_GROUP_ID,			"group_id=%u"},
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_KERNEL_CACHE,		"kernel_cache"},
@@ -300,6 +304,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->user_id = value;
 			break;
 
+		case OPT_GROUP_ID:
+			if (match_int(&args[0], &value))
+				return 0;
+			d->group_id = value;
+			break;
+
 		case OPT_DEFAULT_PERMISSIONS:
 			d->flags |= FUSE_DEFAULT_PERMISSIONS;
 			break;
@@ -333,6 +343,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
 
 	seq_printf(m, ",user_id=%u", fc->user_id);
+	seq_printf(m, ",group_id=%u", fc->group_id);
 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
 		seq_puts(m, ",default_permissions");
 	if (fc->flags & FUSE_ALLOW_OTHER)
@@ -465,6 +476,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	fc->flags = d.flags;
 	fc->user_id = d.user_id;
+	fc->group_id = d.group_id;
 	fc->max_read = d.max_read;
 	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
 		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
-- 
cgit v1.2.3-18-g5258


From 5a53368277efa2d80dd2206dddc1f4b19ef0c32a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:34 -0700
Subject: [PATCH] fuse: stricter mount option checking

Check for the presence of all mandatory mount options.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c8e54c0658f..298c1d4c153 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -32,6 +32,10 @@ struct fuse_mount_data {
 	unsigned rootmode;
 	unsigned user_id;
 	unsigned group_id;
+	unsigned fd_present : 1;
+	unsigned rootmode_present : 1;
+	unsigned user_id_present : 1;
+	unsigned group_id_present : 1;
 	unsigned flags;
 	unsigned max_read;
 };
@@ -274,7 +278,6 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 {
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
-	d->fd = -1;
 	d->max_read = ~0;
 
 	while ((p = strsep(&opt, ",")) != NULL) {
@@ -290,24 +293,28 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			if (match_int(&args[0], &value))
 				return 0;
 			d->fd = value;
+			d->fd_present = 1;
 			break;
 
 		case OPT_ROOTMODE:
 			if (match_octal(&args[0], &value))
 				return 0;
 			d->rootmode = value;
+			d->rootmode_present = 1;
 			break;
 
 		case OPT_USER_ID:
 			if (match_int(&args[0], &value))
 				return 0;
 			d->user_id = value;
+			d->user_id_present = 1;
 			break;
 
 		case OPT_GROUP_ID:
 			if (match_int(&args[0], &value))
 				return 0;
 			d->group_id = value;
+			d->group_id_present = 1;
 			break;
 
 		case OPT_DEFAULT_PERMISSIONS:
@@ -332,7 +339,9 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			return 0;
 		}
 	}
-	if (d->fd == -1)
+
+	if (!d->fd_present || !d->rootmode_present ||
+	    !d->user_id_present || !d->group_id_present)
 		return 0;
 
 	return 1;
-- 
cgit v1.2.3-18-g5258


From 413ef8cb302511d8e995e2b0e5517ee1a65b9c77 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:35 -0700
Subject: [PATCH] FUSE - direct I/O

This patch adds support for the "direct_io" mount option of FUSE.

When this mount option is specified, the page cache is bypassed for
read and write operations.  This is useful, for example, if the
filesystem doesn't know the size of files before reading them, or when
any kind of caching is harmful.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c   | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/fuse_i.h |   6 +++
 fs/fuse/inode.c  |   9 ++++
 3 files changed, 146 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 86ffb6db5fe..6bc3fb26de3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -363,6 +363,118 @@ static int fuse_commit_write(struct file *file, struct page *page,
 	return err;
 }
 
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+	unsigned i;
+
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		if (write)
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+}
+
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+			       unsigned nbytes, int write)
+{
+	unsigned long user_addr = (unsigned long) buf;
+	unsigned offset = user_addr & ~PAGE_MASK;
+	int npages;
+
+	/* This doesn't work with nfsd */
+	if (!current->mm)
+		return -EPERM;
+
+	nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
+	down_read(&current->mm->mmap_sem);
+	npages = get_user_pages(current, current->mm, user_addr, npages, write,
+				0, req->pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (npages < 0)
+		return npages;
+
+	req->num_pages = npages;
+	req->page_offset = offset;
+	return 0;
+}
+
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos, int write)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	size_t nmax = write ? fc->max_write : fc->max_read;
+	loff_t pos = *ppos;
+	ssize_t res = 0;
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -ERESTARTSYS;
+
+	while (count) {
+		size_t tmp;
+		size_t nres;
+		size_t nbytes = min(count, nmax);
+		int err = fuse_get_user_pages(req, buf, nbytes, !write);
+		if (err) {
+			res = err;
+			break;
+		}
+		tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+		nbytes = min(nbytes, tmp);
+		if (write)
+			nres = fuse_send_write(req, file, inode, pos, nbytes);
+		else
+			nres = fuse_send_read(req, file, inode, pos, nbytes);
+		fuse_release_user_pages(req, !write);
+		if (req->out.h.error) {
+			if (!res)
+				res = req->out.h.error;
+			break;
+		} else if (nres > nbytes) {
+			res = -EIO;
+			break;
+		}
+		count -= nres;
+		res += nres;
+		pos += nres;
+		buf += nres;
+		if (nres != nbytes)
+			break;
+		if (count)
+			fuse_reset_request(req);
+	}
+	fuse_put_request(fc, req);
+	if (res > 0) {
+		if (write && pos > i_size_read(inode))
+			i_size_write(inode, pos);
+		*ppos = pos;
+	} else if (write && (res == -EINTR || res == -EIO))
+		fuse_invalidate_attr(inode);
+
+	return res;
+}
+
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *ppos)
+{
+	return fuse_direct_io(file, buf, count, ppos, 0);
+}
+
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	ssize_t res;
+	/* Don't allow parallel writes to the same file */
+	down(&inode->i_sem);
+	res = fuse_direct_io(file, buf, count, ppos, 1);
+	up(&inode->i_sem);
+	return res;
+}
+
 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if ((vma->vm_flags & VM_SHARED)) {
@@ -393,6 +505,17 @@ static struct file_operations fuse_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
+static struct file_operations fuse_direct_io_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= fuse_direct_read,
+	.write		= fuse_direct_write,
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	.fsync		= fuse_fsync,
+	/* no mmap and sendfile */
+};
+
 static struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.prepare_write	= fuse_prepare_write,
@@ -403,6 +526,12 @@ static struct address_space_operations fuse_file_aops  = {
 
 void fuse_init_file_inode(struct inode *inode)
 {
-	inode->i_fop = &fuse_file_operations;
-	inode->i_data.a_ops = &fuse_file_aops;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (fc->flags & FUSE_DIRECT_IO)
+		inode->i_fop = &fuse_direct_io_file_operations;
+	else {
+		inode->i_fop = &fuse_file_operations;
+		inode->i_data.a_ops = &fuse_file_aops;
+	}
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3ec2aff3fdb..0af1ac64692 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -34,6 +34,9 @@
     be flushed on open */
 #define FUSE_KERNEL_CACHE        (1 << 2)
 
+/** Bypass the page cache for read and write operations  */
+#define FUSE_DIRECT_IO           (1 << 3)
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -207,6 +210,9 @@ struct fuse_conn {
 	/** Maximum read size */
 	unsigned max_read;
 
+	/** Maximum write size */
+	unsigned max_write;
+
 	/** Readers of the connection are waiting on this */
 	wait_queue_head_t waitq;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 298c1d4c153..652c9d5df97 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -258,6 +258,7 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_KERNEL_CACHE,
+	OPT_DIRECT_IO,
 	OPT_MAX_READ,
 	OPT_ERR
 };
@@ -270,6 +271,7 @@ static match_table_t tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_KERNEL_CACHE,		"kernel_cache"},
+	{OPT_DIRECT_IO,			"direct_io"},
 	{OPT_MAX_READ,			"max_read=%u"},
 	{OPT_ERR,			NULL}
 };
@@ -329,6 +331,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->flags |= FUSE_KERNEL_CACHE;
 			break;
 
+		case OPT_DIRECT_IO:
+			d->flags |= FUSE_DIRECT_IO;
+			break;
+
 		case OPT_MAX_READ:
 			if (match_int(&args[0], &value))
 				return 0;
@@ -359,6 +365,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 		seq_puts(m, ",allow_other");
 	if (fc->flags & FUSE_KERNEL_CACHE)
 		seq_puts(m, ",kernel_cache");
+	if (fc->flags & FUSE_DIRECT_IO)
+		seq_puts(m, ",direct_io");
 	if (fc->max_read != ~0)
 		seq_printf(m, ",max_read=%u", fc->max_read);
 	return 0;
@@ -489,6 +497,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fc->max_read = d.max_read;
 	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
 		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+	fc->max_write = FUSE_MAX_IN / 2;
 
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
-- 
cgit v1.2.3-18-g5258


From 04730fef1f9c7277e5c730b193e681ac095b0507 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:36 -0700
Subject: [PATCH] fuse: transfer readdir data through device

This patch removes a long lasting "hack" in FUSE, which used a separate
channel (a file descriptor referring to a disk-file) to transfer directory
contents from userspace to the kernel.

The patch adds three new operations (OPENDIR, READDIR, RELEASEDIR), which
have semantics and implementation exactly matching the respective file
operations (OPEN, READ, RELEASE).

This simplifies the directory reading code.  Also disk space is not
necessary, which can be important in embedded systems.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c    |  9 ------
 fs/fuse/dir.c    | 84 ++++++++++++++++----------------------------------------
 fs/fuse/file.c   | 38 ++++++++++++++++++-------
 fs/fuse/fuse_i.h | 22 +++++++++++----
 4 files changed, 69 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca6fc0e96d7..e4ada021d08 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -731,13 +731,6 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
 	return NULL;
 }
 
-/* fget() needs to be done in this context */
-static void process_getdir(struct fuse_req *req)
-{
-	struct fuse_getdir_out_i *arg = req->out.args[0].value;
-	arg->file = fget(arg->fd);
-}
-
 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
 			 unsigned nbytes)
 {
@@ -817,8 +810,6 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
 	if (!err) {
 		if (req->interrupted)
 			err = -ENOENT;
-		else if (req->in.h.opcode == FUSE_GETDIR && !oh.error)
-			process_getdir(req);
 	} else if (!req->interrupted)
 		req->out.h.error = -EIO;
 	request_end(fc, req);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 65da6e1b6de..cf5d1faed7a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -519,70 +519,40 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
 	return 0;
 }
 
-static int fuse_checkdir(struct file *cfile, struct file *file)
+static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
+				       struct inode *inode, loff_t pos,
+				       size_t count)
 {
-	struct inode *inode;
-	if (!cfile)
-		return -EIO;
-	inode = cfile->f_dentry->d_inode;
-	if (!S_ISREG(inode->i_mode)) {
-		fput(cfile);
-		return -EIO;
-	}
-
-	file->private_data = cfile;
-	return 0;
+	return fuse_send_read_common(req, file, inode, pos, count, 1);
 }
 
-static int fuse_getdir(struct file *file)
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 {
+	int err;
+	size_t nbytes;
+	struct page *page;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req = fuse_get_request(fc);
-	struct fuse_getdir_out_i outarg;
-	int err;
-
+	struct fuse_req *req = fuse_get_request_nonint(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
-	req->in.h.opcode = FUSE_GETDIR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->inode = inode;
-	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(struct fuse_getdir_out);
-	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		fuse_put_request(fc, req);
+		return -ENOMEM;
+	}
+	req->num_pages = 1;
+	req->pages[0] = page;
+	nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err)
-		err = fuse_checkdir(outarg.file, file);
-	return err;
-}
-
-static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
-{
-	struct file *cfile = file->private_data;
-	char *buf;
-	int ret;
-
-	if (!cfile) {
-		ret = fuse_getdir(file);
-		if (ret)
-			return ret;
-
-		cfile = file->private_data;
-	}
+		err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
+				    filldir);
 
-	buf = (char *) __get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = kernel_read(cfile, file->f_pos, buf, PAGE_SIZE);
-	if (ret > 0)
-		ret = parse_dirfile(buf, ret, file, dstbuf, filldir);
-
-	free_page((unsigned long) buf);
-	return ret;
+	__free_page(page);
+	return err;
 }
 
 static char *read_link(struct dentry *dentry)
@@ -637,18 +607,12 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-	file->private_data = NULL;
-	return 0;
+	return fuse_open_common(inode, file, 1);
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
 {
-	struct file *cfile = file->private_data;
-
-	if (cfile)
-		fput(cfile);
-
-	return 0;
+	return fuse_release_common(inode, file, 1);
 }
 
 static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6bc3fb26de3..224453557cf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 
-static int fuse_open(struct inode *inode, struct file *file)
+int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
@@ -56,7 +56,7 @@ static int fuse_open(struct inode *inode, struct file *file)
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-	req->in.h.opcode = FUSE_OPEN;
+	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
 	req->in.h.nodeid = get_node_id(inode);
 	req->inode = inode;
 	req->in.numargs = 1;
@@ -85,7 +85,7 @@ static int fuse_open(struct inode *inode, struct file *file)
 	return err;
 }
 
-static int fuse_release(struct inode *inode, struct file *file)
+int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
@@ -94,7 +94,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 
 	inarg->fh = ff->fh;
 	inarg->flags = file->f_flags & ~O_EXCL;
-	req->in.h.opcode = FUSE_RELEASE;
+	req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 	req->in.h.nodeid = get_node_id(inode);
 	req->inode = inode;
 	req->in.numargs = 1;
@@ -107,6 +107,16 @@ static int fuse_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int fuse_open(struct inode *inode, struct file *file)
+{
+	return fuse_open_common(inode, file, 0);
+}
+
+static int fuse_release(struct inode *inode, struct file *file)
+{
+	return fuse_release_common(inode, file, 0);
+}
+
 static int fuse_flush(struct file *file)
 {
 	struct inode *inode = file->f_dentry->d_inode;
@@ -178,8 +188,9 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	return err;
 }
 
-static ssize_t fuse_send_read(struct fuse_req *req, struct file *file,
-			      struct inode *inode, loff_t pos,  size_t count)
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+			     struct inode *inode, loff_t pos, size_t count,
+			     int isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
@@ -189,7 +200,7 @@ static ssize_t fuse_send_read(struct fuse_req *req, struct file *file,
 	inarg.fh = ff->fh;
 	inarg.offset = pos;
 	inarg.size = count;
-	req->in.h.opcode = FUSE_READ;
+	req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
 	req->in.h.nodeid = get_node_id(inode);
 	req->inode = inode;
 	req->file = file;
@@ -204,6 +215,13 @@ static ssize_t fuse_send_read(struct fuse_req *req, struct file *file,
 	return req->out.args[0].size;
 }
 
+static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
+				    struct inode *inode, loff_t pos,
+				    size_t count)
+{
+	return fuse_send_read_common(req, file, inode, pos, count, 0);
+}
+
 static int fuse_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
@@ -293,8 +311,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	return err;
 }
 
-static ssize_t fuse_send_write(struct fuse_req *req, struct file *file,
-			       struct inode *inode, loff_t pos, size_t count)
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+			      struct inode *inode, loff_t pos, size_t count)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
@@ -332,7 +350,7 @@ static int fuse_commit_write(struct file *file, struct page *page,
 			     unsigned offset, unsigned to)
 {
 	int err;
-	ssize_t nres;
+	size_t nres;
 	unsigned count = to - offset;
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0af1ac64692..8593d5bae7a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -273,11 +273,6 @@ struct fuse_conn {
 	struct backing_dev_info bdi;
 };
 
-struct fuse_getdir_out_i {
-	int fd;
-	void *file; /* Used by kernel only */
-};
-
 static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
 {
 	return (struct fuse_conn **) &sb->s_fs_info;
@@ -333,6 +328,23 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
 		      unsigned long nodeid, u64 nlookup);
 
+/**
+ * Send READ or READDIR request
+ */
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+			     struct inode *inode, loff_t pos, size_t count,
+			     int isdir);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+
 /**
  * Initialise file operations on a regular file
  */
-- 
cgit v1.2.3-18-g5258


From 45323fb76465a9576220c7427dbac7b1e7ad3caf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:37 -0700
Subject: [PATCH] fuse: more flexible caching

Make data caching behavior selectable on a per-open basis instead of
per-mount.  Compatibility for the old mount options 'kernel_cache' and
'direct_io' is retained in the userspace library (version 2.4.0-pre1 or
later).

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c   | 18 ++++++++----------
 fs/fuse/fuse_i.h |  6 ------
 fs/fuse/inode.c  | 16 ----------------
 3 files changed, 8 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 224453557cf..a8dc88527fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,6 +12,8 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 
+static struct file_operations fuse_direct_io_file_operations;
+
 int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -70,12 +72,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 	else
 		request_send(fc, req);
 	err = req->out.h.error;
-	if (!err && !(fc->flags & FUSE_KERNEL_CACHE))
-		invalidate_inode_pages(inode->i_mapping);
 	if (err) {
 		fuse_request_free(ff->release_req);
 		kfree(ff);
 	} else {
+		if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
+			file->f_op = &fuse_direct_io_file_operations;
+		if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
+			invalidate_inode_pages(inode->i_mapping);
 		ff->fh = outarg.fh;
 		file->private_data = ff;
 	}
@@ -544,12 +548,6 @@ static struct address_space_operations fuse_file_aops  = {
 
 void fuse_init_file_inode(struct inode *inode)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
-
-	if (fc->flags & FUSE_DIRECT_IO)
-		inode->i_fop = &fuse_direct_io_file_operations;
-	else {
-		inode->i_fop = &fuse_file_operations;
-		inode->i_data.a_ops = &fuse_file_aops;
-	}
+	inode->i_fop = &fuse_file_operations;
+	inode->i_data.a_ops = &fuse_file_aops;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8593d5bae7a..84849601363 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -30,12 +30,6 @@
     doing the mount will be allowed to access the filesystem */
 #define FUSE_ALLOW_OTHER         (1 << 1)
 
-/** If the FUSE_KERNEL_CACHE flag is given, then cached data will not
-    be flushed on open */
-#define FUSE_KERNEL_CACHE        (1 << 2)
-
-/** Bypass the page cache for read and write operations  */
-#define FUSE_DIRECT_IO           (1 << 3)
 
 /** FUSE inode */
 struct fuse_inode {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 652c9d5df97..8dc66760b41 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -257,8 +257,6 @@ enum {
 	OPT_GROUP_ID,
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
-	OPT_KERNEL_CACHE,
-	OPT_DIRECT_IO,
 	OPT_MAX_READ,
 	OPT_ERR
 };
@@ -270,8 +268,6 @@ static match_table_t tokens = {
 	{OPT_GROUP_ID,			"group_id=%u"},
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
-	{OPT_KERNEL_CACHE,		"kernel_cache"},
-	{OPT_DIRECT_IO,			"direct_io"},
 	{OPT_MAX_READ,			"max_read=%u"},
 	{OPT_ERR,			NULL}
 };
@@ -327,14 +323,6 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->flags |= FUSE_ALLOW_OTHER;
 			break;
 
-		case OPT_KERNEL_CACHE:
-			d->flags |= FUSE_KERNEL_CACHE;
-			break;
-
-		case OPT_DIRECT_IO:
-			d->flags |= FUSE_DIRECT_IO;
-			break;
-
 		case OPT_MAX_READ:
 			if (match_int(&args[0], &value))
 				return 0;
@@ -363,10 +351,6 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 		seq_puts(m, ",default_permissions");
 	if (fc->flags & FUSE_ALLOW_OTHER)
 		seq_puts(m, ",allow_other");
-	if (fc->flags & FUSE_KERNEL_CACHE)
-		seq_puts(m, ",kernel_cache");
-	if (fc->flags & FUSE_DIRECT_IO)
-		seq_puts(m, ",direct_io");
 	if (fc->max_read != ~0)
 		seq_printf(m, ",max_read=%u", fc->max_read);
 	return 0;
-- 
cgit v1.2.3-18-g5258


From b36c31ba95f0fe0a03c727300d9c4c54438a5636 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:38 -0700
Subject: [PATCH] fuse: don't update file times

Don't change mtime/ctime/atime to local time on read/write.  Rather invalidate
file attributes, so next stat() will force a GETATTR call.  Bug reported by
Ben Grimm.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c   |  2 ++
 fs/fuse/file.c  | 10 ++++++----
 fs/fuse/inode.c |  1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index cf5d1faed7a..9b43fd46aaa 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -552,6 +552,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 				    filldir);
 
 	__free_page(page);
+	fuse_invalidate_attr(inode); /* atime changed */
 	return err;
 }
 
@@ -585,6 +586,7 @@ static char *read_link(struct dentry *dentry)
 		link[req->out.args[0].size] = '\0';
  out:
 	fuse_put_request(fc, req);
+	fuse_invalidate_attr(inode); /* atime changed */
 	return link;
 }
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a8dc88527fb..6dcae74ce7f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -244,6 +244,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 	fuse_put_request(fc, req);
 	if (!err)
 		SetPageUptodate(page);
+	fuse_invalidate_attr(inode); /* atime changed */
  out:
 	unlock_page(page);
 	return err;
@@ -312,6 +313,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	if (!err && data.req->num_pages)
 		err = fuse_send_readpages(data.req, file, inode);
 	fuse_put_request(fc, data.req);
+	fuse_invalidate_attr(inode); /* atime changed */
 	return err;
 }
 
@@ -380,8 +382,8 @@ static int fuse_commit_write(struct file *file, struct page *page,
 			clear_page_dirty(page);
 			SetPageUptodate(page);
 		}
-	} else if (err == -EINTR || err == -EIO)
-		fuse_invalidate_attr(inode);
+	}
+	fuse_invalidate_attr(inode);
 	return err;
 }
 
@@ -473,8 +475,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		if (write && pos > i_size_read(inode))
 			i_size_write(inode, pos);
 		*ppos = pos;
-	} else if (write && (res == -EINTR || res == -EIO))
-		fuse_invalidate_attr(inode);
+	}
+	fuse_invalidate_attr(inode);
 
 	return res;
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8dc66760b41..52e954f4bb9 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -173,6 +173,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
 		return NULL;
 
 	if ((inode->i_state & I_NEW)) {
+		inode->i_flags |= S_NOATIME|S_NOCMTIME;
 		inode->i_generation = generation;
 		inode->i_data.backing_dev_info = &fc->bdi;
 		fuse_init_inode(inode, attr);
-- 
cgit v1.2.3-18-g5258


From 8254798199332966e2ab647380c990193af7e854 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:38 -0700
Subject: [PATCH] FUSE: add fsync operation for directories

This patch adds a new FSYNCDIR request, which is sent when fsync is called
on directories.  This operation is available in libfuse 2.3-pre1 or
greater.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    |  7 +++++++
 fs/fuse/file.c   | 17 +++++++++++++----
 fs/fuse/fuse_i.h |  9 +++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9b43fd46aaa..73792d65b6c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -617,6 +617,12 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
 	return fuse_release_common(inode, file, 1);
 }
 
+static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	/* nfsd can call this with no file */
+	return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
+}
+
 static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
 {
 	unsigned ivalid = iattr->ia_valid;
@@ -934,6 +940,7 @@ static struct file_operations fuse_dir_operations = {
 	.readdir	= fuse_readdir,
 	.open		= fuse_dir_open,
 	.release	= fuse_dir_release,
+	.fsync		= fuse_dir_fsync,
 };
 
 static struct inode_operations fuse_common_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6dcae74ce7f..e225f8c0b26 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -156,7 +156,8 @@ static int fuse_flush(struct file *file)
 	return err;
 }
 
-static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+		      int isdir)
 {
 	struct inode *inode = de->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -165,7 +166,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	struct fuse_fsync_in inarg;
 	int err;
 
-	if (fc->no_fsync)
+	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
 		return 0;
 
 	req = fuse_get_request(fc);
@@ -175,7 +176,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.fh = ff->fh;
 	inarg.fsync_flags = datasync ? 1 : 0;
-	req->in.h.opcode = FUSE_FSYNC;
+	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
 	req->in.h.nodeid = get_node_id(inode);
 	req->inode = inode;
 	req->file = file;
@@ -186,12 +187,20 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
-		fc->no_fsync = 1;
+		if (isdir)
+			fc->no_fsyncdir = 1;
+		else
+			fc->no_fsync = 1;
 		err = 0;
 	}
 	return err;
 }
 
+static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	return fuse_fsync_common(file, de, datasync, 0);
+}
+
 size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
 			     struct inode *inode, loff_t pos, size_t count,
 			     int isdir)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 84849601363..d7647289d8a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -248,6 +248,9 @@ struct fuse_conn {
 	/** Is fsync not implemented by fs? */
 	unsigned no_fsync : 1;
 
+	/** Is fsyncdir not implemented by fs? */
+	unsigned no_fsyncdir : 1;
+
 	/** Is flush not implemented by fs? */
 	unsigned no_flush : 1;
 
@@ -339,6 +342,12 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir);
  */
 int fuse_release_common(struct inode *inode, struct file *file, int isdir);
 
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+		      int isdir);
+
 /**
  * Initialise file operations on a regular file
  */
-- 
cgit v1.2.3-18-g5258


From 7c352bdf048811b8128019ffc1e886161e09c11c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 9 Sep 2005 13:10:39 -0700
Subject: [PATCH] FUSE: don't allow restarting of system calls

This patch removes the ability to interrupt and restart operations that
have not yet had any side effect.

The reason: applications.  It seems there are some apps that generate
signals at a fast rate.  This means that if the operation cannot make
enough progress between two signals, it will be restarted forever.  This
bug actually manifested itself with 'krusader' trying to open a file for
writing under sshfs.  Thanks to Eduard Czimbalmos for the report.

The problem can be solved just by making open() uninterruptible, because in
this case it was the truncate operation that slowed down the progress.  But
it's better to solve this by simply not allowing interrupts at all (except
SIGKILL), because applications don't expect file operations to be
interruptible anyway.  As an added bonus the code is simplified somewhat.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c    | 73 ++++++++++----------------------------------------------
 fs/fuse/dir.c    | 36 ++++++++++++++--------------
 fs/fuse/file.c   | 33 ++++++++++---------------
 fs/fuse/fuse_i.h | 12 +---------
 fs/fuse/inode.c  |  2 +-
 5 files changed, 45 insertions(+), 111 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e4ada021d08..d4c869c6d01 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -103,19 +103,8 @@ static struct fuse_req *do_get_request(struct fuse_conn *fc)
 	return req;
 }
 
+/* This can return NULL, but only in case it's interrupted by a SIGKILL */
 struct fuse_req *fuse_get_request(struct fuse_conn *fc)
-{
-	if (down_interruptible(&fc->outstanding_sem))
-		return NULL;
-	return do_get_request(fc);
-}
-
-/*
- * Non-interruptible version of the above function is for operations
- * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
- * return NULL, but only in case the signal is SIGKILL.
- */
-struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc)
 {
 	int intr;
 	sigset_t oldset;
@@ -241,43 +230,20 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
 		get_file(req->file);
 }
 
-static int request_wait_answer_nonint(struct fuse_req *req)
-{
-	int err;
-	sigset_t oldset;
-	block_sigs(&oldset);
-	err = wait_event_interruptible(req->waitq, req->finished);
-	restore_sigs(&oldset);
-	return err;
-}
-
 /* Called with fuse_lock held.  Releases, and then reacquires it. */
-static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req,
-				int interruptible)
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 {
-	int intr;
+	sigset_t oldset;
 
 	spin_unlock(&fuse_lock);
-	if (interruptible)
-		intr = wait_event_interruptible(req->waitq, req->finished);
-	else
-		intr = request_wait_answer_nonint(req);
+	block_sigs(&oldset);
+	wait_event_interruptible(req->waitq, req->finished);
+	restore_sigs(&oldset);
 	spin_lock(&fuse_lock);
-	if (intr && interruptible && req->sent) {
-		/* If request is already in userspace, only allow KILL
-		   signal to interrupt */
-		spin_unlock(&fuse_lock);
-		intr = request_wait_answer_nonint(req);
-		spin_lock(&fuse_lock);
-	}
-	if (!intr)
+	if (req->finished)
 		return;
 
-	if (!interruptible || req->sent)
-		req->out.h.error = -EINTR;
-	else
-		req->out.h.error = -ERESTARTNOINTR;
-
+	req->out.h.error = -EINTR;
 	req->interrupted = 1;
 	if (req->locked) {
 		/* This is uninterruptible sleep, because data is
@@ -330,8 +296,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 	wake_up(&fc->waitq);
 }
 
-static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
-			      int interruptible)
+/*
+ * This can only be interrupted by a SIGKILL
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 1;
 	spin_lock(&fuse_lock);
@@ -345,26 +313,11 @@ static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
 		   after request_end() */
 		__fuse_get_request(req);
 
-		request_wait_answer(fc, req, interruptible);
+		request_wait_answer(fc, req);
 	}
 	spin_unlock(&fuse_lock);
 }
 
-void request_send(struct fuse_conn *fc, struct fuse_req *req)
-{
-	request_send_wait(fc, req, 1);
-}
-
-/*
- * Non-interruptible version of the above function is for operations
- * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
- * be interrupted but only with SIGKILL.
- */
-void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
-{
-	request_send_wait(fc, req, 0);
-}
-
 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 {
 	spin_lock(&fuse_lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 73792d65b6c..e79e49b3eec 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -46,12 +46,12 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct inode *inode = entry->d_inode;
 		struct fuse_inode *fi = get_fuse_inode(inode);
 		struct fuse_conn *fc = get_fuse_conn(inode);
-		struct fuse_req *req = fuse_get_request_nonint(fc);
+		struct fuse_req *req = fuse_get_request(fc);
 		if (!req)
 			return 0;
 
 		fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
-		request_send_nonint(fc, req);
+		request_send(fc, req);
 		err = req->out.h.error;
 		if (!err) {
 			if (outarg.nodeid != get_node_id(inode)) {
@@ -91,7 +91,7 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	fuse_lookup_init(req, dir, entry, &outarg);
 	request_send(fc, req);
@@ -185,7 +185,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
@@ -211,7 +211,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
@@ -236,7 +236,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	req->in.h.opcode = FUSE_SYMLINK;
 	req->in.numargs = 2;
@@ -253,7 +253,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	req->in.h.opcode = FUSE_UNLINK;
 	req->in.h.nodeid = get_node_id(dir);
@@ -284,7 +284,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	req->in.h.opcode = FUSE_RMDIR;
 	req->in.h.nodeid = get_node_id(dir);
@@ -311,7 +311,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
 	struct fuse_conn *fc = get_fuse_conn(olddir);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.newdir = get_node_id(newdir);
@@ -356,7 +356,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.oldnodeid = get_node_id(inode);
@@ -386,7 +386,7 @@ int fuse_do_getattr(struct inode *inode)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	req->in.h.opcode = FUSE_GETATTR;
 	req->in.h.nodeid = get_node_id(inode);
@@ -533,7 +533,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	struct page *page;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req = fuse_get_request_nonint(fc);
+	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -564,7 +564,7 @@ static char *read_link(struct dentry *dentry)
 	char *link;
 
 	if (!req)
-		return ERR_PTR(-ERESTARTNOINTR);
+		return ERR_PTR(-EINTR);
 
 	link = (char *) __get_free_page(GFP_KERNEL);
 	if (!link) {
@@ -677,7 +677,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.valid = iattr_to_fattr(attr, &inarg.attr);
@@ -761,7 +761,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
@@ -801,7 +801,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
@@ -851,7 +851,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
@@ -897,7 +897,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTNOINTR;
+		return -EINTR;
 
 	req->in.h.opcode = FUSE_REMOVEXATTR;
 	req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e225f8c0b26..6454022b053 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -22,9 +22,6 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 	struct fuse_open_out outarg;
 	struct fuse_file *ff;
 	int err;
-	/* Restarting the syscall is not allowed if O_CREAT and O_EXCL
-	   are both set, because creation will fail on the restart */
-	int excl = (file->f_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL);
 
 	err = generic_file_open(inode, file);
 	if (err)
@@ -38,12 +35,9 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 		 	return err;
 	}
 
-	if (excl)
-		req = fuse_get_request_nonint(fc);
-	else
-		req = fuse_get_request(fc);
+	req = fuse_get_request(fc);
 	if (!req)
-		return excl ? -EINTR : -ERESTARTSYS;
+		return -EINTR;
 
 	err = -ENOMEM;
 	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
@@ -67,10 +61,7 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 	req->out.numargs = 1;
 	req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	if (excl)
-		request_send_nonint(fc, req);
-	else
-		request_send(fc, req);
+	request_send(fc, req);
 	err = req->out.h.error;
 	if (err) {
 		fuse_request_free(ff->release_req);
@@ -133,7 +124,7 @@ static int fuse_flush(struct file *file)
 	if (fc->no_flush)
 		return 0;
 
-	req = fuse_get_request_nonint(fc);
+	req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -146,7 +137,7 @@ static int fuse_flush(struct file *file)
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
-	request_send_nonint(fc, req);
+	request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
@@ -171,7 +162,7 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
 
 	req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTSYS;
+		return -EINTR;
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.fh = ff->fh;
@@ -224,7 +215,7 @@ size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
 	req->out.argvar = 1;
 	req->out.numargs = 1;
 	req->out.args[0].size = count;
-	request_send_nonint(fc, req);
+	request_send(fc, req);
 	return req->out.args[0].size;
 }
 
@@ -240,7 +231,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
-	struct fuse_req *req = fuse_get_request_nonint(fc);
+	struct fuse_req *req = fuse_get_request(fc);
 	int err = -EINTR;
 	if (!req)
 		goto out;
@@ -314,7 +305,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	int err;
 	data.file = file;
 	data.inode = inode;
-	data.req = fuse_get_request_nonint(fc);
+	data.req = fuse_get_request(fc);
 	if (!data.req)
 		return -EINTR;
 
@@ -350,7 +341,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
 	req->out.numargs = 1;
 	req->out.args[0].size = sizeof(struct fuse_write_out);
 	req->out.args[0].value = &outarg;
-	request_send_nonint(fc, req);
+	request_send(fc, req);
 	return outarg.size;
 }
 
@@ -370,7 +361,7 @@ static int fuse_commit_write(struct file *file, struct page *page,
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
-	struct fuse_req *req = fuse_get_request_nonint(fc);
+	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -444,7 +435,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 	ssize_t res = 0;
 	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTSYS;
+		return -EINTR;
 
 	while (count) {
 		size_t tmp;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d7647289d8a..24d761518d8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -409,11 +409,6 @@ void fuse_reset_request(struct fuse_req *req);
  */
 struct fuse_req *fuse_get_request(struct fuse_conn *fc);
 
-/**
- * Reserve a preallocated request, only interruptible by SIGKILL
- */
-struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc);
-
 /**
  * Decrement reference count of a request.  If count goes to zero put
  * on unused list (preallocated) or free reqest (not preallocated).
@@ -421,15 +416,10 @@ struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc);
 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
- * Send a request (synchronous, interruptible)
+ * Send a request (synchronous)
  */
 void request_send(struct fuse_conn *fc, struct fuse_req *req);
 
-/**
- * Send a request (synchronous, non-interruptible except by SIGKILL)
- */
-void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req);
-
 /**
  * Send a request with no reply
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 52e954f4bb9..e69a546844d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -236,7 +236,7 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
 
         req = fuse_get_request(fc);
 	if (!req)
-		return -ERESTARTSYS;
+		return -EINTR;
 
 	req->in.numargs = 0;
 	req->in.h.opcode = FUSE_STATFS;
-- 
cgit v1.2.3-18-g5258


From a9f6a0dd54efea2a5d57a27e6c232f9197c25154 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 9 Sep 2005 13:10:41 -0700
Subject: [PATCH] more SPIN_LOCK_UNLOCKED -> DEFINE_SPINLOCK conversions

This converts the final 20 SPIN_LOCK_UNLOCKED holdouts.  (Another 580 places
are already using DEFINE_SPINLOCK.)  Build tested on x86.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/support/ktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55..fa8394f9437 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -170,7 +170,7 @@ ktrace_enter(
 	void            *val14,
 	void            *val15)
 {
-	static lock_t   wrap_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(wrap_lock);
 	unsigned long	flags;
 	int             index;
 	ktrace_entry_t  *ktep;
-- 
cgit v1.2.3-18-g5258


From d99901d6fdfb4098b9996de89ffbbae890e08288 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Fri, 9 Sep 2005 13:59:48 +0400
Subject: [PATCH] Lost sockfd_put() in routing_ioctl()

This patch adds the lost sockfd_put() in the 32-bit compat routing_ioctl() on
64-bit platforms.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-Off-By: Maxim Giryaev <gem@sw.ru>
Signed-off-By: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat_ioctl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f..e28a74203f3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 		r = (void *) &r4;
 	}
 
-	if (ret)
-		return -EFAULT;
+	if (ret) {
+		ret = -EFAULT;
+		goto out;
+	}
 
 	set_fs (KERNEL_DS);
 	ret = sys_ioctl (fd, cmd, (unsigned long) r);
 	set_fs (old_fs);
 
+out:
 	if (mysock)
 		sockfd_put(mysock);
 
-- 
cgit v1.2.3-18-g5258


From a4531edd75522804dd2b268d8ccc5eaa70748011 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 9 Sep 2005 15:10:52 -0700
Subject: Fix up lost patch in compat_sys_select() for new RCU files world
 order

Andrew lost this in patch reject resolution, and never noticed, since
the compat code isn't in use on x86.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 8c665705c6a..c2e0813164b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1619,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
 	char *bits;
 	long timeout;
 	int size, max_fdset, ret = -EINVAL;
+	struct fdtable *fdt;
 
 	timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
@@ -1644,7 +1645,8 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
-	max_fdset = current->files->max_fdset;
+	fdt = files_fdtable(current->files);
+	max_fdset = fdt->max_fdset;
 	if (n > max_fdset)
 		n = max_fdset;
 
-- 
cgit v1.2.3-18-g5258


From ac5b8b6f22118620cd1133d9943b1f31dc40a913 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 9 Sep 2005 15:42:34 -0700
Subject: Preempt-safe RCU file usage

Fix up fs/compat.c fixes.
---
 fs/compat.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index c2e0813164b..ac3fb9ed8ee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1645,8 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
 		goto out_nofds;
 
 	/* max_fdset can increase, so grab it once to avoid race */
+	rcu_read_lock();
 	fdt = files_fdtable(current->files);
 	max_fdset = fdt->max_fdset;
+	rcu_read_unlock();
 	if (n > max_fdset)
 		n = max_fdset;
 
-- 
cgit v1.2.3-18-g5258


From b4012a9895b3e28e3bff3aa534d58c7827af6d4f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 10 Sep 2005 00:25:47 -0700
Subject: [PATCH] ntfs build fix

*** Warning: "bit_spin_lock" [fs/ntfs/ntfs.ko] undefined!
*** Warning: "bit_spin_unlock" [fs/ntfs/ntfs.ko] undefined!

Cc: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ntfs/aops.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 545236414d5..b6cc8cf2462 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -27,6 +27,7 @@
 #include <linux/swap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
 
 #include "aops.h"
 #include "attrib.h"
-- 
cgit v1.2.3-18-g5258


From fb1c8f93d869b34cacb8b8932e2b83d96a19d720 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 10 Sep 2005 00:25:56 -0700
Subject: [PATCH] spinlock consolidation

This patch (written by me and also containing many suggestions of Arjan van
de Ven) does a major cleanup of the spinlock code.  It does the following
things:

 - consolidates and enhances the spinlock/rwlock debugging code

 - simplifies the asm/spinlock.h files

 - encapsulates the raw spinlock type and moves generic spinlock
   features (such as ->break_lock) into the generic code.

 - cleans up the spinlock code hierarchy to get rid of the spaghetti.

Most notably there's now only a single variant of the debugging code,
located in lib/spinlock_debug.c.  (previously we had one SMP debugging
variant per architecture, plus a separate generic one for UP builds)

Also, i've enhanced the rwlock debugging facility, it will now track
write-owners.  There is new spinlock-owner/CPU-tracking on SMP builds too.
All locks have lockup detection now, which will work for both soft and hard
spin/rwlock lockups.

The arch-level include files now only contain the minimally necessary
subset of the spinlock code - all the rest that can be generalized now
lives in the generic headers:

 include/asm-i386/spinlock_types.h       |   16
 include/asm-x86_64/spinlock_types.h     |   16

I have also split up the various spinlock variants into separate files,
making it easier to see which does what. The new layout is:

   SMP                         |  UP
   ----------------------------|-----------------------------------
   asm/spinlock_types_smp.h    |  linux/spinlock_types_up.h
   linux/spinlock_types.h      |  linux/spinlock_types.h
   asm/spinlock_smp.h          |  linux/spinlock_up.h
   linux/spinlock_api_smp.h    |  linux/spinlock_api_up.h
   linux/spinlock.h            |  linux/spinlock.h

/*
 * here's the role of the various spinlock/rwlock related include files:
 *
 * on SMP builds:
 *
 *  asm/spinlock_types.h: contains the raw_spinlock_t/raw_rwlock_t and the
 *                        initializers
 *
 *  linux/spinlock_types.h:
 *                        defines the generic type and initializers
 *
 *  asm/spinlock.h:       contains the __raw_spin_*()/etc. lowlevel
 *                        implementations, mostly inline assembly code
 *
 *   (also included on UP-debug builds:)
 *
 *  linux/spinlock_api_smp.h:
 *                        contains the prototypes for the _spin_*() APIs.
 *
 *  linux/spinlock.h:     builds the final spin_*() APIs.
 *
 * on UP builds:
 *
 *  linux/spinlock_types_up.h:
 *                        contains the generic, simplified UP spinlock type.
 *                        (which is an empty structure on non-debug builds)
 *
 *  linux/spinlock_types.h:
 *                        defines the generic type and initializers
 *
 *  linux/spinlock_up.h:
 *                        contains the __raw_spin_*()/etc. version of UP
 *                        builds. (which are NOPs on non-debug, non-preempt
 *                        builds)
 *
 *   (included on UP-non-debug builds:)
 *
 *  linux/spinlock_api_up.h:
 *                        builds the _spin_*() APIs.
 *
 *  linux/spinlock.h:     builds the final spin_*() APIs.
 */

All SMP and UP architectures are converted by this patch.

arm, i386, ia64, ppc, ppc64, s390/s390x, x64 were build-tested via
crosscompilers.  m32r, mips, sh, sparc have not been tested yet, but should
be mostly fine.

From: Grant Grundler <grundler@parisc-linux.org>

  Booted and lightly tested on a500-44 (64-bit, SMP kernel, dual CPU).
  Builds 32-bit SMP kernel (not booted or tested).  I did not try to build
  non-SMP kernels.  That should be trivial to fix up later if necessary.

  I converted bit ops atomic_hash lock to raw_spinlock_t.  Doing so avoids
  some ugly nesting of linux/*.h and asm/*.h files.  Those particular locks
  are well tested and contained entirely inside arch specific code.  I do NOT
  expect any new issues to arise with them.

  If someone does ever need to use debug/metrics with them, then they will
  need to unravel this hairball between spinlocks, atomic ops, and bit ops
  that exist only because parisc has exactly one atomic instruction: LDCW
  (load and clear word).

From: "Luck, Tony" <tony.luck@intel.com>

   ia64 fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjanv@infradead.org>
Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
Cc: Matthew Wilcox <willy@debian.org>
Signed-off-by: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Mikael Pettersson <mikpe@csd.uu.se>
Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 1c62203a490..6cbfceabd95 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -40,6 +40,7 @@
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/mpage.h>
+#include <linux/bit_spinlock.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void invalidate_bh_lrus(void);
-- 
cgit v1.2.3-18-g5258


From d79fc0fc6645b0cf5cd980da76942ca6d6300fa4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 10 Sep 2005 00:26:12 -0700
Subject: [PATCH] sched: TASK_NONINTERACTIVE

This patch implements a task state bit (TASK_NONINTERACTIVE), which can be
used by blocking points to mark the task's wait as "non-interactive".  This
does not mean the task will be considered a CPU-hog - the wait will simply
not have an effect on the waiting task's priority - positive or negative
alike.  Right now only pipe_wait() will make use of it, because it's a
common source of not-so-interactive waits (kernel compilation jobs, etc.).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 2c7a23dde2d..66aa0b938d6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode)
 {
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
+	/*
+	 * Pipes are system-local resources, so sleeping on them
+	 * is considered a noninteractive wait:
+	 */
+	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
 	up(PIPE_SEM(*inode));
 	schedule();
 	finish_wait(PIPE_WAIT(*inode), &wait);
-- 
cgit v1.2.3-18-g5258


From 216d81bb35fc50923993462cc4fbc7029f9be1a9 Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Sat, 10 Sep 2005 00:27:05 -0700
Subject: [PATCH] janitor: jffs/intrep: list_for_each_entry

Use list_for_each_entry to make code more readable.

Signed-off-by: Maximilian Attems <janitor@sternwelten.at>
Signed-off-by: Domen Puncer <domen@coderock.org>
Cc: <jffs-dev@axis.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/intrep.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 456d7e6e29c..27f199e94cf 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino)
 {
 	struct jffs_file *f;
 	int i = ino % c->hash_len;
-	struct list_head *tmp;
 
 	D3(printk("jffs_find_file(): ino: %u\n", ino));
 
-	for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) {
-		f = list_entry(tmp, struct jffs_file, hash);
+	list_for_each_entry(f, &c->hash[i], hash) {
 		if (ino != f->ino)
 			continue;
 		D3(printk("jffs_find_file(): Found file with ino "
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
 	int result = 0;
 
 	for (pos = 0; pos < c->hash_len; pos++) {
-		struct list_head *p, *next;
-		for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) {
-			/* We need a reference to the next file in the
-			   list because `func' might remove the current
-			   file `f'.  */
-			next = p->next;
-			r = func(list_entry(p, struct jffs_file, hash));
+		struct jffs_file *f, *next;
+
+		/* We must do _safe, because 'func' might remove the
+		   current file 'f' from the list.  */
+		list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
+			r = func(f);
 			if (r < 0)
 				return r;
 			result += r;
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c)
 
 	printk("JFFS: Dumping the file system's hash table...\n");
 	for (i = 0; i < c->hash_len; i++) {
-		struct list_head *p;
-		for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) {
-			struct jffs_file *f=list_entry(p,struct jffs_file,hash);
+		struct jffs_file *f;
+		list_for_each_entry(f, &c->hash[i], hash) {
 			printk("*** c->hash[%u]: \"%s\" "
 			       "(ino: %u, pino: %u)\n",
 			       i, (f->name ? f->name : ""),
-- 
cgit v1.2.3-18-g5258


From fdadd65fbce0ff966cb8e74247d9636f52a7fc7a Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Sat, 10 Sep 2005 00:27:07 -0700
Subject: [PATCH] janitor: fs/namespace.c: list_for_each_entry

Make code more readable with list_for_each_entry.

Signed-off-by: Maximilian Attems <janitor@sternwelten.at>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 34156260c9b..2fa9fdf7d6f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
 static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct vfsmount *res, *p, *q, *r, *s;
-	struct list_head *h;
 	struct nameidata nd;
 
 	res = q = clone_mnt(mnt, dentry);
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
-	for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
-		r = list_entry(h, struct vfsmount, mnt_child);
+	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
 		if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
 			continue;
 
-- 
cgit v1.2.3-18-g5258


From 0cdca3f9806a3dbaa07b5e8175000cd513ba92d4 Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Sat, 10 Sep 2005 00:27:07 -0700
Subject: [PATCH] janitor: fs/dcache.c: list_for_each*

First one is list_for_each_entry (thanks maks), second 2 list_for_each_safe.

Signed-off-by: Maximilian Attems <janitor@sternwelten.at>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a15a2e1f552..7376b61269f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode)
  */
 void d_prune_aliases(struct inode *inode)
 {
-	struct list_head *tmp, *head = &inode->i_dentry;
+	struct dentry *dentry;
 restart:
 	spin_lock(&dcache_lock);
-	tmp = head;
-	while ((tmp = tmp->next) != head) {
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!atomic_read(&dentry->d_count)) {
 			__dget_locked(dentry);
@@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb)
 	 * superblock to the most recent end of the unused list.
 	 */
 	spin_lock(&dcache_lock);
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
+	list_for_each_safe(tmp, next, &dentry_unused) {
 		dentry = list_entry(tmp, struct dentry, d_lru);
 		if (dentry->d_sb != sb)
 			continue;
@@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb)
 	 * Pass two ... free the dentries for this superblock.
 	 */
 repeat:
-	next = dentry_unused.next;
-	while (next != &dentry_unused) {
-		tmp = next;
-		next = tmp->next;
+	list_for_each_safe(tmp, next, &dentry_unused) {
 		dentry = list_entry(tmp, struct dentry, d_lru);
 		if (dentry->d_sb != sb)
 			continue;
-- 
cgit v1.2.3-18-g5258


From ea0e0a4f53a75ed9d0812352c0410f6fc2a0b62a Mon Sep 17 00:00:00 2001
From: James Lamanna <jlamanna@gmail.com>
Date: Sat, 10 Sep 2005 00:27:16 -0700
Subject: [PATCH] janitor: reiserfs: super.c - vfree() checking cleanups

super.c vfree() checking cleanups.

Signed-off-by: James Lamanna <jlamanna@gmail.com>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6951c35755b..44b02fc02eb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 			if (SB_AP_BITMAP(s))
 				brelse(SB_AP_BITMAP(s)[j].bh);
 		}
-		if (SB_AP_BITMAP(s))
-			vfree(SB_AP_BITMAP(s));
+		vfree(SB_AP_BITMAP(s));
 	}
 	if (SB_BUFFER_WITH_SB(s))
 		brelse(SB_BUFFER_WITH_SB(s));
-- 
cgit v1.2.3-18-g5258


From e711700a0e6a6824fcfd5519d6b6982850a648ee Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 10 Sep 2005 00:27:20 -0700
Subject: [PATCH] fs/cramfs/uncompress.c should #include <linux/cramfs_fs.h>

Every file should #include the header with the prototypes of the global
functions it is offering.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/uncompress.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 5034365b06a..8def89f2c43 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/vmalloc.h>
 #include <linux/zlib.h>
+#include <linux/cramfs_fs.h>
 
 static z_stream stream;
 static int initialized;
-- 
cgit v1.2.3-18-g5258


From 041e0e3b1970c508dc9a95b7dd9dc86271a7d7ac Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Sat, 10 Sep 2005 00:27:23 -0700
Subject: [PATCH] fs: fix-up schedule_timeout() usage

Use schedule_timeout_{,un}interruptible() instead of
set_current_state()/schedule_timeout() to reduce kernel size.  Also use helper
functions to convert between human time units and jiffies rather than constant
HZ division to avoid rounding errors.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/connect.c            |  6 ++----
 fs/jbd/transaction.c         |  3 +--
 fs/lockd/clntproc.c          |  3 +--
 fs/nfs/nfs3proc.c            |  3 +--
 fs/nfs/nfs4proc.c            | 12 ++++--------
 fs/reiserfs/journal.c        |  3 +--
 fs/smbfs/proc.c              |  3 +--
 fs/xfs/linux-2.6/time.h      |  3 +--
 fs/xfs/linux-2.6/xfs_buf.c   |  6 +++---
 fs/xfs/linux-2.6/xfs_super.c | 12 ++++++------
 10 files changed, 21 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3217ac5f6bd..2335f14a158 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	}
 	
 	cifs_sb->tcon = NULL;
-	if (ses) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 2);
-	}
+	if (ses)
+		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
 		sesInfoFree(ses);
 
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c6ec66fd876..49bbc2be3d7 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1340,8 +1340,7 @@ int journal_stop(handle_t *handle)
 	if (handle->h_sync) {
 		do {
 			old_handle_count = transaction->t_handle_count;
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(1);
+			schedule_timeout_uninterruptible(1);
 		} while (old_handle_count != transaction->t_handle_count);
 	}
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 14b3ce87fa2..87332f30141 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void)
 			return call;
 		}
 		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
-		current->state = TASK_INTERRUPTIBLE;
-		schedule_timeout(5*HZ);
+		schedule_timeout_interruptible(5*HZ);
 	}
 	return NULL;
 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2681485cf2d..edc95514046 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX)
 			break;
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+		schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!signalled());
 	rpc_clnt_sigunmask(clnt, &oldset);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0c5a308e496..9701ca8c942 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 		*timeout = NFS4_POLL_RETRY_MAX;
 	rpc_clnt_sigmask(clnt, &oldset);
 	if (clnt->cl_intr) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(*timeout);
+		schedule_timeout_interruptible(*timeout);
 		if (signalled())
 			res = -ERESTARTSYS;
-	} else {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(*timeout);
-	}
+	} else
+		schedule_timeout_uninterruptible(*timeout);
 	rpc_clnt_sigunmask(clnt, &oldset);
 	*timeout <<= 1;
 	return res;
@@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 static unsigned long
 nfs4_set_lock_task_retry(unsigned long timeout)
 {
-	current->state = TASK_INTERRUPTIBLE;
-	schedule_timeout(timeout);
+	schedule_timeout_interruptible(timeout);
 	timeout <<= 1;
 	if (timeout > NFS4_LOCK_MAXTIMEOUT)
 		return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a8e29e9bbbd..4b15761434b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned long bcount = journal->j_bcount;
 	while (1) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(1);
+		schedule_timeout_uninterruptible(1);
 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
 		while ((atomic_read(&journal->j_wcount) > 0 ||
 			atomic_read(&journal->j_jlock)) &&
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 220babe91ef..38ab558835c 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
 		if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
 			/* a damn Win95 bug - sometimes it clags if you 
 			   ask it too fast */
-			current->state = TASK_INTERRUPTIBLE;
-			schedule_timeout(HZ/5);
+			schedule_timeout_interruptible(msecs_to_jiffies(200));
 			continue;
                 }
 
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
index 6c6fd0faa8e..b0d2873ab27 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/linux-2.6/time.h
@@ -39,8 +39,7 @@ typedef struct timespec timespec_t;
 
 static inline void delay(long ticks)
 {
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	schedule_timeout(ticks);
+	schedule_timeout_uninterruptible(ticks);
 }
 
 static inline void nanotime(struct timespec *tvp)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 655bf4a78af..e82cf72ac59 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1780,10 +1780,10 @@ xfsbufd(
 			xfsbufd_force_sleep = 0;
 		}
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+		schedule_timeout_interruptible
+			(xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 
-		age = (xfs_buf_age_centisecs * HZ) / 100;
+		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		spin_lock(&pbd_delwrite_lock);
 		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
 			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0da87bfc999..2302454d8d4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -467,7 +467,7 @@ xfs_flush_inode(
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
-	delay(HZ/2);
+	delay(msecs_to_jiffies(500));
 }
 
 /*
@@ -492,7 +492,7 @@ xfs_flush_device(
 
 	igrab(inode);
 	xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
-	delay(HZ/2);
+	delay(msecs_to_jiffies(500));
 	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
@@ -520,10 +520,9 @@ xfssyncd(
 	struct vfs_sync_work	*work, *n;
 	LIST_HEAD		(tmp);
 
-	timeleft = (xfs_syncd_centisecs * HZ) / 100;
+	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		timeleft = schedule_timeout(timeleft);
+		timeleft = schedule_timeout_interruptible(timeleft);
 		/* swsusp */
 		try_to_freeze();
 		if (kthread_should_stop())
@@ -537,7 +536,8 @@ xfssyncd(
 		 */
 		if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
 			if (!timeleft)
-				timeleft = (xfs_syncd_centisecs * HZ) / 100;
+				timeleft = xfs_syncd_centisecs *
+							msecs_to_jiffies(10);
 			INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
 			list_add_tail(&vfsp->vfs_sync_work.w_list,
 					&vfsp->vfs_sync_list);
-- 
cgit v1.2.3-18-g5258