aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorGreg KH <gregkh@suse.de>2005-09-12 12:45:04 -0700
committerGreg Kroah-Hartman <gregkh@suse.de>2005-09-12 12:45:04 -0700
commitd58dde0f552a5c5c4485b962d8b6e9dd54fefb30 (patch)
treed9a7e35eb88fea6265d5aadcc3d4ed39122b052a /fs
parent877599fdef5ea4a7dd1956e22fa9d6923add97f8 (diff)
parent2ade81473636b33aaac64495f89a7dc572c529f0 (diff)
Merge ../torvalds-2.6/
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.c359
-rw-r--r--fs/9p/9p.h341
-rw-r--r--fs/9p/Makefile17
-rw-r--r--fs/9p/conv.c693
-rw-r--r--fs/9p/conv.h36
-rw-r--r--fs/9p/debug.h70
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h178
-rw-r--r--fs/9p/fid.c241
-rw-r--r--fs/9p/fid.h57
-rw-r--r--fs/9p/mux.c475
-rw-r--r--fs/9p/mux.h41
-rw-r--r--fs/9p/trans_fd.c172
-rw-r--r--fs/9p/trans_sock.c290
-rw-r--r--fs/9p/transport.h46
-rw-r--r--fs/9p/v9fs.c452
-rw-r--r--fs/9p/v9fs.h103
-rw-r--r--fs/9p/v9fs_vfs.h53
-rw-r--r--fs/9p/vfs_dentry.c126
-rw-r--r--fs/9p/vfs_dir.c226
-rw-r--r--fs/9p/vfs_file.c401
-rw-r--r--fs/9p/vfs_inode.c1338
-rw-r--r--fs/9p/vfs_super.c280
-rw-r--r--fs/Kconfig24
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/aio.c34
-rw-r--r--fs/autofs/autofs_i.h3
-rw-r--r--fs/autofs/dirhash.c5
-rw-r--r--fs/autofs/inode.c3
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c25
-rw-r--r--fs/bfs/file.c23
-rw-r--r--fs/bfs/inode.c104
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/compat.c6
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/cramfs/uncompress.c1
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xattr.h8
-rw-r--r--fs/ext2/xattr_security.c22
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/xattr.h11
-rw-r--r--fs/ext3/xattr_security.c22
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fcntl.c60
-rw-r--r--fs/file.c387
-rw-r--r--fs/file_table.c40
-rw-r--r--fs/fuse/Makefile7
-rw-r--r--fs/fuse/dev.c877
-rw-r--r--fs/fuse/dir.c982
-rw-r--r--fs/fuse/file.c555
-rw-r--r--fs/fuse/fuse_i.h451
-rw-r--r--fs/fuse/inode.c591
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/inode.c12
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jffs/inode-v23.c1
-rw-r--r--fs/jffs/intrep.c22
-rw-r--r--fs/jfs/acl.c24
-rw-r--r--fs/jfs/inode.c26
-rw-r--r--fs/jfs/jfs_acl.h12
-rw-r--r--fs/jfs/jfs_xattr.h14
-rw-r--r--fs/jfs/namei.c85
-rw-r--r--fs/jfs/xattr.c94
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/locks.c8
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/namei.c26
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/ntfs/aops.c1
-rw-r--r--fs/open.c43
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c33
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c3
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/select.c23
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/smbfs/proc.c3
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/xfs/linux-2.6/time.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c12
-rw-r--r--fs/xfs/support/ktrace.c2
100 files changed, 10459 insertions, 437 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 00000000000..e847f504a47
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
+/*
+ * linux/fs/9p/9p.c
+ *
+ * This file contains the 9P2000 T-message (request) functions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+/**
+ * v9fs_t_version - negotiate protocol parameters with the server
+ * @v9ses: 9P2000 session information
+ * @msize: requested maximum packet size
+ * @version: requested version.extension string
+ * @fcall: pointer to response fcall pointer
+ *
+ * Fills in a TVERSION request and hands it to v9fs_mux_rpc(); the value
+ * returned by v9fs_mux_rpc() is passed straight back to the caller.
+ */
+
+int
+v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+	       char *version, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
+
+	tc.params.tversion.version = version;
+	tc.params.tversion.msize = msize;
+	tc.id = TVERSION;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_attach - mount the server
+ * @v9ses: 9P2000 session information
+ * @uname: user name doing the attach
+ * @aname: remote name being attached to
+ * @fid: mount fid to attach to the root node
+ * @afid: authentication fid (in this case result key)
+ * @fcall: pointer to response fcall pointer
+ *
+ * Fills in a TATTACH request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+	      u32 fid, u32 afid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
+		aname, fid, afid);
+
+	tc.params.tattach.uname = uname;
+	tc.params.tattach.aname = aname;
+	tc.params.tattach.fid = fid;
+	tc.params.tattach.afid = afid;
+	tc.id = TATTACH;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_clunk - release a fid (finish a transaction)
+ * @v9ses: 9P2000 session information
+ * @fid: fid to release
+ * @fcall: pointer to response fcall pointer
+ *
+ * Fills in a TCLUNK request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc.params.tclunk.fid = fid;
+	tc.id = TCLUNK;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_flush - flush a pending transaction
+ * @v9ses: 9P2000 session information
+ * @tag: tag of the transaction to flush
+ *
+ * Fills in a TFLUSH request and returns the result of v9fs_mux_rpc().
+ * No response fcall is requested (NULL is passed to v9fs_mux_rpc()).
+ */
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "oldtag %d\n", tag);
+
+	tc.params.tflush.oldtag = tag;
+	tc.id = TFLUSH;
+
+	return v9fs_mux_rpc(v9ses, &tc, NULL);
+}
+
+/**
+ * v9fs_t_stat - read a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to get info about
+ * @fcall: pointer to response fcall (may be NULL)
+ *
+ * When @fcall is non-NULL, *@fcall is reset to NULL before the TSTAT
+ * request is handed to v9fs_mux_rpc(), whose result is returned.
+ */
+
+int
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	if (fcall != NULL)
+		*fcall = NULL;
+
+	tc.params.tstat.fid = fid;
+	tc.id = TSTAT;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_wstat - write a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to write info about
+ * @stat: metadata to write; must not be NULL (it is dereferenced here)
+ * @fcall: pointer to response fcall
+ *
+ * Fills in a TWSTAT request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+	     struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
+
+	tc.params.twstat.stat = stat;
+	tc.params.twstat.fid = fid;
+	tc.id = TWSTAT;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_walk - walk a fid to a new file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to walk
+ * @newfid: new fid (for clone operations)
+ * @name: path element to walk to, or NULL to walk zero elements
+ * @fcall: pointer to response fcall
+ *
+ * Fills in a TWALK request carrying at most one name and returns the
+ * result of v9fs_mux_rpc().
+ */
+
+/* TODO: support multiple walk */
+
+int
+v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+	    char *name, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
+
+	tc.id = TWALK;
+	tc.params.twalk.newfid = newfid;
+	tc.params.twalk.fid = fid;
+
+	/* the name array is only set up when there is a name to send */
+	tc.params.twalk.nwname = name ? 1 : 0;
+	if (name)
+		tc.params.twalk.wnames = &name;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_open - open a file
+ * @v9ses: 9P2000 session information
+ * @fid: fid to open
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ * Fills in a TOPEN request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+	    struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
+
+	tc.params.topen.mode = mode;
+	tc.params.topen.fid = fid;
+	tc.id = TOPEN;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_remove - remove a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to remove
+ * @fcall: pointer to response fcall
+ *
+ * Fills in a TREMOVE request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+	      struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d\n", fid);
+
+	tc.params.tremove.fid = fid;
+	tc.id = TREMOVE;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_create - create a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to create
+ * @name: name of the file or directory to create
+ * @perm: permissions to create with
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ * Fills in a TCREATE request and returns the result of v9fs_mux_rpc().
+ */
+
+int
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+	      u32 perm, u8 mode, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+
+	dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
+		fid, name, perm, mode);
+
+	tc.params.tcreate.mode = mode;
+	tc.params.tcreate.perm = perm;
+	tc.params.tcreate.name = name;
+	tc.params.tcreate.fid = fid;
+	tc.id = TCREATE;
+
+	return v9fs_mux_rpc(v9ses, &tc, fcall);
+}
+
+/**
+ * v9fs_t_read - read data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to read from
+ * @offset: offset to start read at
+ * @count: how many bytes to read
+ * @fcall: pointer to response fcall (with data); may be NULL
+ *
+ * Returns the byte count reported in the Rread response when
+ * v9fs_mux_rpc() returns 0, otherwise the value v9fs_mux_rpc() returned.
+ * The response is handed to the caller through @fcall, or freed here
+ * when @fcall is NULL.
+ */
+
+int
+v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+	    u32 count, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall msg;
+	struct v9fs_fcall *rc = NULL;
+	int ret;
+
+	/* print all 64 bits of the offset, matching v9fs_t_write() */
+	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+		(unsigned long long)offset, count);
+	msg.id = TREAD;
+	msg.params.tread.fid = fid;
+	msg.params.tread.offset = offset;
+	msg.params.tread.count = count;
+	ret = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+	if (!ret) {
+		ret = rc->params.rread.count;
+		dump_data(rc->params.rread.data, rc->params.rread.count);
+	}
+
+	/* ownership of the response moves to the caller if it asked for it */
+	if (fcall)
+		*fcall = rc;
+	else
+		kfree(rc);
+
+	return ret;
+}
+
+/**
+ * v9fs_t_write - write data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to write to
+ * @offset: offset to start write at
+ * @count: how many bytes to write
+ * @data: buffer holding the bytes to write
+ * @fcall: pointer to response fcall; may be NULL
+ *
+ * Returns the byte count reported in the Rwrite response when
+ * v9fs_mux_rpc() returns 0, otherwise the value v9fs_mux_rpc() returned.
+ * The response is handed to the caller through @fcall, or freed here
+ * when @fcall is NULL.
+ */
+
+int
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
+	     u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+{
+	struct v9fs_fcall tc;
+	struct v9fs_fcall *reply = NULL;
+	long status;
+
+	dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+		(unsigned long long)offset, count);
+	dump_data(data, count);
+
+	tc.id = TWRITE;
+	tc.params.twrite.data = data;
+	tc.params.twrite.count = count;
+	tc.params.twrite.offset = offset;
+	tc.params.twrite.fid = fid;
+
+	status = v9fs_mux_rpc(v9ses, &tc, &reply);
+	if (status == 0)
+		status = reply->params.rwrite.count;
+
+	if (!fcall)
+		kfree(reply);
+	else
+		*fcall = reply;
+
+	return status;
+}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 00000000000..f55424216be
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
+/*
+ * linux/fs/9p/9p.h
+ *
+ * 9P protocol definitions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/*
+ * Message Types
+ *
+ * Only the T-messages carry explicit values; each R-message enumerator
+ * implicitly takes the value of the T-message before it plus one.
+ */
+enum {
+ TVERSION = 100, /* negotiate protocol parameters */
+ RVERSION,
+ TAUTH = 102,
+ RAUTH,
+ TATTACH = 104, /* mount the server */
+ RATTACH,
+ TERROR = 106,
+ RERROR,
+ TFLUSH = 108, /* flush a pending transaction */
+ RFLUSH,
+ TWALK = 110, /* walk a fid to a new file or directory */
+ RWALK,
+ TOPEN = 112, /* open a file */
+ ROPEN,
+ TCREATE = 114, /* create a file or directory */
+ RCREATE,
+ TREAD = 116, /* read data */
+ RREAD,
+ TWRITE = 118, /* write data */
+ RWRITE,
+ TCLUNK = 120, /* release a fid */
+ RCLUNK,
+ TREMOVE = 122, /* remove a file or directory */
+ RREMOVE,
+ TSTAT = 124, /* read a file's meta-data */
+ RSTAT,
+ TWSTAT = 126, /* write a file's meta-data */
+ RWSTAT,
+};
+
+/*
+ * modes
+ *
+ * NOTE(review): presumably the values for the u8 mode field of
+ * Topen/Tcreate -- confirm against the callers.
+ */
+enum {
+ V9FS_OREAD = 0x00,
+ V9FS_OWRITE = 0x01,
+ V9FS_ORDWR = 0x02,
+ V9FS_OEXEC = 0x03,
+ V9FS_OEXCL = 0x04,
+ V9FS_OTRUNC = 0x10,
+ V9FS_OREXEC = 0x20,
+ V9FS_ORCLOSE = 0x40,
+ V9FS_OAPPEND = 0x80,
+};
+
+/*
+ * permissions
+ *
+ * NOTE(review): presumably flag bits for the u32 perm/mode fields
+ * (Tcreate.perm, v9fs_stat.mode) -- confirm against the callers.
+ */
+enum {
+ V9FS_DMDIR = 0x80000000,
+ V9FS_DMAPPEND = 0x40000000,
+ V9FS_DMEXCL = 0x20000000,
+ V9FS_DMMOUNT = 0x10000000,
+ V9FS_DMAUTH = 0x08000000,
+ V9FS_DMTMP = 0x04000000,
+ V9FS_DMSYMLINK = 0x02000000,
+ V9FS_DMLINK = 0x01000000,
+ /* 9P2000.u extensions */
+ V9FS_DMDEVICE = 0x00800000,
+ V9FS_DMNAMEDPIPE = 0x00200000,
+ V9FS_DMSOCKET = 0x00100000,
+ V9FS_DMSETUID = 0x00080000,
+ V9FS_DMSETGID = 0x00040000,
+};
+
+/*
+ * qid.types
+ *
+ * Each non-zero value equals the V9FS_DM* constant of the same name
+ * shifted right by 24 bits.
+ */
+enum {
+ V9FS_QTDIR = 0x80,
+ V9FS_QTAPPEND = 0x40,
+ V9FS_QTEXCL = 0x20,
+ V9FS_QTMOUNT = 0x10,
+ V9FS_QTAUTH = 0x08,
+ V9FS_QTTMP = 0x04,
+ V9FS_QTSYMLINK = 0x02,
+ V9FS_QTLINK = 0x01,
+ V9FS_QTFILE = 0x00,
+};
+
+/* ample room for Twrite/Rread header (iounit) */
+#define V9FS_IOHDRSZ 24
+
+/* qids are the unique ID for a file (like an inode) */
+struct v9fs_qid {
+ u8 type; /* 1 byte on the wire */
+ u32 version; /* 4 bytes on the wire */
+ u64 path; /* 8 bytes on the wire */
+};
+
+/*
+ * Plan 9 file metadata (stat) structure
+ *
+ * The string members are char pointers; v9fs_deserialize_stat() places
+ * the decoded strings in the memory that follows the structure.
+ */
+struct v9fs_stat {
+ u16 size; /* serialized size; v9fs_size_stat() does not count this field */
+ u16 type;
+ u32 dev;
+ struct v9fs_qid qid;
+ u32 mode;
+ u32 atime;
+ u32 mtime;
+ u64 length;
+ char *name;
+ char *uid;
+ char *gid;
+ char *muid;
+ char *extension; /* 9p2000.u extensions */
+ u32 n_uid; /* 9p2000.u extensions */
+ u32 n_gid; /* 9p2000.u extensions */
+ u32 n_muid; /* 9p2000.u extensions */
+ char data[0]; /* start of the area following the structure */
+};
+
+/*
+ * Structures for Protocol Operations
+ *
+ * The "wire:" notes give the field layout that v9fs_size_fcall() uses
+ * for the T-messages: name[n] is an n-byte integer, name[s] a string
+ * with a 2-byte length prefix.
+ */
+
+struct Tversion { /* wire: msize[4] version[s] */
+ u32 msize;
+ char *version;
+};
+
+struct Rversion {
+ u32 msize;
+ char *version;
+};
+
+struct Tauth { /* wire: afid[4] uname[s] aname[s] */
+ u32 afid;
+ char *uname;
+ char *aname;
+};
+
+struct Rauth {
+ struct v9fs_qid qid;
+};
+
+struct Rerror {
+ char *error;
+ u32 errno; /* 9p2000.u extension */
+};
+
+struct Tflush { /* wire: oldtag[2] */
+ u32 oldtag; /* only 16 bits are written by v9fs_serialize_fcall() */
+};
+
+struct Rflush {
+};
+
+struct Tattach { /* wire: fid[4] afid[4] uname[s] aname[s] */
+ u32 fid;
+ u32 afid;
+ char *uname;
+ char *aname;
+};
+
+struct Rattach {
+ struct v9fs_qid qid;
+};
+
+struct Twalk { /* wire: fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+ u32 fid;
+ u32 newfid;
+ u32 nwname; /* only 16 bits are written by v9fs_serialize_fcall() */
+ char **wnames; /* array of nwname strings */
+};
+
+struct Rwalk {
+ u32 nwqid;
+ struct v9fs_qid *wqids;
+};
+
+struct Topen { /* wire: fid[4] mode[1] */
+ u32 fid;
+ u8 mode;
+};
+
+struct Ropen {
+ struct v9fs_qid qid;
+ u32 iounit;
+};
+
+struct Tcreate { /* wire: fid[4] name[s] perm[4] mode[1] */
+ u32 fid;
+ char *name;
+ u32 perm;
+ u8 mode;
+};
+
+struct Rcreate {
+ struct v9fs_qid qid;
+ u32 iounit;
+};
+
+struct Tread { /* wire: fid[4] offset[8] count[4] */
+ u32 fid;
+ u64 offset;
+ u32 count;
+};
+
+struct Rread {
+ u32 count;
+ u8 *data;
+};
+
+struct Twrite { /* wire: fid[4] offset[8] count[4] data[count] */
+ u32 fid;
+ u64 offset;
+ u32 count;
+ u8 *data;
+};
+
+struct Rwrite {
+ u32 count;
+};
+
+struct Tclunk { /* wire: fid[4] */
+ u32 fid;
+};
+
+struct Rclunk {
+};
+
+struct Tremove { /* wire: fid[4] */
+ u32 fid;
+};
+
+struct Rremove {
+};
+
+struct Tstat { /* wire: fid[4] */
+ u32 fid;
+};
+
+struct Rstat {
+ struct v9fs_stat *stat;
+};
+
+struct Twstat { /* wire: fid[4] stat[n] */
+ u32 fid;
+ struct v9fs_stat *stat;
+};
+
+struct Rwstat {
+};
+
+/*
+ * fcall is the primary packet structure
+ *
+ * The id field holds one of the message type values (TVERSION ...
+ * RWSTAT) and selects which member of the params union is in use.
+ */
+
+struct v9fs_fcall {
+ u32 size; /* total message size; filled in by v9fs_serialize_fcall() */
+ u8 id; /* message type */
+ u16 tag;
+
+ union {
+ struct Tversion tversion;
+ struct Rversion rversion;
+ struct Tauth tauth;
+ struct Rauth rauth;
+ struct Rerror rerror;
+ struct Tflush tflush;
+ struct Rflush rflush;
+ struct Tattach tattach;
+ struct Rattach rattach;
+ struct Twalk twalk;
+ struct Rwalk rwalk;
+ struct Topen topen;
+ struct Ropen ropen;
+ struct Tcreate tcreate;
+ struct Rcreate rcreate;
+ struct Tread tread;
+ struct Rread rread;
+ struct Twrite twrite;
+ struct Rwrite rwrite;
+ struct Tclunk tclunk;
+ struct Rclunk rclunk;
+ struct Tremove tremove;
+ struct Rremove rremove;
+ struct Tstat tstat;
+ struct Rstat rstat;
+ struct Twstat twstat;
+ struct Rwstat rwstat;
+ } params;
+};
+
+#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+
+int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+ char *version, struct v9fs_fcall **rcall);
+
+int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+ u32 fid, u32 afid, struct v9fs_fcall **rcall);
+
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
+
+int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+
+int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+ char *name, struct v9fs_fcall **rcall);
+
+int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+ u32 perm, u8 mode, struct v9fs_fcall **rcall);
+
+int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
+ u64 offset, u32 count, struct v9fs_fcall **rcall);
+
+int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+ u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 00000000000..e4e4ffe5a7d
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_9P_FS) := 9p2000.o
+# objects that are combined into 9p2000.o
+9p2000-objs := \
+ vfs_super.o \
+ vfs_inode.o \
+ vfs_file.o \
+ vfs_dir.o \
+ vfs_dentry.o \
+ error.o \
+ mux.o \
+ trans_fd.o \
+ trans_sock.o \
+ 9p.o \
+ conv.o \
+ v9fs.o \
+ fid.o
+
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 00000000000..1554731bd65
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,693 @@
+/*
+ * linux/fs/9p/conv.c
+ *
+ * 9P protocol conversion functions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "conv.h"
+
+/*
+ * Buffer to help with string parsing
+ *
+ * buf_init() points sp and p at the start of the data and ep just past
+ * its end.  buf_check_overflow() reports an overflow when p > ep.
+ */
+struct cbuf {
+ unsigned char *sp; /* start of the data */
+ unsigned char *p; /* current position */
+ unsigned char *ep; /* end of the data (start + length) */
+};
+
+/* buf_init - make @buf cover the @datalen bytes starting at @data */
+static inline void buf_init(struct cbuf *buf, void *data, int datalen)
+{
+	unsigned char *start = data;
+
+	buf->ep = start + datalen;
+	buf->p = start;
+	buf->sp = start;
+}
+
+/* buf_check_overflow - non-zero once the position has moved past the end */
+static inline int buf_check_overflow(struct cbuf *buf)
+{
+	return buf->ep < buf->p;
+}
+
+static inline void buf_check_size(struct cbuf *buf, int len)
+{
+ if (buf->p+len > buf->ep) {
+ if (buf->p < buf->ep) {
+ eprintk(KERN_ERR, "buffer overflow\n");
+ buf->p = buf->ep + 1;
+ }
+ }
+}
+
+static inline void *buf_alloc(struct cbuf *buf, int len)
+{
+ void *ret = NULL;
+
+ buf_check_size(buf, len);
+ ret = buf->p;
+ buf->p += len;
+
+ return ret;
+}
+
+static inline void buf_put_int8(struct cbuf *buf, u8 val)
+{
+ buf_check_size(buf, 1);
+
+ buf->p[0] = val;
+ buf->p++;
+}
+
+static inline void buf_put_int16(struct cbuf *buf, u16 val)
+{
+ buf_check_size(buf, 2);
+
+ *(__le16 *) buf->p = cpu_to_le16(val);
+ buf->p += 2;
+}
+
+static inline void buf_put_int32(struct cbuf *buf, u32 val)
+{
+ buf_check_size(buf, 4);
+
+ *(__le32 *)buf->p = cpu_to_le32(val);
+ buf->p += 4;
+}
+
+static inline void buf_put_int64(struct cbuf *buf, u64 val)
+{
+ buf_check_size(buf, 8);
+
+ *(__le64 *)buf->p = cpu_to_le64(val);
+ buf->p += 8;
+}
+
+static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
+{
+ buf_check_size(buf, slen + 2);
+
+ buf_put_int16(buf, slen);
+ memcpy(buf->p, s, slen);
+ buf->p += slen;
+}
+
+static inline void buf_put_string(struct cbuf *buf, const char *s)
+{
+ buf_put_stringn(buf, s, strlen(s));
+}
+
+static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
+{
+ buf_check_size(buf, datalen);
+
+ memcpy(buf->p, data, datalen);
+ buf->p += datalen;
+}
+
+static inline u8 buf_get_int8(struct cbuf *buf)
+{
+ u8 ret = 0;
+
+ buf_check_size(buf, 1);
+ ret = buf->p[0];
+
+ buf->p++;
+
+ return ret;
+}
+
+static inline u16 buf_get_int16(struct cbuf *buf)
+{
+ u16 ret = 0;
+
+ buf_check_size(buf, 2);
+ ret = le16_to_cpu(*(__le16 *)buf->p);
+
+ buf->p += 2;
+
+ return ret;
+}
+
+static inline u32 buf_get_int32(struct cbuf *buf)
+{
+ u32 ret = 0;
+
+ buf_check_size(buf, 4);
+ ret = le32_to_cpu(*(__le32 *)buf->p);
+
+ buf->p += 4;
+
+ return ret;
+}
+
+static inline u64 buf_get_int64(struct cbuf *buf)
+{
+ u64 ret = 0;
+
+ buf_check_size(buf, 8);
+ ret = le64_to_cpu(*(__le64 *)buf->p);
+
+ buf->p += 8;
+
+ return ret;
+}
+
+static inline int
+buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
+{
+
+ u16 len = buf_get_int16(buf);
+ buf_check_size(buf, len);
+ if (len + 1 > datalen)
+ return 0;
+
+ memcpy(data, buf->p, len);
+ data[len] = 0;
+ buf->p += len;
+
+ return len + 1;
+}
+
+static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
+{
+ char *ret = NULL;
+ int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
+
+ if (n > 0) {
+ ret = sbuf->p;
+ sbuf->p += n;
+ }
+
+ return ret;
+}
+
+static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+{
+ buf_check_size(buf, datalen);
+
+ memcpy(data, buf->p, datalen);
+ buf->p += datalen;
+
+ return datalen;
+}
+
+static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
+ int datalen)
+{
+ char *ret = NULL;
+ int n = 0;
+
+ buf_check_size(dbuf, datalen);
+
+ n = buf_get_data(buf, dbuf->p, datalen);
+
+ if (n > 0) {
+ ret = dbuf->p;
+ dbuf->p += n;
+ }
+
+ return ret;
+}
+
+/**
+ * v9fs_size_stat - calculate the size of a variable length stat struct
+ * @v9ses: session information
+ * @stat: metadata (stat) structure
+ *
+ * Returns the serialized size of @stat, not counting the leading 2-byte
+ * size field itself, or 0 if @stat is NULL.  NULL strings count as empty.
+ */
+
+static int v9fs_size_stat(struct v9fs_session_info *v9ses,
+			  struct v9fs_stat *stat)
+{
+	int total;
+
+	if (!stat) {
+		eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
+		return 0;
+	}
+
+	/* fixed-width fields: type[2] dev[4] qid[1+4+8] mode[4] atime[4] */
+	total = 2 + 4 + (1 + 4 + 8) + 4 + 4;
+	/* mtime[4] length[8] */
+	total += 4 + 8;
+	/* 2-byte length prefixes of name, uid, gid and muid */
+	total += 4 * 2;
+
+	total += stat->name ? strlen(stat->name) : 0;
+	total += stat->uid ? strlen(stat->uid) : 0;
+	total += stat->gid ? strlen(stat->gid) : 0;
+	total += stat->muid ? strlen(stat->muid) : 0;
+
+	if (!v9ses->extended)
+		return total;
+
+	/* n_uid[4] n_gid[4] n_muid[4] plus the length prefix of extension */
+	total += 4 + 4 + 4 + 2;
+	if (stat->extension)
+		total += strlen(stat->extension);
+
+	return total;
+}
+
+/**
+ * serialize_stat - safely format a stat structure for transmission
+ * @v9ses: session info
+ * @stat: metadata (stat) structure
+ * @bufp: buffer to serialize structure into
+ *
+ * NULL string members are written as empty strings, which matches the
+ * way v9fs_size_stat() sizes them.
+ *
+ * Returns stat->size on success, or 0 if @bufp overflowed.
+ */
+
+static int
+serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
+	       struct cbuf *bufp)
+{
+	buf_put_int16(bufp, stat->size);
+	buf_put_int16(bufp, stat->type);
+	buf_put_int32(bufp, stat->dev);
+	buf_put_int8(bufp, stat->qid.type);
+	buf_put_int32(bufp, stat->qid.version);
+	buf_put_int64(bufp, stat->qid.path);
+	buf_put_int32(bufp, stat->mode);
+	buf_put_int32(bufp, stat->atime);
+	buf_put_int32(bufp, stat->mtime);
+	buf_put_int64(bufp, stat->length);
+
+	/* buf_put_string() calls strlen(), so never hand it a NULL pointer */
+	buf_put_string(bufp, stat->name ? stat->name : "");
+	buf_put_string(bufp, stat->uid ? stat->uid : "");
+	buf_put_string(bufp, stat->gid ? stat->gid : "");
+	buf_put_string(bufp, stat->muid ? stat->muid : "");
+
+	if (v9ses->extended) {
+		buf_put_string(bufp, stat->extension ? stat->extension : "");
+		buf_put_int32(bufp, stat->n_uid);
+		buf_put_int32(bufp, stat->n_gid);
+		buf_put_int32(bufp, stat->n_muid);
+	}
+
+	if (buf_check_overflow(bufp))
+		return 0;
+
+	return stat->size;
+}
+
+/**
+ * deserialize_stat - safely decode a received metadata (stat) structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @stat: metadata (stat) structure
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ * Fields are read from @bufp in wire order.  The 9P2000.u members are
+ * only read (and only assigned) when the session is extended.
+ *
+ * Returns stat->size + 2, or 0 if either buffer overflowed.
+ */
+
+static inline int
+deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
+		 struct v9fs_stat *stat, struct cbuf *dbufp)
+{
+	/* fixed-width part */
+	stat->size = buf_get_int16(bufp);
+	stat->type = buf_get_int16(bufp);
+	stat->dev = buf_get_int32(bufp);
+	stat->qid.type = buf_get_int8(bufp);
+	stat->qid.version = buf_get_int32(bufp);
+	stat->qid.path = buf_get_int64(bufp);
+	stat->mode = buf_get_int32(bufp);
+	stat->atime = buf_get_int32(bufp);
+	stat->mtime = buf_get_int32(bufp);
+	stat->length = buf_get_int64(bufp);
+
+	/* strings are copied into @dbufp */
+	stat->name = buf_get_stringb(bufp, dbufp);
+	stat->uid = buf_get_stringb(bufp, dbufp);
+	stat->gid = buf_get_stringb(bufp, dbufp);
+	stat->muid = buf_get_stringb(bufp, dbufp);
+
+	if (v9ses->extended) {
+		stat->extension = buf_get_stringb(bufp, dbufp);
+		stat->n_uid = buf_get_int32(bufp);
+		stat->n_gid = buf_get_int32(bufp);
+		stat->n_muid = buf_get_int32(bufp);
+	}
+
+	return (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) ?
+		0 : stat->size + 2;
+}
+
+/**
+ * deserialize_statb - wrapper for decoding a received metadata structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ * The stat structure itself is carved out of @dbufp, followed by its
+ * strings.
+ *
+ * Returns the decoded structure, or NULL if @dbufp has no room for it or
+ * decoding failed.
+ */
+
+static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
+						  *v9ses, struct cbuf *bufp,
+						  struct cbuf *dbufp)
+{
+	struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
+
+	/*
+	 * Do not rely on the returned pointer alone: the overflow state of
+	 * @dbufp tells whether the structure really fits, and writing
+	 * through the pointer otherwise would run past the buffer.
+	 */
+	if (!ret || buf_check_overflow(dbufp))
+		return NULL;
+
+	if (deserialize_stat(v9ses, bufp, ret, dbufp) <= 0)
+		return NULL;
+
+	return ret;
+}
+
+/**
+ * v9fs_deserialize_stat - decode a received metadata structure
+ * @v9ses: session info
+ * @buf: buffer to deserialize
+ * @buflen: length of received buffer
+ * @stat: metadata structure to decode into
+ * @statlen: length of destination metadata structure, including the
+ *	space after it that receives the strings
+ *
+ * Returns the value of deserialize_stat() (0 on overflow), or 0 if
+ * @statlen is smaller than struct v9fs_stat.
+ */
+
+int
+v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
+		      u32 buflen, struct v9fs_stat *stat, u32 statlen)
+{
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+
+	/* the subtraction below would wrap around for a too-small @statlen */
+	if (statlen < sizeof(struct v9fs_stat)) {
+		eprintk(KERN_ERR, "stat buffer too small\n");
+		return 0;
+	}
+
+	buf_init(bufp, buf, buflen);
+	buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
+		 statlen - sizeof(struct v9fs_stat));
+
+	return deserialize_stat(v9ses, bufp, stat, dbufp);
+}
+
+/*
+ * v9fs_size_fcall - compute the wire size of a T-message
+ * @v9ses: session information (passed on to v9fs_size_stat())
+ * @fcall: request to size
+ *
+ * Returns the total packet length including the size[4] msg[1] tag[2]
+ * header, or 0 if the message type is unknown or a TWSTAT request has no
+ * stat structure.  For TWSTAT the computed stat size is also stored in
+ * fcall->params.twstat.stat->size.
+ */
+static inline int
+v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
+{
+	int size = 4 + 1 + 2;	/* size[4] msg[1] tag[2] */
+	int i = 0;
+	struct v9fs_stat *stat;
+
+	switch (fcall->id) {
+	default:
+		eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
+		return 0;
+	case TVERSION:		/* msize[4] version[s] */
+		size += 4 + 2 + strlen(fcall->params.tversion.version);
+		break;
+	case TAUTH:		/* afid[4] uname[s] aname[s] */
+		size += 4 + 2 + strlen(fcall->params.tauth.uname) +
+		    2 + strlen(fcall->params.tauth.aname);
+		break;
+	case TFLUSH:		/* oldtag[2] */
+		size += 2;
+		break;
+	case TATTACH:		/* fid[4] afid[4] uname[s] aname[s] */
+		size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
+		    2 + strlen(fcall->params.tattach.aname);
+		break;
+	case TWALK:		/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+		size += 4 + 4 + 2;
+		/* now compute total for the array of names */
+		for (i = 0; i < fcall->params.twalk.nwname; i++)
+			size += 2 + strlen(fcall->params.twalk.wnames[i]);
+		break;
+	case TOPEN:		/* fid[4] mode[1] */
+		size += 4 + 1;
+		break;
+	case TCREATE:		/* fid[4] name[s] perm[4] mode[1] */
+		size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
+		break;
+	case TREAD:		/* fid[4] offset[8] count[4] */
+		size += 4 + 8 + 4;
+		break;
+	case TWRITE:		/* fid[4] offset[8] count[4] data[count] */
+		size += 4 + 8 + 4 + fcall->params.twrite.count;
+		break;
+	case TCLUNK:		/* fid[4] */
+		size += 4;
+		break;
+	case TREMOVE:		/* fid[4] */
+		size += 4;
+		break;
+	case TSTAT:		/* fid[4] */
+		size += 4;
+		break;
+	case TWSTAT:		/* fid[4] stat[n] */
+		stat = fcall->params.twstat.stat;
+		if (!stat) {
+			/* nothing to size, and nowhere to store the size */
+			eprintk(KERN_ERR, "twstat without stat structure\n");
+			return 0;
+		}
+		stat->size = v9fs_size_stat(v9ses, stat);
+		/* fid[4], stat length[2], stat size field[2], stat body */
+		size += 4 + 2 + 2 + stat->size;
+		break;
+	}
+	return size;
+}
+
+/*
+ * v9fs_serialize_fcall - marshall fcall struct into a packet
+ * @v9ses: session information
+ * @fcall: structure to convert
+ * @data: buffer to serialize fcall into
+ * @datalen: length of buffer to serialize fcall into
+ *
+ * Returns the size of the serialized message on success.  Errors:
+ * -EINVAL if @fcall is NULL or a TWSTAT request has no stat structure,
+ * -EPROTO if the message type is not a known T-message,
+ * -EIO if the message does not fit into @datalen bytes (nothing is
+ * written to @data in that case).
+ */
+
+int
+v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
+		     void *data, u32 datalen)
+{
+	int i = 0;
+	struct v9fs_stat *stat = NULL;
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+
+	if (!fcall) {
+		eprintk(KERN_ERR, "no fcall\n");
+		return -EINVAL;
+	}
+
+	/* both the sizing and the encoding below dereference the stat */
+	if (fcall->id == TWSTAT && !fcall->params.twstat.stat) {
+		eprintk(KERN_ERR, "twstat without stat structure\n");
+		return -EINVAL;
+	}
+
+	fcall->size = v9fs_size_fcall(v9ses, fcall);
+	if (!fcall->size)	/* v9fs_size_fcall() already logged why */
+		return -EPROTO;
+
+	/* refuse up front instead of discovering the overflow while writing */
+	if (fcall->size > datalen) {
+		eprintk(KERN_ERR, "message size %d exceeds buffer size %d\n",
+			fcall->size, datalen);
+		return -EIO;
+	}
+
+	buf_init(bufp, data, datalen);
+
+	buf_put_int32(bufp, fcall->size);
+	buf_put_int8(bufp, fcall->id);
+	buf_put_int16(bufp, fcall->tag);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
+		fcall->tag);
+
+	/* now encode it */
+	switch (fcall->id) {
+	default:
+		eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
+		return -EPROTO;
+	case TVERSION:
+		buf_put_int32(bufp, fcall->params.tversion.msize);
+		buf_put_string(bufp, fcall->params.tversion.version);
+		break;
+	case TAUTH:
+		buf_put_int32(bufp, fcall->params.tauth.afid);
+		buf_put_string(bufp, fcall->params.tauth.uname);
+		buf_put_string(bufp, fcall->params.tauth.aname);
+		break;
+	case TFLUSH:
+		buf_put_int16(bufp, fcall->params.tflush.oldtag);
+		break;
+	case TATTACH:
+		buf_put_int32(bufp, fcall->params.tattach.fid);
+		buf_put_int32(bufp, fcall->params.tattach.afid);
+		buf_put_string(bufp, fcall->params.tattach.uname);
+		buf_put_string(bufp, fcall->params.tattach.aname);
+		break;
+	case TWALK:
+		buf_put_int32(bufp, fcall->params.twalk.fid);
+		buf_put_int32(bufp, fcall->params.twalk.newfid);
+		buf_put_int16(bufp, fcall->params.twalk.nwname);
+		for (i = 0; i < fcall->params.twalk.nwname; i++)
+			buf_put_string(bufp, fcall->params.twalk.wnames[i]);
+		break;
+	case TOPEN:
+		buf_put_int32(bufp, fcall->params.topen.fid);
+		buf_put_int8(bufp, fcall->params.topen.mode);
+		break;
+	case TCREATE:
+		buf_put_int32(bufp, fcall->params.tcreate.fid);
+		buf_put_string(bufp, fcall->params.tcreate.name);
+		buf_put_int32(bufp, fcall->params.tcreate.perm);
+		buf_put_int8(bufp, fcall->params.tcreate.mode);
+		break;
+	case TREAD:
+		buf_put_int32(bufp, fcall->params.tread.fid);
+		buf_put_int64(bufp, fcall->params.tread.offset);
+		buf_put_int32(bufp, fcall->params.tread.count);
+		break;
+	case TWRITE:
+		buf_put_int32(bufp, fcall->params.twrite.fid);
+		buf_put_int64(bufp, fcall->params.twrite.offset);
+		buf_put_int32(bufp, fcall->params.twrite.count);
+		buf_put_data(bufp, fcall->params.twrite.data,
+			     fcall->params.twrite.count);
+		break;
+	case TCLUNK:
+		buf_put_int32(bufp, fcall->params.tclunk.fid);
+		break;
+	case TREMOVE:
+		buf_put_int32(bufp, fcall->params.tremove.fid);
+		break;
+	case TSTAT:
+		buf_put_int32(bufp, fcall->params.tstat.fid);
+		break;
+	case TWSTAT:
+		buf_put_int32(bufp, fcall->params.twstat.fid);
+		stat = fcall->params.twstat.stat;
+
+		/* outer length covers the stat's own size[2] field as well */
+		buf_put_int16(bufp, stat->size + 2);
+		serialize_stat(v9ses, stat, bufp);
+		break;
+	}
+
+	if (buf_check_overflow(bufp))
+		return -EIO;
+
+	return fcall->size;
+}
+
+/**
+ * v9fs_deserialize_fcall - unmarshal a response
+ * @v9ses: session information
+ * @msgsize: size of rcall message
+ * @buf: received buffer
+ * @buflen: length of received buffer
+ * @rcall: fcall structure to populate
+ * @rcalllen: length of fcall structure to populate
+ *
+ * @buf is expected to start at the message id byte; the size is passed
+ * separately in @msgsize. Variable-length parts of the reply (strings,
+ * read data, stat, walk qids) are placed in the space that follows
+ * struct v9fs_fcall inside @rcall, which is why @rcalllen must cover more
+ * than the bare structure.
+ *
+ * Returns @msgsize on success, -EPROTO for an unknown message id and -EIO
+ * when either the input or the output buffer was overrun.
+ */
+
+int
+v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
+		       void *buf, u32 buflen, struct v9fs_fcall *rcall,
+		       int rcalllen)
+{
+
+	struct cbuf buffer;
+	struct cbuf *bufp = &buffer;
+	struct cbuf dbuffer;
+	struct cbuf *dbufp = &dbuffer;
+	int i = 0;
+
+	/* bufp walks the wire data, dbufp hands out space behind *rcall */
+	buf_init(bufp, buf, buflen);
+	buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
+		 rcalllen - sizeof(struct v9fs_fcall));
+
+	rcall->size = msgsize;
+	rcall->id = buf_get_int8(bufp);
+	rcall->tag = buf_get_int16(bufp);
+
+	dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
+		rcall->tag);
+	switch (rcall->id) {
+	default:
+		eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
+		return -EPROTO;
+	case RVERSION:
+		rcall->params.rversion.msize = buf_get_int32(bufp);
+		rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+		break;
+	case RFLUSH:
+		break;
+	case RATTACH:
+		rcall->params.rattach.qid.type = buf_get_int8(bufp);
+		rcall->params.rattach.qid.version = buf_get_int32(bufp);
+		rcall->params.rattach.qid.path = buf_get_int64(bufp);
+		break;
+	case RWALK:
+		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
+		/*
+		 * The qid array must live in the output area (dbufp), like
+		 * every other variable-length field: the input buffer is
+		 * released by the caller once decoding is done, and taking
+		 * space from bufp would also skip over the encoded qids.
+		 */
+		rcall->params.rwalk.wqids = buf_alloc(dbufp,
+		      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
+		if (rcall->params.rwalk.wqids)
+			for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
+				rcall->params.rwalk.wqids[i].type =
+				    buf_get_int8(bufp);
+				/* qid version is 32 bits, as in RATTACH/ROPEN/RCREATE */
+				rcall->params.rwalk.wqids[i].version =
+				    buf_get_int32(bufp);
+				rcall->params.rwalk.wqids[i].path =
+				    buf_get_int64(bufp);
+			}
+		break;
+	case ROPEN:
+		rcall->params.ropen.qid.type = buf_get_int8(bufp);
+		rcall->params.ropen.qid.version = buf_get_int32(bufp);
+		rcall->params.ropen.qid.path = buf_get_int64(bufp);
+		rcall->params.ropen.iounit = buf_get_int32(bufp);
+		break;
+	case RCREATE:
+		rcall->params.rcreate.qid.type = buf_get_int8(bufp);
+		rcall->params.rcreate.qid.version = buf_get_int32(bufp);
+		rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+		rcall->params.rcreate.iounit = buf_get_int32(bufp);
+		break;
+	case RREAD:
+		rcall->params.rread.count = buf_get_int32(bufp);
+		rcall->params.rread.data = buf_get_datab(bufp, dbufp,
+			rcall->params.rread.count);
+		break;
+	case RWRITE:
+		rcall->params.rwrite.count = buf_get_int32(bufp);
+		break;
+	case RCLUNK:
+		break;
+	case RREMOVE:
+		break;
+	case RSTAT:
+		/* skip the 16-bit length prefix in front of the stat */
+		buf_get_int16(bufp);
+		rcall->params.rstat.stat =
+		    deserialize_statb(v9ses, bufp, dbufp);
+		break;
+	case RWSTAT:
+		break;
+	case RERROR:
+		rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
+		if (v9ses->extended)
+			rcall->params.rerror.errno = buf_get_int16(bufp);
+		else
+			/* rcall is not zeroed by its allocator; never leave garbage */
+			rcall->params.rerror.errno = 0;
+		break;
+	}
+
+	if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+		return -EIO;
+
+	return rcall->size;
+}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 00000000000..ee849613c61
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
+/*
+ * linux/fs/9p/conv.h
+ *
+ * 9P protocol conversion definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* Decode a stat structure from buf into stat; defined outside this header. */
+int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
+ u32 buflen, struct v9fs_stat *stat, u32 statlen);
+/*
+ * Encode the request tcall into buf. Returns the encoded message size on
+ * success or a negative errno (-EINVAL, -EPROTO, -EIO).
+ */
+int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
+ void *buf, u32 buflen);
+/*
+ * Decode a reply from buf into rcall. Returns msglen on success or a
+ * negative errno (-EPROTO, -EIO).
+ */
+int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
+ void *buf, u32 buflen, struct v9fs_fcall *rcall,
+ int rcalllen);
+
+/* this one is actually in error.c right now */
+/* (error.h carries a second declaration of the same function) */
+int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 00000000000..4445f06919d
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/9p/debug.h - V9FS Debug Definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/*
+ * Debug categories. Each is a single bit; dprintk() prints a message only
+ * when all bits of its level argument are set in v9fs_debug_level.
+ */
+#define DEBUG_ERROR (1<<0)
+#define DEBUG_CURRENT (1<<1)
+#define DEBUG_9P (1<<2)
+#define DEBUG_VFS (1<<3)
+#define DEBUG_CONV (1<<4)
+#define DEBUG_MUX (1<<5)
+#define DEBUG_TRANS (1<<6)
+#define DEBUG_SLABS (1<<7)
+
+/* Set to non-zero to compile in the hex-dumping version of dump_data() */
+#define DEBUG_DUMP_PKT 0
+
+/* Bit mask of enabled DEBUG_* categories; defined outside this header */
+extern int v9fs_debug_level;
+
+/*
+ * dprintk - print a debug message if its category is enabled
+ * @level: one or more DEBUG_* bits; all of them must be set in
+ *         v9fs_debug_level for the message to be printed
+ * @format: printk format string (must be a string literal, it is pasted
+ *          after the "-- function (pid): " prefix)
+ *
+ * @level is parenthesised so that a compound argument such as
+ * DEBUG_MUX | DEBUG_ERROR is tested as a whole; note it is evaluated twice.
+ */
+#define dprintk(level, format, arg...) \
+do { \
+	if ((v9fs_debug_level & (level)) == (level)) \
+		printk(KERN_NOTICE "-- %s (%d): " \
+		format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+/*
+ * eprintk - unconditional message prefixed with "v9fs: function (pid): "
+ * level is a printk log-level string (e.g. KERN_ERR) and, like format, is
+ * joined to the prefix by string-literal concatenation, so both must be
+ * string literals.
+ */
+#define eprintk(level, format, arg...) \
+do { \
+ printk(level "v9fs: %s (%d): " \
+ format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#if DEBUG_DUMP_PKT
+/* Hex-dump datalen bytes of data to the kernel log, four bytes per group. */
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+	int pos;
+	int len = datalen;
+
+	printk(KERN_DEBUG "data ");
+	for (pos = 0; pos < len; pos++) {
+		printk(KERN_DEBUG "%02x", data[pos]);
+		/* a separator follows every fourth byte and the final byte */
+		if ((pos % 4) == 3 || pos + 1 == len)
+			printk(KERN_DEBUG " ");
+	}
+	printk(KERN_DEBUG "\n");
+}
+#else /* DEBUG_DUMP_PKT */
+/* Packet dumping compiled out: same signature, empty body. */
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+}
+#endif /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 00000000000..fee5d19179c
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
+/*
+ * linux/fs/9p/error.c
+ *
+ * Error string handling
+ *
+ * Plan 9 uses error strings, Unix uses error numbers. These functions
+ * try to help manage that and provide for dynamically adding error
+ * mappings.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+#include "debug.h"
+#include "error.h"
+
+/**
+ * v9fs_error_init - preload the error-string hash table
+ *
+ * Empties every bucket of hash_errmap and then hashes each entry of the
+ * static errmap table (up to its NULL-name terminator) into it by name.
+ * Always returns 1.
+ */
+
+int v9fs_error_init(void)
+{
+	struct errormap *entry = errmap;
+	int slot;
+
+	/* start from empty buckets */
+	for (slot = 0; slot < ERRHASHSZ; slot++)
+		INIT_HLIST_HEAD(&hash_errmap[slot]);
+
+	/* hash every predefined mapping by its error string */
+	while (entry->name != NULL) {
+		slot = jhash(entry->name, strlen(entry->name), 0) % ERRHASHSZ;
+		INIT_HLIST_NODE(&entry->list);
+		hlist_add_head(&entry->list, &hash_errmap[slot]);
+		entry++;
+	}
+
+	return 1;
+}
+
+/**
+ * v9fs_errstr2errno - convert error string to error number
+ * @errstr: error string
+ *
+ * Looks @errstr up (exact, case-sensitive match) in the hash table filled
+ * by v9fs_error_init() and returns the mapped value negated. A string that
+ * is not in the table, or whose table value is 0, is logged and reported
+ * as -1; this function therefore never returns 0.
+ */
+
+int v9fs_errstr2errno(char *errstr)
+{
+	struct errormap *entry = NULL;
+	struct hlist_node *node = NULL;
+	int slot = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+	int code = 0;
+
+	hlist_for_each_entry(entry, node, &hash_errmap[slot], list) {
+		if (strcmp(entry->name, errstr) != 0)
+			continue;
+		code = entry->val;
+		break;
+	}
+
+	if (code != 0)
+		return -code;
+
+	/* TODO: if error isn't found, add it dynamically */
+	printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
+	       errstr);
+	return -1;
+}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 00000000000..78f89acf7c9
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,178 @@
+/*
+ * linux/fs/9p/error.h
+ *
+ * Huge Nasty Error Table
+ *
+ * Plan 9 uses error strings, Unix uses error numbers. This table tries to
+ * match UNIX strings and Plan 9 strings to unix error numbers. It is used
+ * to preload the dynamic error table which can also track user-specific error
+ * strings.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/errno.h>
+#include <asm/errno.h>
+
+/* One error-string to errno mapping, chained into a hash_errmap bucket. */
+struct errormap {
+ char *name; /* error string as sent by the server */
+ int val; /* positive errno value it maps to */
+
+ struct hlist_node list; /* bucket linkage, set up by v9fs_error_init() */
+};
+
+/* Number of buckets; strings are placed by jhash(name) % ERRHASHSZ */
+#define ERRHASHSZ 32
+static struct hlist_head hash_errmap[ERRHASHSZ];
+
+/*
+ * Built-in string-to-errno table. v9fs_error_init() walks it until the
+ * entry whose name is NULL. Lookups compare strings exactly, which is why
+ * differently capitalised variants of the same message appear separately.
+ * Entries with value 0 are treated by v9fs_errstr2errno() like strings
+ * that were not found.
+ */
+/* FixMe - reduce to a reasonable size */
+static struct errormap errmap[] = {
+ {"Operation not permitted", EPERM},
+ {"wstat prohibited", EPERM},
+ {"No such file or directory", ENOENT},
+ {"directory entry not found", ENOENT},
+ {"file not found", ENOENT},
+ {"Interrupted system call", EINTR},
+ {"Input/output error", EIO},
+ {"No such device or address", ENXIO},
+ {"Argument list too long", E2BIG},
+ {"Bad file descriptor", EBADF},
+ {"Resource temporarily unavailable", EAGAIN},
+ {"Cannot allocate memory", ENOMEM},
+ {"Permission denied", EACCES},
+ {"Bad address", EFAULT},
+ {"Block device required", ENOTBLK},
+ {"Device or resource busy", EBUSY},
+ {"File exists", EEXIST},
+ {"Invalid cross-device link", EXDEV},
+ {"No such device", ENODEV},
+ {"Not a directory", ENOTDIR},
+ {"Is a directory", EISDIR},
+ {"Invalid argument", EINVAL},
+ {"Too many open files in system", ENFILE},
+ {"Too many open files", EMFILE},
+ {"Text file busy", ETXTBSY},
+ {"File too large", EFBIG},
+ {"No space left on device", ENOSPC},
+ {"Illegal seek", ESPIPE},
+ {"Read-only file system", EROFS},
+ {"Too many links", EMLINK},
+ {"Broken pipe", EPIPE},
+ {"Numerical argument out of domain", EDOM},
+ {"Numerical result out of range", ERANGE},
+ {"Resource deadlock avoided", EDEADLK},
+ {"File name too long", ENAMETOOLONG},
+ {"No locks available", ENOLCK},
+ {"Function not implemented", ENOSYS},
+ {"Directory not empty", ENOTEMPTY},
+ {"Too many levels of symbolic links", ELOOP},
+ {"No message of desired type", ENOMSG},
+ {"Identifier removed", EIDRM},
+ {"No data available", ENODATA},
+ {"Machine is not on the network", ENONET},
+ {"Package not installed", ENOPKG},
+ {"Object is remote", EREMOTE},
+ {"Link has been severed", ENOLINK},
+ {"Communication error on send", ECOMM},
+ {"Protocol error", EPROTO},
+ {"Bad message", EBADMSG},
+ {"File descriptor in bad state", EBADFD},
+ {"Streams pipe error", ESTRPIPE},
+ {"Too many users", EUSERS},
+ {"Socket operation on non-socket", ENOTSOCK},
+ {"Message too long", EMSGSIZE},
+ {"Protocol not available", ENOPROTOOPT},
+ {"Protocol not supported", EPROTONOSUPPORT},
+ {"Socket type not supported", ESOCKTNOSUPPORT},
+ {"Operation not supported", EOPNOTSUPP},
+ {"Protocol family not supported", EPFNOSUPPORT},
+ {"Network is down", ENETDOWN},
+ {"Network is unreachable", ENETUNREACH},
+ {"Network dropped connection on reset", ENETRESET},
+ {"Software caused connection abort", ECONNABORTED},
+ {"Connection reset by peer", ECONNRESET},
+ {"No buffer space available", ENOBUFS},
+ {"Transport endpoint is already connected", EISCONN},
+ {"Transport endpoint is not connected", ENOTCONN},
+ {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
+ {"Connection timed out", ETIMEDOUT},
+ {"Connection refused", ECONNREFUSED},
+ {"Host is down", EHOSTDOWN},
+ {"No route to host", EHOSTUNREACH},
+ {"Operation already in progress", EALREADY},
+ {"Operation now in progress", EINPROGRESS},
+ {"Is a named type file", EISNAM},
+ {"Remote I/O error", EREMOTEIO},
+ {"Disk quota exceeded", EDQUOT},
+/* errors from fossil, vacfs, and u9fs */
+ {"fid unknown or out of range", EBADF},
+ {"permission denied", EACCES},
+ {"file does not exist", ENOENT},
+ {"authentication failed", ECONNREFUSED},
+ {"bad offset in directory read", ESPIPE},
+ {"bad use of fid", EBADF},
+ {"wstat can't convert between files and directories", EPERM},
+ {"directory is not empty", ENOTEMPTY},
+ {"file exists", EEXIST},
+ {"file already exists", EEXIST},
+ {"file or directory already exists", EEXIST},
+ {"fid already in use", EBADF},
+ {"file in use", ETXTBSY},
+ {"i/o error", EIO},
+ {"file already open for I/O", ETXTBSY},
+ {"illegal mode", EINVAL},
+ {"illegal name", ENAMETOOLONG},
+ {"not a directory", ENOTDIR},
+ {"not a member of proposed group", EPERM},
+ {"not owner", EACCES},
+ {"only owner can change group in wstat", EACCES},
+ {"read only file system", EROFS},
+ {"no access to special file", EPERM},
+ {"i/o count too large", EIO},
+ {"unknown group", EINVAL},
+ {"unknown user", EINVAL},
+ {"bogus wstat buffer", EPROTO},
+ {"exclusive use file already open", EAGAIN},
+ {"corrupted directory entry", EIO},
+ {"corrupted file entry", EIO},
+ {"corrupted block label", EIO},
+ {"corrupted meta data", EIO},
+ {"illegal offset", EINVAL},
+ {"illegal path element", ENOENT},
+ {"root of file system is corrupted", EIO},
+ {"corrupted super block", EIO},
+ {"protocol botch", EPROTO},
+ {"file system is full", ENOSPC},
+ {"file is in use", EAGAIN},
+ {"directory entry is not allocated", ENOENT},
+ {"file is read only", EROFS},
+ {"file has been removed", EIDRM},
+ {"only support truncation to zero length", EPERM},
+ {"cannot remove root", EPERM},
+ {"file too big", EFBIG},
+ {"venti i/o error", EIO},
+ /* these are not errors */
+ {"u9fs rhostsauth: no authentication required", 0},
+ {"u9fs authnone: no authentication required", 0},
+ {NULL, -1}
+};
+
+/* Fill hash_errmap from errmap; always returns 1 */
+extern int v9fs_error_init(void);
+/* Map an error string to a negative errno; unknown strings yield -1 */
+extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 00000000000..821c9c4d76a
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,241 @@
+/*
+ * V9FS FID Management
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_fid_insert - add a fid to a dentry
+ * @fid: fid to add
+ * @dentry: dentry that it is being added to
+ *
+ * The dentry's d_fsdata holds the head of its fid list; the head is
+ * allocated here on first use. The fid is stamped with the uid and pid of
+ * the calling task. Returns 0, or -ENOMEM if the head cannot be allocated.
+ */
+
+static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+{
+	struct list_head *head;
+
+	dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
+		dentry->d_iname, dentry);
+
+	head = (struct list_head *)dentry->d_fsdata;
+	if (head == NULL) {
+		/* first fid for this dentry: create the list head */
+		head = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		dentry->d_fsdata = head;
+		if (head == NULL) {
+			dprintk(DEBUG_ERROR, "Out of memory\n");
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(head);
+	}
+
+	/* remember who owns the fid so lookups can match on it */
+	fid->uid = current->uid;
+	fid->pid = current->pid;
+	list_add(&fid->list, head);
+
+	return 0;
+}
+
+/**
+ * v9fs_fid_create - allocate a FID structure
+ * @dentry: dentry to link newly created fid to
+ *
+ * The new structure is fully zeroed and then given the placeholder fid
+ * number -1; the caller is expected to fill in the real number.
+ *
+ * Returns the new fid, ERR_PTR(-ENOMEM) if the structure itself cannot be
+ * allocated, or NULL if it could not be attached to @dentry. Callers have
+ * to check for both failure forms.
+ */
+
+struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
+{
+	struct v9fs_fid *new;
+
+	new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+	if (new == NULL) {
+		dprintk(DEBUG_ERROR, "Out of Memory\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * kmalloc() does not clear memory: zero everything so that fields
+	 * not set below (qid, readdir state, filp, v9ses, ...) never hold
+	 * stale heap contents.
+	 */
+	memset(new, 0, sizeof(struct v9fs_fid));
+
+	new->fid = -1;
+	new->fidopen = 0;
+	new->fidcreate = 0;
+	new->fidclunked = 0;
+	new->iounit = 0;
+
+	if (v9fs_fid_insert(new, dentry) == 0)
+		return new;
+	else {
+		dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
+		kfree(new);
+		return NULL;
+	}
+}
+
+/**
+ * v9fs_fid_destroy - deallocate a FID structure
+ * @fid: fid to destroy
+ *
+ * Removes @fid from the list it is linked on and frees it.
+ */
+
+void v9fs_fid_destroy(struct v9fs_fid *fid)
+{
+	struct list_head *link = &fid->list;
+
+	/* unlink first, then release the memory */
+	list_del(link);
+	kfree(fid);
+}
+
+/**
+ * v9fs_fid_lookup - retrieve the right fid from a particular dentry
+ * @dentry: dentry to look for fid in
+ * @type: intent of lookup (operation or traversal)
+ *
+ * search list of fids associated with a dentry for a fid with a matching
+ * thread id or uid. If that fails, look up the dentry's parents to see if you
+ * can find a matching fid.
+ *
+ * A fid owned by the calling task wins over one owned by its parent task,
+ * which wins over one that merely has the same uid. For lookups other
+ * than FID_OP, fids that are already open are skipped. If nothing matches
+ * and @dentry is an ancestor of the caller's working directory, a new fid
+ * is created by walking ".." from the working directory's fid.
+ *
+ * Returns the fid, or NULL if none could be found or created.
+ */
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
+{
+	struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+	struct v9fs_fid *current_fid = NULL;
+	struct v9fs_fid *temp = NULL;
+	struct v9fs_fid *return_fid = NULL;
+	struct v9fs_fid *any_fid = NULL;	/* last real entry seen */
+	int found_parent = 0;
+	int found_user = 0;
+
+	dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
+		type);
+
+	if (fid_list && !list_empty(fid_list)) {
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			/*
+			 * The loop cursor is not a valid entry once the loop
+			 * has run to completion, so keep a separate pointer
+			 * for the root fallback below.
+			 */
+			any_fid = current_fid;
+
+			if (current_fid->uid == current->uid) {
+				if (return_fid == NULL) {
+					if ((type == FID_OP)
+					    || (!current_fid->fidopen)) {
+						return_fid = current_fid;
+						found_user = 1;
+					}
+				}
+			}
+			if (current_fid->pid == current->real_parent->pid) {
+				if ((return_fid == NULL) || (found_parent)
+				    || (found_user)) {
+					if ((type == FID_OP)
+					    || (!current_fid->fidopen)) {
+						return_fid = current_fid;
+						found_parent = 1;
+						found_user = 0;
+					}
+				}
+			}
+			if (current_fid->pid == current->pid) {
+				if ((type == FID_OP) ||
+				    (!current_fid->fidopen)) {
+					return_fid = current_fid;
+					found_parent = 0;
+					found_user = 0;
+				}
+			}
+		}
+	}
+
+	/* we are at the root but didn't match */
+	if ((!return_fid) && (dentry->d_parent == dentry)) {
+		/* TODO: clone attach with new uid */
+		return_fid = any_fid;
+	}
+
+	/*
+	 * Try to derive a fid from the working directory. This is skipped
+	 * when dentry is the working directory itself: the nested lookup
+	 * below would then be called with the same arguments again.
+	 */
+	if (!return_fid && dentry != current->fs->pwd) {
+		struct dentry *par = current->fs->pwd->d_parent;
+
+		/* is dentry one of the ancestors of the working directory? */
+		while (par != NULL) {
+			if (par == dentry)
+				break;
+			if (par == par->d_parent) {
+				dprintk(DEBUG_ERROR,
+					"got to root without finding dentry\n");
+				break;
+			}
+			par = par->d_parent;
+		}
+
+/* XXX - there may be some duplication we can get rid of */
+		if (par == dentry) {
+			/* we need to fid_lookup the starting point */
+			int fidnum = -1;
+			int oldfid = -1;
+			int result = -1;
+			int walked = 0;	/* set once the server knows fidnum */
+			struct v9fs_session_info *v9ses =
+			    v9fs_inode2v9ses(current->fs->pwd->d_inode);
+
+			current_fid =
+			    v9fs_fid_lookup(current->fs->pwd, FID_WALK);
+			if (current_fid == NULL) {
+				dprintk(DEBUG_ERROR,
+					"process cwd doesn't have a fid\n");
+				return return_fid;
+			}
+			oldfid = current_fid->fid;
+			par = current->fs->pwd;
+			/* TODO: take advantage of multiwalk */
+
+			fidnum = v9fs_get_idpool(&v9ses->fidpool);
+			if (fidnum < 0) {
+				dprintk(DEBUG_ERROR,
+					"could not get a new fid num\n");
+				return return_fid;
+			}
+
+			/* walk ".." one level at a time until dentry is reached */
+			while (par != dentry) {
+				result =
+				    v9fs_t_walk(v9ses, oldfid, fidnum, "..",
+						NULL);
+				if (result < 0) {
+					dprintk(DEBUG_ERROR,
+						"problem walking to parent\n");
+
+					break;
+				}
+				walked = 1;
+				oldfid = fidnum;
+				if (par == par->d_parent) {
+					dprintk(DEBUG_ERROR,
+						"can't find dentry\n");
+					break;
+				}
+				par = par->d_parent;
+			}
+			if (par == dentry) {
+				struct v9fs_fid *new_fid =
+				    v9fs_fid_create(dentry);
+
+				/* creation can fail with NULL or an ERR_PTR */
+				if (new_fid == NULL || IS_ERR(new_fid)) {
+					/*
+					 * NOTE(review): fidnum stays allocated
+					 * because the server-side fid has not
+					 * been clunked - confirm how this
+					 * should be cleaned up.
+					 */
+					dprintk(DEBUG_ERROR,
+						"could not create fid\n");
+				} else {
+					new_fid->fid = fidnum;
+					return_fid = new_fid;
+				}
+			} else if (!walked) {
+				/*
+				 * No walk succeeded, so fidnum was never
+				 * established remotely: give the number back.
+				 */
+				v9fs_put_idpool(fidnum, &v9ses->fidpool);
+			}
+		}
+	}
+
+	return return_fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 00000000000..7db478ccca3
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,57 @@
+/*
+ * V9FS FID Management
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/list.h>
+
+/*
+ * Lookup intents for v9fs_fid_lookup(): with FID_OP an already opened fid
+ * may be returned; with any other value (FID_WALK) opened fids are skipped.
+ */
+#define FID_OP 0
+#define FID_WALK 1
+
+/*
+ * Client-side state for one 9P fid. Instances are created by
+ * v9fs_fid_create(), linked on the list kept in a dentry's d_fsdata, and
+ * released by v9fs_fid_destroy().
+ */
+struct v9fs_fid {
+ struct list_head list; /* list of fids associated with a dentry */
+ struct list_head active; /* XXX - debug */
+
+ u32 fid; /* fid number; v9fs_fid_create() starts it at -1 */
+ unsigned char fidopen; /* set when fid is opened */
+ unsigned char fidcreate; /* set when fid was just created */
+ unsigned char fidclunked; /* set when fid has already been clunked */
+
+ struct v9fs_qid qid;
+ u32 iounit;
+
+ /* readdir stuff */
+ int rdir_fpos;
+ loff_t rdir_pos;
+ struct v9fs_fcall *rdir_fcall;
+
+ /* management stuff */
+ /* both are taken from the calling task when the fid is inserted */
+ pid_t pid; /* thread associated with this fid */
+ uid_t uid; /* user associated with this fid */
+
+ /* private data */
+ struct file *filp; /* backpointer to File struct for open files */
+ struct v9fs_session_info *v9ses; /* session info for this FID */
+};
+
+/* Find (or derive by walking) a fid for dentry; NULL when none is available */
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
+/* Unlink fid from its list and free it */
+void v9fs_fid_destroy(struct v9fs_fid *fid);
+/* Allocate a fid and attach it to the dentry; fails with ERR_PTR(-ENOMEM) or NULL */
+struct v9fs_fid *v9fs_fid_create(struct dentry *);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 00000000000..8835b576f74
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,475 @@
+/*
+ * linux/fs/9p/mux.c
+ *
+ * Protocol Multiplexer
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "transport.h"
+#include "conv.h"
+#include "mux.h"
+
+/**
+ * dprintcond - print condition of session info
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ * Debug aid used as the final term of a wait condition: it logs the
+ * transport status and the request's reply pointer and always evaluates
+ * to 0, so it never changes the outcome of the condition.
+ */
+
+static inline int
+dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	dprintk(DEBUG_MUX, "condition: %d, %p\n",
+		v9ses->transport->status, req->rcall);
+
+	return 0;
+}
+
+/**
+ * xread - force read of a certain number of bytes
+ * @v9ses: session info structure
+ * @ptr: pointer to buffer
+ * @sz: number of bytes to read
+ *
+ * Keeps calling the transport's read method until @sz bytes have arrived.
+ * Returns the byte count on success. If a read returns zero or a negative
+ * value, that value is returned as is and the bytes already received are
+ * not reported.
+ *
+ * Chuck Cranor CS-533 project1
+ */
+
+static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+{
+	int done;
+	int got = 0;
+
+	for (done = 0; done < sz; done += got) {
+		got = v9ses->transport->read(v9ses->transport, ptr, sz - done);
+		if (got <= 0) {
+			dprintk(DEBUG_ERROR, "xread errno %d\n", got);
+			return got;
+		}
+		/* continue filling right behind what has just arrived */
+		ptr += got;
+	}
+
+	return (done);
+}
+
+/**
+ * read_message - read a full 9P2000 fcall packet
+ * @v9ses: session info structure
+ * @rcall: fcall structure to read into
+ * @rcalllen: size of fcall buffer
+ *
+ * Reads the 4-byte little-endian size field, then the rest of the message,
+ * and decodes it into @rcall.
+ *
+ * Returns 0 on success or a negative errno: the transport's own error,
+ * -EIO for a truncated or undersized message, -E2BIG for a message larger
+ * than the session's maxdata, -ENOMEM, or whatever the decoder reports.
+ */
+
+static int
+read_message(struct v9fs_session_info *v9ses,
+	     struct v9fs_fcall *rcall, int rcalllen)
+{
+	unsigned char buf[4];
+	void *data;
+	int size = 0;
+	int res = 0;
+
+	res = xread(v9ses, buf, sizeof(buf));
+	if (res < 0) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return res;
+	}
+
+	if (res < 4) {
+		dprintk(DEBUG_ERROR,
+			"Reading of count field failed returned: %d\n", res);
+		return -EIO;
+	}
+
+	/* the top byte is widened first so the shift cannot hit the sign bit */
+	size = (int)(buf[0] | (buf[1] << 8) | (buf[2] << 16) |
+		     ((u32)buf[3] << 24));
+	dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+
+	/*
+	 * Besides the size field itself, every message carries at least an
+	 * id byte and a 16-bit tag. Anything shorter (or a size that shows
+	 * up negative here) is malformed.
+	 */
+	if (size < 4 + 1 + 2) {
+		dprintk(DEBUG_ERROR, "packet too small: %d\n", size);
+		return -EIO;
+	}
+
+	/* adjust for the four bytes of size */
+	size -= 4;
+
+	if (size > v9ses->maxdata) {
+		dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
+		return -E2BIG;
+	}
+
+	data = kmalloc(size, GFP_KERNEL);
+	if (!data) {
+		eprintk(KERN_WARNING, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	res = xread(v9ses, data, size);
+	if (res < size) {
+		dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
+			res);
+		kfree(data);
+		/*
+		 * xread() reports end-of-stream as 0; a message cut short
+		 * must not be reported to the caller as success.
+		 */
+		return (res < 0) ? res : -EIO;
+	}
+
+	/* we now have an in-memory string that is the reply.
+	 * deserialize it. The decoder is told the real length of the
+	 * buffer so that its bounds checks match the allocation above.
+	 */
+	res = v9fs_deserialize_fcall(v9ses, size, data, size,
+				     rcall, rcalllen);
+
+	kfree(data);
+
+	if (res < 0)
+		return res;
+
+	return 0;
+}
+
+/**
+ * v9fs_recv - receive an RPC response for a particular tag
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ * Sleeps interruptibly until the request has a reply or an error, or the
+ * transport is no longer connected, then takes the request off the
+ * session's pending list. Returns the request's error if one was set,
+ * -ECONNRESET if the transport is disconnected, and otherwise the result
+ * of the wait.
+ */
+
+static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	int wait_rc = 0;
+
+	dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
+	wait_rc = wait_event_interruptible(v9ses->read_wait,
+			((v9ses->transport->status != Connected) ||
+			 (req->rcall != 0) || (req->err < 0) ||
+			 dprintcond(v9ses, req)));
+
+	dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+
+	/* the request leaves the pending list whatever the outcome */
+	spin_lock(&v9ses->muxlock);
+	list_del(&req->next);
+	spin_unlock(&v9ses->muxlock);
+
+	/* an error recorded on the request takes precedence */
+	if (req->err < 0)
+		return req->err;
+
+	return (v9ses->transport->status == Disconnected) ?
+		-ECONNRESET : wait_rc;
+}
+
+/**
+ * v9fs_send - send a 9P request
+ * @v9ses: session info structure
+ * @req: RPC request to send
+ *
+ * Serializes the request into a temporary buffer, puts the request on the
+ * session's pending list and writes the bytes to the transport. Returns 0
+ * when the whole message was written, -ENOMEM, the serializer's error, the
+ * transport's error, or -EREMOTEIO after a short write.
+ */
+
+static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+	struct v9fs_fcall *tcall = req->tcall;
+	void *wire = NULL;
+	int ret = -1;
+
+	wire = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+	if (wire == NULL)
+		return -ENOMEM;
+
+	tcall->size = 0;	/* enforce size recalculation */
+	ret = v9fs_serialize_fcall(v9ses, tcall, wire,
+				   v9ses->maxdata + V9FS_IOHDRSZ);
+	if (ret < 0)
+		goto out;
+
+	/* queue before writing so that the reply can find the request */
+	spin_lock(&v9ses->muxlock);
+	list_add(&req->next, &v9ses->mux_fcalls);
+	spin_unlock(&v9ses->muxlock);
+
+	dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
+		tcall->size);
+	ret = v9ses->transport->write(v9ses->transport, wire, tcall->size);
+
+	if (ret == tcall->size) {
+		ret = 0;
+	} else {
+		/* failed or short write: take the request back off the list */
+		spin_lock(&v9ses->muxlock);
+		list_del(&req->next);
+		kfree(req->rcall);
+
+		spin_unlock(&v9ses->muxlock);
+		if (ret >= 0)
+			ret = -EREMOTEIO;
+	}
+
+      out:
+	kfree(wire);
+	return ret;
+}
+
+/**
+ * v9fs_mux_rpc - send a request, receive a response
+ * @v9ses: session info structure
+ * @tcall: fcall to send
+ * @rcall: buffer to place response into
+ *
+ * Allocates a tag for the request (TVERSION is sent with tag -1), sends
+ * it and waits for the reply. If the wait is interrupted by a signal, a
+ * flush for the tag is sent before returning.
+ *
+ * Returns a non-negative value on success or a negative errno. When @rcall
+ * is not NULL, *@rcall receives the reply - which may be non-NULL even on
+ * failure, e.g. for an RERROR - and the caller has to kfree() it. When
+ * @rcall is NULL the reply is freed here.
+ */
+
+long
+v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
+	     struct v9fs_fcall **rcall)
+{
+	int tid = -1;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_rpcreq req;
+	int ret = -1;
+
+	if (!v9ses)
+		return -EINVAL;
+
+	if (!v9ses->transport || v9ses->transport->status != Connected)
+		return -EIO;
+
+	if (rcall)
+		*rcall = NULL;
+
+	if (tcall->id != TVERSION) {
+		tid = v9fs_get_idpool(&v9ses->tidpool);
+		if (tid < 0)
+			return -ENOMEM;
+	}
+
+	tcall->tag = tid;
+
+	req.tcall = tcall;
+	req.err = 0;
+	req.rcall = NULL;
+
+	ret = v9fs_send(v9ses, &req);
+
+	if (ret < 0) {
+		if (tcall->id != TVERSION)
+			v9fs_put_idpool(tid, &v9ses->tidpool);
+		dprintk(DEBUG_MUX, "error %d\n", ret);
+		return ret;
+	}
+
+	ret = v9fs_recv(v9ses, &req);
+
+	fcall = req.rcall;
+
+	dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
+	if (ret == -ERESTARTSYS) {
+		if (v9ses->transport->status != Disconnected
+		    && tcall->id != TFLUSH) {
+			unsigned long flags;
+
+			dprintk(DEBUG_MUX, "flushing the tag: %d\n",
+				tcall->tag);
+			/*
+			 * The pending-signal flag is cleared around the
+			 * flush, presumably so its own interruptible wait
+			 * can sleep, and recomputed afterwards.
+			 */
+			clear_thread_flag(TIF_SIGPENDING);
+			v9fs_t_flush(v9ses, tcall->tag);
+			spin_lock_irqsave(&current->sighand->siglock, flags);
+			recalc_sigpending();
+			spin_unlock_irqrestore(&current->sighand->siglock,
+					       flags);
+			dprintk(DEBUG_MUX, "flushing done\n");
+		}
+
+		goto release_req;
+	} else if (ret < 0)
+		goto release_req;
+
+	if (!fcall)
+		ret = -EIO;
+	else {
+		if (fcall->id == RERROR) {
+			/*
+			 * v9fs_errstr2errno() never returns 0 (unknown
+			 * strings come back as -1), so it cannot signal
+			 * "no match". On extended sessions the numeric
+			 * errno sent by the server is therefore used first;
+			 * the string is consulted only when no number is
+			 * available. The field is read only on extended
+			 * sessions, where the decoder is known to set it.
+			 */
+			if (v9ses->extended && fcall->params.rerror.errno)
+				ret = -(fcall->params.rerror.errno);
+			else
+				ret = v9fs_errstr2errno(
+					fcall->params.rerror.error);
+
+			/* an error reply must never be reported as success */
+			if (ret == 0)
+				ret = -ESERVERFAULT;
+		} else if (fcall->id != tcall->id + 1) {
+			/* a reply id is always the request id plus one */
+			dprintk(DEBUG_ERROR,
+				"fcall mismatch: expected %d, got %d\n",
+				tcall->id + 1, fcall->id);
+			ret = -EIO;
+		}
+	}
+
+      release_req:
+	if (tcall->id != TVERSION)
+		v9fs_put_idpool(tid, &v9ses->tidpool);
+	if (rcall)
+		*rcall = fcall;
+	else
+		kfree(fcall);
+
+	return ret;
+}
+
+/**
+ * v9fs_mux_cancel_requests - cancels all pending requests
+ *
+ * @v9ses: session info structure
+ * @err: error code to return to the requests
+ *
+ * Stores @err in every request on the session's pending list and wakes
+ * all tasks sleeping on the session's read wait queue.
+ */
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+{
+	struct v9fs_rpcreq *pending;
+
+	dprintk(DEBUG_MUX, " %d\n", err);
+
+	spin_lock(&v9ses->muxlock);
+	/* nothing is unlinked here, so the plain iterator is sufficient */
+	list_for_each_entry(pending, &v9ses->mux_fcalls, next)
+		pending->err = err;
+	spin_unlock(&v9ses->muxlock);
+
+	wake_up_all(&v9ses->read_wait);
+}
+
+/**
+ * v9fs_recvproc - kproc to handle demultiplexing responses
+ * @data: session info structure
+ *
+ * Loop of the per-session receive thread: read one reply, attach it to
+ * the pending request with the same tag and wake the waiters. A read
+ * error is copied to every pending request and ends the loop. On exit the
+ * transport is closed. Returns 1 if the loop ended without an error, 0
+ * otherwise.
+ */
+
+static int v9fs_recvproc(void *data)
+{
+	struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
+	struct v9fs_fcall *rcall = NULL;
+	struct v9fs_rpcreq *rptr;
+	struct v9fs_rpcreq *req;
+	struct v9fs_rpcreq *rreq;
+	int err = 0;
+
+	allow_signal(SIGKILL);
+	set_current_state(TASK_INTERRUPTIBLE);
+	/* tell v9fs_mux_init() that the thread is up */
+	complete(&v9ses->proccmpl);
+	while (!kthread_should_stop() && err >= 0) {
+		req = rptr = rreq = NULL;
+
+		/* a fresh buffer per reply: ownership moves to the request */
+		rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+		if (!rcall) {
+			eprintk(KERN_ERR, "no memory for buffers\n");
+			break;
+		}
+
+		err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
+		spin_lock(&v9ses->muxlock);
+		if (err < 0) {
+			/* fail every request that is still waiting */
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				rreq->err = err;
+			}
+			if(err != -ERESTARTSYS)
+				eprintk(KERN_ERR,
+					"Transport error while reading message %d\n", err);
+		} else {
+			/* match the reply to its request by tag */
+			list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+				if (rreq->tcall->tag == rcall->tag) {
+					req = rreq;
+					req->rcall = rcall;
+					break;
+				}
+			}
+		}
+
+		if (req && (req->tcall->id == TFLUSH)) {
+			struct v9fs_rpcreq *treq = NULL;
+			list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
+				if (treq->tcall->tag ==
+				    req->tcall->params.tflush.oldtag) {
+					/*
+					 * Unlink the flushed request itself:
+					 * rptr is only the look-ahead cursor
+					 * and may even be the list head. The
+					 * _init variant keeps the node
+					 * self-linked, so the list_del() its
+					 * owner does later in v9fs_recv()
+					 * stays harmless.
+					 */
+					list_del_init(&treq->next);
+					kfree(treq->rcall);
+					/* no dangling pointer for the owner */
+					treq->rcall = NULL;
+					break;
+				}
+			}
+		}
+
+		spin_unlock(&v9ses->muxlock);
+
+		if (!req) {
+			if (err >= 0)
+				dprintk(DEBUG_ERROR,
+					"unexpected response: id %d tag %d\n",
+					rcall->id, rcall->tag);
+
+			/* nobody took the buffer */
+			kfree(rcall);
+		}
+
+		wake_up_all(&v9ses->read_wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	v9ses->transport->close(v9ses->transport);
+
+	/* Inform all pending processes about the failure */
+	wake_up_all(&v9ses->read_wait);
+
+	if (signal_pending(current))
+		complete(&v9ses->proccmpl);
+
+	dprintk(DEBUG_MUX, "recvproc: end\n");
+	v9ses->recvproc = NULL;
+
+	return err >= 0;
+}
+
+/**
+ * v9fs_mux_init - initialize multiplexer (spawn kproc)
+ * @v9ses: session info structure
+ * @dev_name: mount device information (to create unique kproc)
+ *
+ * Sets up the multiplexer fields of @v9ses, starts the receive thread and
+ * waits until that thread has signalled that it is running.
+ *
+ * Returns 0 on success. If the thread cannot be created the session is
+ * closed and -ECONNABORTED is returned.
+ */
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+{
+	char procname[60];
+
+	/* bounded copy; strncpy() alone does not guarantee termination */
+	strncpy(procname, dev_name, sizeof(procname));
+	procname[sizeof(procname) - 1] = 0;
+
+	init_waitqueue_head(&v9ses->read_wait);
+	init_completion(&v9ses->fcread);
+	init_completion(&v9ses->proccmpl);
+	spin_lock_init(&v9ses->muxlock);
+	INIT_LIST_HEAD(&v9ses->mux_fcalls);
+	v9ses->recvproc = NULL;
+	v9ses->curfcall = NULL;
+
+	v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
+					 "v9fs_recvproc %s", procname);
+
+	if (IS_ERR(v9ses->recvproc)) {
+		eprintk(KERN_ERR, "cannot create receiving thread\n");
+		/*
+		 * This file uses NULL for "no receive thread"; do not hand
+		 * an ERR_PTR value to the session teardown code.
+		 */
+		v9ses->recvproc = NULL;
+		v9fs_session_close(v9ses);
+		return -ECONNABORTED;
+	}
+
+	/* start the thread and wait for its start-up completion */
+	wake_up_process(v9ses->recvproc);
+	wait_for_completion(&v9ses->proccmpl);
+
+	return 0;
+}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 00000000000..4994cb10bad
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,41 @@
+/*
+ * linux/fs/9p/mux.h
+ *
+ * Multiplexer Definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* structure to manage each RPC transaction */
+
+struct v9fs_rpcreq {
+ struct v9fs_fcall *tcall; /* request; its tag is compared with incoming replies */
+ struct v9fs_fcall *rcall; /* reply, stored by the receive thread once matched */
+ int err; /* error code if response failed */
+
+ /* XXX - could we put scatter/gather buffers here? */
+
+ struct list_head next; /* link in the session's mux_fcalls list */
+};
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
+long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
+ struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 00000000000..63b58ce98ff
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
+/*
+ * linux/fs/9p/trans_fd.c
+ *
+ * File Descriptor Transport Layer
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+/* private data of the file descriptor transport, kept in trans->priv */
+struct v9fs_trans_fd {
+ struct file *in_file; /* file that v9fs_fd_recv reads from */
+ struct file *out_file; /* file that v9fs_fd_send writes to */
+};
+
+/**
+ * v9fs_fd_recv - receive from the transport's input file
+ * @trans: transport to read from
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ * Returns whatever kernel_read() returns, or -EIO when the transport is
+ * missing, is not in the Connected state, or has no private data.
+ */
+
+static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *fds;
+
+	if (!trans)
+		return -EIO;
+
+	fds = trans->priv;
+	if (trans->status != Connected || !fds)
+		return -EIO;
+
+	return kernel_read(fds->in_file, fds->in_file->f_pos, v, len);
+}
+
+/**
+ * v9fs_fd_send - send through the transport's output file
+ * @trans: transport to write to
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ * Returns whatever vfs_write() returns, or -EIO when the transport is
+ * missing, is not in the Connected state, or has no private data.
+ */
+
+static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct v9fs_trans_fd *fds;
+	struct file *out;
+	mm_segment_t saved_fs;
+	int written;
+
+	if (!trans)
+		return -EIO;
+
+	fds = trans->priv;
+	if (!fds || trans->status != Connected)
+		return -EIO;
+
+	out = fds->out_file;
+
+	saved_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	written = vfs_write(out, (void __user *)v, len, &out->f_pos);
+	set_fs(saved_fs);
+
+	return written;
+}
+
+/**
+ * v9fs_fd_init - initialize file descriptor transport
+ * @v9ses: session information (supplies rfdno and wfdno)
+ * @addr: address of server to mount (not used by this transport)
+ * @data: mount options (not used by this transport)
+ *
+ * Returns 0 on success, -ENOPROTOOPT when either descriptor option was not
+ * given, -ENOMEM when the private data cannot be allocated, or -EIO when
+ * either descriptor does not resolve to a file.
+ */
+
+static int
+v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct v9fs_transport *trans = v9ses->transport;
+	struct v9fs_trans_fd *fds;
+
+	/* ~0 is the "option not given" default set by the option parser */
+	if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) {
+		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+		return -ENOPROTOOPT;
+	}
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	fds = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+	if (!fds)
+		return -ENOMEM;
+
+	fds->in_file = fget(v9ses->rfdno);
+	fds->out_file = fget(v9ses->wfdno);
+
+	if (fds->in_file && fds->out_file) {
+		trans->priv = fds;
+		trans->status = Connected;
+		return 0;
+	}
+
+	/* at least one descriptor was bad: drop whichever reference we got */
+	if (fds->in_file)
+		fput(fds->in_file);
+	if (fds->out_file)
+		fput(fds->out_file);
+	kfree(fds);
+
+	return -EIO;
+}
+
+
+/**
+ * v9fs_fd_close - shutdown file descriptor transport
+ * @trans: transport whose private data is torn down
+ *
+ * Marks the transport Disconnected, drops the references held on the two
+ * files and frees the private data.  Safe to call repeatedly: priv is
+ * cleared before it is released, so a later call returns early.
+ */
+
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_fd *ts;
+
+	if (!trans)
+		return;
+
+	trans->status = Disconnected;
+	ts = trans->priv;
+
+	if (!ts)
+		return;
+
+	/*
+	 * Detach before freeing.  ->close() is invoked both when the receive
+	 * thread exits and from v9fs_session_close(); with a stale priv the
+	 * second call would fput() and kfree() the same objects again.
+	 */
+	trans->priv = NULL;
+
+	if (ts->in_file)
+		fput(ts->in_file);
+
+	if (ts->out_file)
+		fput(ts->out_file);
+
+	kfree(ts);
+}
+
+/* operations of the file descriptor transport (selected for PROTO_FD) */
+struct v9fs_transport v9fs_trans_fd = {
+ .init = v9fs_fd_init,
+ .write = v9fs_fd_send,
+ .read = v9fs_fd_recv,
+ .close = v9fs_fd_close,
+};
+
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 00000000000..01e26f0013a
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,290 @@
+/*
+ * linux/fs/9p/trans_sock.c
+ *
+ * Socket Transport Layer
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+#define V9FS_PORT 564
+
+/* private data of the socket transports, kept in trans->priv */
+struct v9fs_trans_sock {
+ struct socket *s; /* connected socket; NULL until connect succeeds and after close */
+};
+
+/**
+ * v9fs_sock_recv - receive from a socket
+ * @trans: transport to read from
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ * Returns the result of kernel_recvmsg(), or -EREMOTEIO when the transport
+ * is missing, already Disconnected, or has no socket attached.  Any result
+ * <= 0 other than -ERESTARTSYS marks the transport Disconnected.
+ */
+
+static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
+{
+	struct msghdr msg;
+	struct kvec iov;
+	int result;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	/*
+	 * v9fs_sock_close() clears priv and the socket pointer, so neither
+	 * may be dereferenced without checking first.
+	 */
+	if (!trans || !ts || !ts->s)
+		return -EREMOTEIO;
+
+	if (trans->status == Disconnected)
+		return -EREMOTEIO;
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+
+	result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+
+	dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
+	set_fs(oldfs);
+
+	/* an interrupted call may be retried; anything else ends the session */
+	if (result <= 0) {
+		if (result != -ERESTARTSYS)
+			trans->status = Disconnected;
+	}
+
+	return result;
+}
+
+/**
+ * v9fs_sock_send - send to a socket
+ * @trans: transport to write to
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ * Sends under trans->writelock.  Returns the result of kernel_sendmsg(), or
+ * -EREMOTEIO when the transport is missing or has no socket attached.  A
+ * negative result other than -ERESTARTSYS marks the transport Disconnected.
+ */
+
+static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
+{
+	struct kvec iov;
+	struct msghdr msg;
+	int result = -1;
+	mm_segment_t oldfs;
+	struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+	dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
+	dump_data(v, len);
+
+	/*
+	 * v9fs_sock_close() clears priv and the socket pointer, so neither
+	 * may be dereferenced without checking first.
+	 */
+	if (!trans || !ts || !ts->s)
+		return -EREMOTEIO;
+
+	down(&trans->writelock);
+
+	oldfs = get_fs();
+	set_fs(get_ds());
+	iov.iov_base = v;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = MSG_NOSIGNAL;
+	result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+	set_fs(oldfs);
+
+	/* an interrupted call may be retried; anything else ends the session */
+	if (result < 0) {
+		if (result != -ERESTARTSYS)
+			trans->status = Disconnected;
+	}
+
+	up(&trans->writelock);
+	return result;
+}
+
+/**
+ * v9fs_tcp_init - initialize TCP socket
+ * @v9ses: session information (supplies the port)
+ * @addr: address of server to mount
+ * @data: mount options (not used here)
+ *
+ * Returns 0 once the socket is connected, -ENOMEM if the private data
+ * cannot be allocated, -EINVAL if @addr is NULL, or the error from socket
+ * creation / connect.  On failure after allocation the private data stays
+ * in trans->priv and is freed by v9fs_sock_close().
+ */
+
+static int
+v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+	struct socket *csocket = NULL;
+	struct sockaddr_in sin_server;
+	int rc = 0;
+	struct v9fs_trans_sock *ts = NULL;
+	struct v9fs_transport *trans = v9ses->transport;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;
+	ts->s = NULL;
+
+	if (!addr)
+		return -EINVAL;
+
+	dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
+
+	sin_server.sin_family = AF_INET;
+	sin_server.sin_addr.s_addr = in_aton(addr);
+	sin_server.sin_port = htons(v9ses->port);
+
+	/* csocket stays NULL when creation fails, so check before using it */
+	rc = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+	if (rc < 0) {
+		eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n");
+		return rc;
+	}
+
+	rc = csocket->ops->connect(csocket,
+				   (struct sockaddr *)&sin_server,
+				   sizeof(struct sockaddr_in), 0);
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_tcp: problem connecting socket to %s\n",
+			addr);
+		/* not yet stored in ts->s, so nobody else would release it */
+		sock_release(csocket);
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_unix_init - initialize UNIX domain socket
+ * @v9ses: session information
+ * @dev_name: path of the UNIX domain socket to connect to
+ * @data: mount options (not used here)
+ *
+ * Returns 0 once the socket is connected, -ENOMEM if @dev_name does not
+ * fit in sun_path or the private data cannot be allocated, or the error
+ * from socket creation / connect.  On failure after allocation the private
+ * data stays in trans->priv and is freed by v9fs_sock_close().
+ */
+
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
+	       char *data)
+{
+	int rc;
+	struct socket *csocket;
+	struct sockaddr_un sun_server;
+	struct v9fs_transport *trans;
+	struct v9fs_trans_sock *ts;
+
+	rc = 0;
+	csocket = NULL;
+	trans = v9ses->transport;
+
+	/*
+	 * sun_path holds UNIX_PATH_MAX bytes including the terminating NUL
+	 * that strcpy() writes below, so a name of exactly UNIX_PATH_MAX
+	 * characters must be rejected as well.
+	 */
+	if (strlen(dev_name) >= UNIX_PATH_MAX) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+			dev_name);
+		return -ENOMEM;
+	}
+
+	ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	trans->priv = ts;
+	ts->s = NULL;
+
+	sema_init(&trans->writelock, 1);
+	sema_init(&trans->readlock, 1);
+
+	sun_server.sun_family = PF_UNIX;
+	strcpy(sun_server.sun_path, dev_name);
+
+	/* csocket stays NULL when creation fails, so check before using it */
+	rc = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+	if (rc < 0) {
+		eprintk(KERN_ERR, "v9fs_trans_unix: problem creating socket\n");
+		return rc;
+	}
+
+	rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+			sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */
+	if (rc < 0) {
+		eprintk(KERN_ERR,
+			"v9fs_trans_unix: problem connecting socket: %s: %d\n",
+			dev_name, rc);
+		/* not yet stored in ts->s, so nobody else would release it */
+		sock_release(csocket);
+		return rc;
+	}
+	csocket->sk->sk_allocation = GFP_NOIO;
+	ts->s = csocket;
+	trans->status = Connected;
+
+	return 0;
+}
+
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: transport whose socket and private data are released
+ *
+ * Releases the socket if one is attached (marking the transport
+ * Disconnected), frees the private data and clears trans->priv.
+ */
+
+static void v9fs_sock_close(struct v9fs_transport *trans)
+{
+	struct v9fs_trans_sock *sock_priv;
+
+	if (!trans)
+		return;
+
+	sock_priv = trans->priv;
+	if (sock_priv) {
+		if (sock_priv->s) {
+			dprintk(DEBUG_TRANS, "closing the socket %p\n",
+				sock_priv->s);
+			sock_release(sock_priv->s);
+			sock_priv->s = NULL;
+			trans->status = Disconnected;
+			dprintk(DEBUG_TRANS, "socket closed\n");
+		}
+		kfree(sock_priv);
+	}
+
+	trans->priv = NULL;
+}
+
+/* TCP transport: differs from the UNIX one only in how it connects */
+struct v9fs_transport v9fs_trans_tcp = {
+ .init = v9fs_tcp_init,
+ .write = v9fs_sock_send,
+ .read = v9fs_sock_recv,
+ .close = v9fs_sock_close,
+};
+
+/* UNIX domain socket transport: shares send/recv/close with TCP */
+struct v9fs_transport v9fs_trans_unix = {
+ .init = v9fs_unix_init,
+ .write = v9fs_sock_send,
+ .read = v9fs_sock_recv,
+ .close = v9fs_sock_close,
+};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 00000000000..9e9cd418efd
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
+/*
+ * linux/fs/9p/transport.h
+ *
+ * Transport Definition
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* connection state kept in v9fs_transport.status */
+enum v9fs_transport_status {
+ Connected,
+ Disconnected,
+ Hung,
+};
+
+/* a transport: its state, locks, private data and the four operations */
+struct v9fs_transport {
+ enum v9fs_transport_status status;
+ struct semaphore writelock; /* held by the socket send path around sendmsg */
+ struct semaphore readlock; /* initialised by every transport's init */
+ void *priv; /* transport-specific data, set by init and released by close */
+
+ /* init(session, device name / address, mount options) */
+ int (*init) (struct v9fs_session_info *, const char *, char *);
+ /* write(transport, buffer, length) */
+ int (*write) (struct v9fs_transport *, void *, int);
+ /* read(transport, buffer, length) */
+ int (*read) (struct v9fs_transport *, void *, int);
+ void (*close) (struct v9fs_transport *);
+};
+
+extern struct v9fs_transport v9fs_trans_tcp;
+extern struct v9fs_transport v9fs_trans_unix;
+extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 00000000000..13bdbbab438
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,452 @@
+/*
+ * linux/fs/9p/v9fs.c
+ *
+ * This file contains functions assisting in mapping VFS to 9P2000
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/parser.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+
+/* TODO: sysfs or debugfs interface */
+int v9fs_debug_level = 0; /* feature-rific global debug level */
+
+/*
+ * Option Parsing (code inspired by NFS code)
+ *
+ */
+
+/*
+ * Option tokens.  The order matters: v9fs_parse_options() treats every
+ * token smaller than Opt_name as carrying an integer argument, so integer
+ * options must stay ahead of Opt_name.
+ */
+enum {
+ /* Options that take integer arguments */
+ Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+ Opt_rfdno, Opt_wfdno,
+ /* String options */
+ Opt_name, Opt_remotename,
+ /* Options that take no arguments */
+ Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+ /* Error token */
+ Opt_err
+};
+
+/* patterns handed to match_token(); the protocols also have short aliases */
+static match_table_t tokens = {
+ {Opt_port, "port=%u"},
+ {Opt_msize, "msize=%u"},
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_afid, "afid=%u"},
+ {Opt_rfdno, "rfdno=%u"},
+ {Opt_wfdno, "wfdno=%u"},
+ {Opt_debug, "debug=%u"},
+ {Opt_name, "name=%s"},
+ {Opt_remotename, "aname=%s"},
+ {Opt_unix, "proto=unix"},
+ {Opt_tcp, "proto=tcp"},
+ {Opt_fd, "proto=fd"},
+ {Opt_tcp, "tcp"},
+ {Opt_unix, "unix"},
+ {Opt_fd, "fd"},
+ {Opt_legacy, "noextend"},
+ {Opt_nodevmap, "nodevmap"},
+ {Opt_err, NULL}
+};
+
+/*
+ * Parse option string.
+ */
+
+/**
+ * v9fs_parse_options - parse mount options into session structure
+ * @options: comma separated options string passed from mount (may be NULL)
+ * @v9ses: existing v9fs session information
+ *
+ * Installs the defaults first, then overrides them with every option that
+ * is recognised.  Unknown options and integer options without a valid
+ * integer are skipped.
+ */
+
+static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *opt;
+	int value;
+
+	/* setup defaults */
+	v9ses->proto = PROTO_TCP;
+	v9ses->port = V9FS_PORT;
+	v9ses->maxdata = 9000;
+	v9ses->extended = 1;
+	v9ses->debug = 0;
+	v9ses->afid = ~0;
+	v9ses->rfdno = ~0;
+	v9ses->wfdno = ~0;
+
+	if (!options)
+		return;
+
+	while ((opt = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*opt)
+			continue;
+
+		token = match_token(opt, tokens, args);
+
+		/* every token ordered before Opt_name carries an integer */
+		if (token < Opt_name && match_int(&args[0], &value) < 0) {
+			dprintk(DEBUG_ERROR,
+				"integer field, but no integer?\n");
+			continue;
+		}
+
+		switch (token) {
+		/* integer options */
+		case Opt_port:
+			v9ses->port = value;
+			break;
+		case Opt_msize:
+			v9ses->maxdata = value;
+			break;
+		case Opt_uid:
+			v9ses->uid = value;
+			break;
+		case Opt_gid:
+			v9ses->gid = value;
+			break;
+		case Opt_afid:
+			v9ses->afid = value;
+			break;
+		case Opt_rfdno:
+			v9ses->rfdno = value;
+			break;
+		case Opt_wfdno:
+			v9ses->wfdno = value;
+			break;
+		case Opt_debug:
+			v9ses->debug = value;
+			break;
+		/* transport selection */
+		case Opt_tcp:
+			v9ses->proto = PROTO_TCP;
+			break;
+		case Opt_unix:
+			v9ses->proto = PROTO_UNIX;
+			break;
+		case Opt_fd:
+			v9ses->proto = PROTO_FD;
+			break;
+		/* string options */
+		case Opt_name:
+			match_strcpy(v9ses->name, &args[0]);
+			break;
+		case Opt_remotename:
+			match_strcpy(v9ses->remotename, &args[0]);
+			break;
+		/* flags */
+		case Opt_legacy:
+			v9ses->extended = 0;
+			break;
+		case Opt_nodevmap:
+			v9ses->nodev = 1;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * v9fs_inode2v9ses - extract v9fs session info from an inode's super block
+ * @inode: inode to extract information from
+ *
+ * Returns the s_fs_info pointer of the inode's super block.  No validation
+ * is performed: @inode and its i_sb are dereferenced unconditionally.
+ *
+ */
+
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+ return (inode->i_sb->s_fs_info);
+}
+
+/**
+ * v9fs_get_idpool - allocate numeric id from pool
+ * @p - pool to allocate from
+ *
+ * Returns the new id (>= 0), or -1 if memory for the pool could not be
+ * reserved, the lock wait was interrupted, or the allocation failed.
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * the lock included in struct idr?
+ */
+
+int v9fs_get_idpool(struct v9fs_idpool *p)
+{
+	int i = 0;
+	int error;
+
+retry:
+	/*
+	 * 0 is a valid id and callers test for "< 0", so a failed
+	 * preallocation has to be reported as a negative value.
+	 */
+	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+		return -1;
+
+	if (down_interruptible(&p->lock) == -EINTR) {
+		eprintk(KERN_WARNING, "Interrupted while locking\n");
+		return -1;
+	}
+
+	error = idr_get_new(&p->pool, NULL, &i);
+	up(&p->lock);
+
+	/* -EAGAIN: the preallocated memory was used up, reserve more */
+	if (error == -EAGAIN)
+		goto retry;
+	else if (error)
+		return -1;
+
+	return i;
+}
+
+/**
+ * v9fs_put_idpool - release numeric id from pool
+ * @id - id to release
+ * @p - pool to release the id to
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * the lock included in struct idr?
+ */
+
+void v9fs_put_idpool(int id, struct v9fs_idpool *p)
+{
+	/*
+	 * Releasing must not fail: this function returns void, so giving up
+	 * when a signal interrupts the wait would silently leak the id.  The
+	 * lock is only held across short idr calls, so wait for it
+	 * uninterruptibly.
+	 */
+	down(&p->lock);
+	idr_remove(&p->pool, id);
+	up(&p->lock);
+}
+
+/**
+ * v9fs_session_init - initialize session
+ * @v9ses: session information structure
+ * @dev_name: device being mounted
+ * @data: options
+ *
+ * Parses the options, brings up the selected transport and the mux,
+ * negotiates the protocol version (only when no afid option was given) and
+ * attaches to the server.
+ *
+ * Returns the fid that was attached on success.  If allocating the name
+ * buffers fails it returns -ENOMEM directly; on every later failure it
+ * closes the session via v9fs_session_close() and returns a negative error.
+ */
+
+int
+v9fs_session_init(struct v9fs_session_info *v9ses,
+ const char *dev_name, char *data)
+{
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_transport *trans_proto;
+ int n = 0;
+ int newfid = -1;
+ int retval = -EINVAL;
+
+ /* both name buffers are released again by v9fs_session_close() */
+ v9ses->name = __getname();
+ if (!v9ses->name)
+ return -ENOMEM;
+
+ v9ses->remotename = __getname();
+ if (!v9ses->remotename) {
+ putname(v9ses->name);
+ return -ENOMEM;
+ }
+
+ /* defaults; the name= and aname= options overwrite them */
+ strcpy(v9ses->name, V9FS_DEFUSER);
+ strcpy(v9ses->remotename, V9FS_DEFANAME);
+
+ v9fs_parse_options(data, v9ses);
+
+ /* set global debug level */
+ v9fs_debug_level = v9ses->debug;
+
+ /* id pools that are session-dependent: FIDs and TIDs */
+ idr_init(&v9ses->fidpool.pool);
+ init_MUTEX(&v9ses->fidpool.lock);
+ idr_init(&v9ses->tidpool.pool);
+ init_MUTEX(&v9ses->tidpool.lock);
+
+
+ /* pick the transport; unix and fd mounts discard the remote name */
+ switch (v9ses->proto) {
+ case PROTO_TCP:
+ trans_proto = &v9fs_trans_tcp;
+ break;
+ case PROTO_UNIX:
+ trans_proto = &v9fs_trans_unix;
+ *v9ses->remotename = 0;
+ break;
+ case PROTO_FD:
+ trans_proto = &v9fs_trans_fd;
+ *v9ses->remotename = 0;
+ break;
+ default:
+ printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
+ retval = -ENOPROTOOPT;
+ goto SessCleanUp;
+ };
+
+ v9ses->transport = trans_proto;
+
+ if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
+ eprintk(KERN_ERR, "problem initializing transport\n");
+ goto SessCleanUp;
+ }
+
+ v9ses->inprogress = 0;
+ v9ses->shutdown = 0;
+ v9ses->session_hung = 0;
+
+ if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+ dprintk(DEBUG_ERROR, "problem initializing mux\n");
+ goto SessCleanUp;
+ }
+
+ /* version negotiation is skipped when an afid option was supplied */
+ if (v9ses->afid == ~0) {
+ if (v9ses->extended)
+ retval =
+ v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
+ &fcall);
+ else
+ retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
+ &fcall);
+
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
+ goto FreeFcall;
+ }
+
+ /* Really should check for 9P1 and report error */
+ if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+ dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
+ v9ses->extended = 1;
+ } else {
+ dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
+ v9ses->extended = 0;
+ }
+
+ /* never use a larger message size than the reply carries */
+ n = fcall->params.rversion.msize;
+ kfree(fcall);
+
+ if (n < v9ses->maxdata)
+ v9ses->maxdata = n;
+ }
+
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "couldn't allocate FID\n");
+ retval = -ENOMEM;
+ goto SessCleanUp;
+ }
+ /* it is a little bit ugly, but we have to prevent newfid */
+ /* being the same as afid, so if it is, get a new fid */
+ if (v9ses->afid != ~0 && newfid == v9ses->afid) {
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "couldn't allocate FID\n");
+ retval = -ENOMEM;
+ goto SessCleanUp;
+ }
+ }
+
+ if ((retval =
+ v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
+ v9ses->afid, NULL))
+ < 0) {
+ dprintk(DEBUG_ERROR, "cannot attach\n");
+ goto SessCleanUp;
+ }
+
+ /* the afid is clunked after the attach; a failed clunk is only logged */
+ if (v9ses->afid != ~0) {
+ if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+ dprintk(DEBUG_ERROR, "clunk failed\n");
+ }
+
+ return newfid;
+
+ /* error exits: FreeFcall falls through into SessCleanUp */
+ FreeFcall:
+ kfree(fcall);
+
+ SessCleanUp:
+ v9fs_session_close(v9ses);
+ return retval;
+}
+
+/**
+ * v9fs_session_close - shutdown a session
+ * @v9ses: session information structure
+ *
+ * Kills the receive thread (if any) and waits on proccmpl, closes the
+ * transport (if any) and releases both name buffers.
+ */
+
+void v9fs_session_close(struct v9fs_session_info *v9ses)
+{
+	struct v9fs_transport *trans;
+
+	if (v9ses->recvproc) {
+		send_sig(SIGKILL, v9ses->recvproc, 1);
+		wait_for_completion(&v9ses->proccmpl);
+	}
+
+	/* look the transport up only after the receive thread is gone */
+	trans = v9ses->transport;
+	if (trans)
+		trans->close(trans);
+
+	putname(v9ses->name);
+	putname(v9ses->remotename);
+}
+
+/**
+ * v9fs_session_cancel - mark transport as disconnected
+ *	and cancel all pending requests.
+ * @v9ses: session whose requests are cancelled with -EIO
+ */
+void v9fs_session_cancel(struct v9fs_session_info *v9ses)
+{
+	struct v9fs_transport *trans = v9ses->transport;
+
+	trans->status = Disconnected;
+	v9fs_mux_cancel_requests(v9ses, -EIO);
+}
+
+extern int v9fs_error_init(void);
+
+/**
+ * init_v9fs - Initialize module
+ *
+ * Sets up the error table and registers the file system type.
+ * Returns the result of register_filesystem().
+ */
+
+static int __init init_v9fs(void)
+{
+ /* NOTE(review): the int returned by v9fs_error_init() is ignored */
+ v9fs_error_init();
+
+ printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
+
+ return register_filesystem(&v9fs_fs_type);
+}
+
+/**
+ * exit_v9fs - shutdown module
+ *
+ * Unregisters the file system type registered by the module init function.
+ */
+
+static void __exit exit_v9fs(void)
+{
+ unregister_filesystem(&v9fs_fs_type);
+}
+
+module_init(init_v9fs)
+module_exit(exit_v9fs)
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
+MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 00000000000..45dcef42bdd
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,103 @@
+/*
+ * V9FS definitions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/*
+ * Idpool structure provides lock and id management
+ *
+ */
+
+struct v9fs_idpool {
+ struct semaphore lock; /* taken around idr_get_new() and idr_remove() */
+ struct idr pool; /* the ids themselves */
+};
+
+/*
+ * Session structure provides information for an opened session
+ *
+ */
+
+struct v9fs_session_info {
+ /* options */
+ unsigned int maxdata; /* msize option, lowered to the msize in the version reply */
+ unsigned char extended; /* set to 1 if we are using UNIX extensions */
+ unsigned char nodev; /* set to 1 by the nodevmap mount option */
+ unsigned short port; /* port to connect to */
+ unsigned short debug; /* debug level */
+ unsigned short proto; /* protocol to use */
+ unsigned int afid; /* authentication fid; ~0 when none was given */
+ unsigned int rfdno; /* read file descriptor number; ~0 when not given */
+ unsigned int wfdno; /* write file descriptor number; ~0 when not given */
+
+
+ char *name; /* user name to mount as */
+ char *remotename; /* name of remote hierarchy being mounted */
+ unsigned int uid; /* default uid/muid for legacy support */
+ unsigned int gid; /* default gid for legacy support */
+
+ /* book keeping */
+ struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
+ struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
+
+ /* transport information */
+ struct v9fs_transport *transport;
+
+ int inprogress; /* session in progress => true */
+ int shutdown; /* session shutting down. no more attaches. */
+ unsigned char session_hung;
+
+ /* mux private data */
+ struct v9fs_fcall *curfcall;
+ wait_queue_head_t read_wait; /* woken by the receive thread */
+ struct completion fcread;
+ struct completion proccmpl; /* waited on when starting/stopping the receive thread */
+ struct task_struct *recvproc; /* receive thread, NULL when not running */
+
+ spinlock_t muxlock; /* held by the receive thread while it walks mux_fcalls */
+ struct list_head mux_fcalls; /* outstanding v9fs_rpcreq entries */
+};
+
+/* possible values of ->proto */
+enum {
+ PROTO_TCP,
+ PROTO_UNIX,
+ PROTO_FD,
+};
+
+int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+void v9fs_session_close(struct v9fs_session_info *v9ses);
+int v9fs_get_idpool(struct v9fs_idpool *p);
+void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+
+#define V9FS_MAGIC 0x01021997
+
+/* other default globals */
+#define V9FS_PORT 564
+#define V9FS_DEFUSER "nobody"
+#define V9FS_DEFANAME ""
+
+/* initial pool sizes for fids and tags */
+#define V9FS_START_FIDS 8192
+#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 00000000000..2f2cea7ee3e
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
+/*
+ * V9FS VFS extensions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* plan9 semantics are that created files are implicitly opened.
+ * But linux semantics are that you call create, then open.
+ * the plan9 approach is superior as it provides an atomic
+ * open.
+ * we track the create fid here. When the file is opened, if fidopen is
+ * non-zero, we use the fid and can skip some steps.
+ * there may be a better way to do this, but I don't know it.
+ * one BAD way is to clunk the fid on create, then open it again:
+ * you lose the atomicity of file open
+ */
+
+/* special case:
+ * unlink calls remove, which is an implicit clunk. So we have to track
+ * that kind of thing so that we don't try to clunk a dead fid.
+ */
+
+extern struct file_system_type v9fs_fs_type;
+extern struct file_operations v9fs_file_operations;
+extern struct file_operations v9fs_dir_operations;
+extern struct dentry_operations v9fs_dentry_operations;
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+ino_t v9fs_qid2ino(struct v9fs_qid *qid);
+void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
+ struct super_block *);
+int v9fs_dir_release(struct inode *inode, struct file *filp);
+int v9fs_file_open(struct inode *inode, struct file *file);
+void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 00000000000..306c96741f8
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
+/*
+ * linux/fs/9p/vfs_dentry.c
+ *
+ * This file contains vfs dentry ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_dentry_validate - VFS dcache hook to validate cache
+ * @dentry: dentry that is being validated
+ * @nd: path data
+ *
+ * Returns 1 (valid) when a fid is found for @dentry or when @dentry is the
+ * current working directory or one of its ancestors; 0 otherwise.
+ *
+ * dcache really shouldn't be used for 9P2000 at all due to
+ * potential attached semantics to directory traversal (walk).
+ *
+ * FUTURE: look into how to use dcache to allow multi-stage
+ * walks in Plan 9 & potential for better dcache operation which
+ * would remain valid for Plan 9 semantics.  Older versions
+ * had validation via stat for those interested.  However, since
+ * stat has the same approximate overhead as walk there really
+ * is no difference.  The only improvement would be from a
+ * time-decay cache like NFS has and that undermines the
+ * synchronous nature of 9P2000.
+ *
+ */
+
+static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *walk = current->fs->pwd;
+
+	dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
+	if (v9fs_fid_lookup(dentry, FID_OP)) {
+		dprintk(DEBUG_VFS, "VALID\n");
+		return 1;
+	}
+
+	/* climb from the working directory through its parents */
+	for (; walk != NULL; walk = walk->d_parent) {
+		if (walk == dentry) {
+			dprintk(DEBUG_VFS, "VALID\n");
+			return 1;
+		}
+		/* a dentry that is its own parent ends the climb */
+		if (walk == walk->d_parent)
+			break;
+	}
+
+	dprintk(DEBUG_VFS, "INVALID\n");
+	return 0;
+}
+
+/**
+ * v9fs_dentry_release - called when dentry is going to be freed
+ * @dentry: dentry that is being release
+ *
+ * Clunks every fid on the list hanging off d_fsdata, returns the fid
+ * numbers to the session's pool, destroys the fid structures and finally
+ * frees the list head itself.
+ */
+
+void v9fs_dentry_release(struct dentry *dentry)
+{
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	if (dentry->d_fsdata != NULL) {
+		struct list_head *fid_list = dentry->d_fsdata;
+		struct v9fs_fid *temp = NULL;
+		struct v9fs_fid *current_fid = NULL;
+
+		list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+			/*
+			 * Start every clunk without a reply.  Carrying the
+			 * pointer over from the previous iteration would let
+			 * a clunk that fails before storing a reply report
+			 * and kfree() memory that was already freed.
+			 */
+			struct v9fs_fcall *fcall = NULL;
+
+			if (v9fs_t_clunk
+			    (current_fid->v9ses, current_fid->fid, &fcall))
+				dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+					FCALL_ERROR(fcall));
+
+			v9fs_put_idpool(current_fid->fid,
+					&current_fid->v9ses->fidpool);
+
+			kfree(fcall);
+			v9fs_fid_destroy(current_fid);
+		}
+
+		kfree(dentry->d_fsdata);	/* free the list_head */
+		dentry->d_fsdata = NULL;	/* do not leave it dangling */
+	}
+}
+
+/* dentry hooks: revalidation and per-dentry fid cleanup on release */
+struct dentry_operations v9fs_dentry_operations = {
+ .d_revalidate = v9fs_dentry_validate,
+ .d_release = v9fs_dentry_release,
+};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 00000000000..c478a738418
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,226 @@
+/*
+ * linux/fs/9p/vfs_dir.c
+ *
+ * This file contains vfs directory ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * dt_type - return file type
+ * @mistat: mistat structure
+ *
+ * Maps the mode bits of @mistat to a DT_* value: DT_LNK when the symlink
+ * bit is set (even if the directory bit is set too), DT_DIR when only the
+ * directory bit is set, DT_REG otherwise.
+ */
+
+static inline int dt_type(struct v9fs_stat *mistat)
+{
+	unsigned long mode_bits = mistat->mode;
+
+	if (mode_bits & V9FS_DMSYMLINK)
+		return DT_LNK;
+
+	if (mode_bits & V9FS_DMDIR)
+		return DT_DIR;
+
+	return DT_REG;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: opaque buffer passed through to @filldir
+ * @filldir: callback invoked once for every directory entry
+ *
+ * Entries left over from a previous call (kept in the fid's rdir_fcall)
+ * are emitted first.  After that the directory is read from the server
+ * until a read returns no data or @filldir reports that it cannot take
+ * more; in the latter case the unconsumed reply is cached on the fid.
+ *
+ * Returns a negative errno on failure, otherwise the result of the last
+ * v9fs_t_read() issued (0 if none was needed).
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *file = filp->private_data;
+	unsigned int i, n;
+	int fid = -1;
+	int ret = 0;
+	struct v9fs_stat *mi = NULL;
+	int over = 0;
+
+	dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
+
+	fid = file->fid;
+
+	mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	if (!mi)
+		return -ENOMEM;
+
+	/* a cached reply is only usable if the file position did not move */
+	if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
+		kfree(file->rdir_fcall);
+		file->rdir_fcall = NULL;
+	}
+
+	/* drain what is left of the cached reply first */
+	if (file->rdir_fcall) {
+		n = file->rdir_fcall->params.rread.count;
+		i = file->rdir_fpos;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+				file->rdir_fcall->params.rread.data + i,
+				n - i, mi, v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				ret = -EIO;
+				goto FreeStructs;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				       filp->f_pos, v9fs_qid2ino(&mi->qid),
+				       dt_type(mi));
+
+			if (over) {
+				/* remember where to resume next time */
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		if (!over) {
+			kfree(file->rdir_fcall);
+			file->rdir_fcall = NULL;
+		}
+	}
+
+	while (!over) {
+		ret = v9fs_t_read(v9ses, fid, filp->f_pos,
+				  v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+		if (ret < 0) {
+			dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
+				ret, fcall);
+			goto FreeStructs;
+		} else if (ret == 0)
+			break;
+
+		n = ret;
+		i = 0;
+		while (i < n) {
+			int s = v9fs_deserialize_stat(v9ses,
+				fcall->params.rread.data + i, n - i, mi,
+				v9ses->maxdata);
+
+			if (s == 0) {
+				dprintk(DEBUG_ERROR,
+					"error while deserializing mistat\n");
+				/* leave through the cleanup path so that
+				 * mi and the current reply are released */
+				ret = -EIO;
+				goto FreeStructs;
+			}
+
+			over = filldir(dirent, mi->name, strlen(mi->name),
+				       filp->f_pos, v9fs_qid2ino(&mi->qid),
+				       dt_type(mi));
+
+			if (over) {
+				/* the fid takes ownership of the reply */
+				file->rdir_fcall = fcall;
+				file->rdir_fpos = i;
+				file->rdir_pos = filp->f_pos;
+				fcall = NULL;
+				break;
+			}
+
+			i += s;
+			filp->f_pos += s;
+		}
+
+		kfree(fcall);
+		/* never let the next read or the cleanup see a freed reply */
+		fcall = NULL;
+	}
+
+ FreeStructs:
+	kfree(fcall);
+	kfree(mi);
+	return ret;
+}
+
+/**
+ * v9fs_dir_release - close a directory
+ * @inode: inode of the directory
+ * @filp: file pointer to a directory
+ *
+ * Writes back and waits on the inode's mapping, then, if the file's fid
+ * number is valid, drops one open reference on it.  When the open count
+ * reaches zero the fid is clunked and its number is returned to the pool.
+ * The fid structure (and any cached readdir reply) is freed and the
+ * dentry is dropped.  Always returns 0.
+ */
+
+int v9fs_dir_release(struct inode *inode, struct file *filp)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = filp->private_data;
+	int fidnum = v9fid->fid;
+
+	dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
+		fidnum);
+
+	filemap_fdatawrite(inode->i_mapping);
+	filemap_fdatawait(inode->i_mapping);
+
+	if (fidnum < 0)
+		goto Drop;
+
+	v9fid->fidopen--;
+	dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", v9fid->fidopen,
+		v9fid->fid);
+
+	if (v9fid->fidopen == 0) {
+		if (v9fs_t_clunk(v9ses, fidnum, NULL))
+			dprintk(DEBUG_ERROR, "clunk failed\n");
+
+		v9fs_put_idpool(v9fid->fid, &v9ses->fidpool);
+	}
+
+	kfree(v9fid->rdir_fcall);
+
+	filp->private_data = NULL;
+	v9fs_fid_destroy(v9fid);
+
+ Drop:
+	d_drop(filp->f_dentry);
+	return 0;
+}
+
+/* file operations installed on v9fs directories */
+struct file_operations v9fs_dir_operations = {
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.read = generic_read_dir,
+	.readdir = v9fs_dir_readdir,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 00000000000..1f8ae7d580a
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,401 @@
+/*
+ * linux/fs/9p/vfs_file.c
+ *
+ * This file contains vfs file ops for 9P2000.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+
+/*
+ * Give up a fid that exists on the server but could not be attached
+ * locally: clunk it and hand its number back to the session's pool.
+ */
+static void
+v9fs_file_open_drop_fid(struct v9fs_session_info *v9ses, int fidnum)
+{
+	if (v9fs_t_clunk(v9ses, fidnum, NULL))
+		dprintk(DEBUG_ERROR, "clunk failed\n");
+
+	v9fs_put_idpool(fidnum, &v9ses->fidpool);
+}
+
+/**
+ * v9fs_file_open - open a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ *
+ * Finds the walk fid of the dentry (re-walking from the parent if there is
+ * none).  A fid that was just created by v9fs_create() is used as-is.
+ * Otherwise regular files are opened on the walk fid itself, while
+ * directories are opened on a clone of it.  On success the fid is stored
+ * in file->private_data.
+ *
+ * Returns 0 on success or a negative errno.  Fids and fid numbers obtained
+ * in this function are released again on every failure path.
+ */
+
+int v9fs_file_open(struct inode *inode, struct file *file)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
+	struct v9fs_fid *v9newfid = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	int open_mode = 0;
+	unsigned int iounit = 0;
+	int newfid = -1;
+	long result = -1;
+
+	dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
+		v9fid);
+
+	if (!v9fid) {
+		struct dentry *dentry = file->f_dentry;
+		dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
+
+		/* XXX - some duplication from lookup, generalize later */
+		/* basically vfs_lookup is too heavy weight */
+		v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
+		if (!v9fid)
+			return -EBADF;
+
+		v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+		if (!v9fid)
+			return -EBADF;
+
+		newfid = v9fs_get_idpool(&v9ses->fidpool);
+		if (newfid < 0) {
+			eprintk(KERN_WARNING, "newfid fails!\n");
+			return -ENOSPC;
+		}
+
+		result =
+		    v9fs_t_walk(v9ses, v9fid->fid, newfid,
+				(char *)file->f_dentry->d_name.name, NULL);
+		if (result < 0) {
+			v9fs_put_idpool(newfid, &v9ses->fidpool);
+			dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+			return -EBADF;
+		}
+
+		v9fid = v9fs_fid_create(dentry);
+		if (v9fid == NULL) {
+			dprintk(DEBUG_ERROR, "couldn't insert\n");
+			/* the walk succeeded, so the fid must be clunked */
+			v9fs_file_open_drop_fid(v9ses, newfid);
+			return -ENOMEM;
+		}
+		v9fid->fid = newfid;
+		/* v9fs_dentry_release() dereferences v9ses of every fid */
+		v9fid->v9ses = v9ses;
+	}
+
+	if (v9fid->fidcreate) {
+		/* create case */
+		newfid = v9fid->fid;
+		iounit = v9fid->iounit;
+		v9fid->fidcreate = 0;
+	} else {
+		if (!S_ISDIR(inode->i_mode))
+			newfid = v9fid->fid;
+		else {
+			newfid = v9fs_get_idpool(&v9ses->fidpool);
+			if (newfid < 0) {
+				eprintk(KERN_WARNING, "allocation failed\n");
+				return -ENOSPC;
+			}
+			/* This would be a somewhat critical clone */
+			result =
+			    v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
+					&fcall);
+			if (result < 0) {
+				dprintk(DEBUG_ERROR, "clone error: %s\n",
+					FCALL_ERROR(fcall));
+				kfree(fcall);
+				/* nothing was cloned: only the number
+				 * has to go back to the pool */
+				v9fs_put_idpool(newfid, &v9ses->fidpool);
+				return result;
+			}
+			kfree(fcall);
+			fcall = NULL;
+
+			v9newfid = v9fs_fid_create(file->f_dentry);
+			if (v9newfid == NULL) {
+				dprintk(DEBUG_ERROR, "couldn't insert\n");
+				v9fs_file_open_drop_fid(v9ses, newfid);
+				return -ENOMEM;
+			}
+			v9newfid->fid = newfid;
+			v9newfid->qid = v9fid->qid;
+			v9newfid->iounit = v9fid->iounit;
+			v9newfid->fidopen = 0;
+			v9newfid->fidclunked = 0;
+			v9newfid->v9ses = v9ses;
+			v9fid = v9newfid;
+		}
+
+		/* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+		/* translate open mode appropriately */
+		open_mode = file->f_flags & 0x3;
+
+		if (file->f_flags & O_EXCL)
+			open_mode |= V9FS_OEXCL;
+
+		if (v9ses->extended) {
+			if (file->f_flags & O_TRUNC)
+				open_mode |= V9FS_OTRUNC;
+
+			if (file->f_flags & O_APPEND)
+				open_mode |= V9FS_OAPPEND;
+		}
+
+		result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
+		if (result < 0) {
+			dprintk(DEBUG_ERROR,
+				"open failed, open_mode 0x%x: %s\n", open_mode,
+				FCALL_ERROR(fcall));
+			kfree(fcall);
+			/* undo the directory clone made above, if any */
+			if (v9newfid) {
+				v9fs_file_open_drop_fid(v9ses, newfid);
+				v9fs_fid_destroy(v9newfid);
+			}
+			return result;
+		}
+
+		iounit = fcall->params.ropen.iounit;
+		kfree(fcall);
+	}
+
+
+	file->private_data = v9fid;
+
+	v9fid->rdir_pos = 0;
+	v9fid->rdir_fcall = NULL;
+	v9fid->fidopen = 1;
+	v9fid->filp = file;
+	v9fid->iounit = iounit;
+
+	return 0;
+}
+
+/**
+ * v9fs_file_lock - lock a file (or directory)
+ * @filp: file the lock request refers to
+ * @cmd: lock command
+ * @fl: description of the requested lock
+ *
+ * XXX - this looks like a local only lock, we should extend into 9P
+ * by using open exclusive
+ *
+ * Returns -ENOLCK for inodes whose mode has S_ISGID set without S_IXGRP.
+ * For a set-lock command that is not an unlock, the inode's pages are
+ * written back, waited for and invalidated.  Returns 0 in all other cases.
+ */
+
+static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+
+	/* No mandatory locks */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	/* only set-lock requests that actually take a lock flush the cache */
+	if (!IS_SETLK(cmd) && !IS_SETLKW(cmd))
+		return 0;
+	if (fl->fl_type == F_UNLCK)
+		return 0;
+
+	filemap_fdatawrite(inode->i_mapping);
+	filemap_fdatawait(inode->i_mapping);
+	invalidate_inode_pages(&inode->i_data);
+
+	return 0;
+}
+
+/**
+ * v9fs_read - read from a file (internal)
+ * @filp: file pointer to read
+ * @buffer: kernel buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data; advanced by the bytes read
+ *
+ * Reads in chunks of at most maxdata - V9FS_IOHDRSZ bytes (further limited
+ * by the fid's iounit when that is non-zero) until @count bytes were read
+ * or the server returns less than requested.
+ *
+ * Returns the number of bytes placed in @buffer.  If a read fails before
+ * any data was transferred the negative error is returned instead, so the
+ * caller cannot mistake the failure for end-of-file.
+ */
+
+static ssize_t
+v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9f = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;
+	int fid = v9f->fid;
+	int rsize = 0;
+	int result = 0;
+	int total = 0;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9f->iounit != 0 && rsize > v9f->iounit)
+		rsize = v9f->iounit;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		/* never carry a freed reply into the next request */
+		fcall = NULL;
+		result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+
+		if (result < 0) {
+			printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
+			       result);
+
+			kfree(fcall);
+			return total ? total : result;
+		}
+
+		if (result > rsize) {
+			/* more data than requested would overrun @buffer */
+			printk(KERN_ERR
+			       "9P2000: v9fs_t_read returned %d, expected at most %d\n",
+			       result, rsize);
+
+			kfree(fcall);
+			return total ? total : -EIO;
+		}
+
+		*offset += result;
+
+		/* XXX - extra copy */
+		memcpy(buffer, fcall->params.rread.data, result);
+		count -= result;
+		buffer += result;
+		total += result;
+
+		kfree(fcall);
+
+		/* a short reply means there is nothing more to read now */
+		if (result < rsize)
+			break;
+	} while (count);
+
+	return total;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filp: file pointer to read
+ * @data: user buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ * Reads into a temporary kernel buffer via v9fs_read() and copies the
+ * result to user space.
+ *
+ * Returns the number of bytes read, -ENOMEM if the bounce buffer cannot be
+ * allocated, -EFAULT if the copy to user space fails, or the error
+ * returned by v9fs_read().
+ */
+
+static ssize_t
+v9fs_file_read(struct file *filp, char __user * data, size_t count,
+	       loff_t * offset)
+{
+	int retval = -1;
+	int ret = 0;
+	char *buffer;
+
+	buffer = kmalloc(count, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	retval = v9fs_read(filp, buffer, count, offset);
+	if (retval > 0) {
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy; that count must not be handed back as if it were
+		 * the number of bytes read.
+		 */
+		if ((ret = copy_to_user(data, buffer, retval)) != 0) {
+			dprintk(DEBUG_ERROR, "Problem copying to user %d\n",
+				ret);
+			retval = -EFAULT;
+		}
+	}
+
+	kfree(buffer);
+
+	return retval;
+}
+
+/**
+ * v9fs_write - write to a file
+ * @filp: file pointer to write
+ * @buffer: kernel buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data; advanced by the bytes written
+ *
+ * Writes in chunks of at most maxdata - V9FS_IOHDRSZ bytes (further
+ * limited by the fid's iounit when that is non-zero).  Stops early when
+ * the server accepts fewer bytes than were sent.
+ *
+ * Returns the number of bytes written, including those of a final short
+ * write, or a negative errno if a write request fails.
+ */
+
+static ssize_t
+v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct v9fs_fid *v9fid = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;
+	int fid = v9fid->fid;
+	int result = -EIO;
+	int rsize = 0;
+	int total = 0;
+
+	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count,
+		(int)*offset);
+	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
+		rsize = v9fid->iounit;
+
+	dump_data(buffer, count);
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		/* never carry a freed reply into the next request */
+		fcall = NULL;
+		result =
+		    v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall);
+		if (result < 0) {
+			eprintk(KERN_ERR, "error while writing: %s(%d)\n",
+				FCALL_ERROR(fcall), result);
+			kfree(fcall);
+			return result;
+		}
+
+		kfree(fcall);
+
+		if (result > rsize) {
+			/* the server cannot have taken more than was sent */
+			eprintk(KERN_ERR,
+				"bogus write count: v9fs_t_write returned %d\n",
+				result);
+			return -EIO;
+		}
+
+		/*
+		 * Account for everything the server accepted before looking
+		 * at short writes, so that the returned total always matches
+		 * the distance *offset has moved.
+		 */
+		*offset += result;
+		count -= result;
+		buffer += result;
+		total += result;
+
+		if (result != rsize) {
+			eprintk(KERN_ERR,
+				"short write: v9fs_t_write returned %d\n",
+				result);
+			break;
+		}
+	} while (count);
+
+	return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: user buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ * Copies the user data into a temporary kernel buffer and passes it to
+ * v9fs_write().  Returns the result of v9fs_write(), -ENOMEM if the
+ * buffer cannot be allocated or -EFAULT if the user copy fails.
+ */
+
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+		size_t count, loff_t * offset)
+{
+	char *kbuf = kmalloc(count, GFP_KERNEL);
+	int status;
+
+	if (!kbuf)
+		return -ENOMEM;
+
+	if (copy_from_user(kbuf, data, count) == 0) {
+		status = v9fs_write(filp, kbuf, count, offset);
+	} else {
+		dprintk(DEBUG_ERROR, "Problem copying from user\n");
+		status = -EFAULT;
+	}
+
+	kfree(kbuf);
+
+	return status;
+}
+
+/* file operations installed on regular v9fs files */
+struct file_operations v9fs_file_operations = {
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.llseek = generic_file_llseek,
+	.read = v9fs_file_read,
+	.write = v9fs_file_write,
+	.lock = v9fs_file_lock,
+};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 00000000000..0c13fc60004
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1338 @@
+/*
+ * linux/fs/9p/vfs_inode.c
+ *
+ * This file contains vfs inode ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static struct inode_operations v9fs_dir_inode_operations;
+static struct inode_operations v9fs_dir_inode_operations_ext;
+static struct inode_operations v9fs_file_inode_operations;
+static struct inode_operations v9fs_symlink_inode_operations;
+
+/**
+ * unixmode2p9mode - convert unix mode bits to plan 9
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ * The permission bits (0777) and the directory bit are always converted.
+ * Symlink, setuid, setgid and link bits are only converted when the
+ * session is in extended mode; socket, pipe and device bits additionally
+ * require that the session's nodev flag is clear.
+ */
+
+static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+{
+	int p9mode = mode & 0777;
+
+	if (S_ISDIR(mode))
+		p9mode |= V9FS_DMDIR;
+
+	/* nothing else is translated outside of extended mode */
+	if (!v9ses->extended)
+		return p9mode;
+
+	if (S_ISLNK(mode))
+		p9mode |= V9FS_DMSYMLINK;
+
+	if (v9ses->nodev == 0) {
+		if (S_ISSOCK(mode))
+			p9mode |= V9FS_DMSOCKET;
+		if (S_ISFIFO(mode))
+			p9mode |= V9FS_DMNAMEDPIPE;
+		/* block and character devices share one 9P bit */
+		if (S_ISBLK(mode) || S_ISCHR(mode))
+			p9mode |= V9FS_DMDEVICE;
+	}
+
+	if ((mode & S_ISUID) == S_ISUID)
+		p9mode |= V9FS_DMSETUID;
+	if ((mode & S_ISGID) == S_ISGID)
+		p9mode |= V9FS_DMSETGID;
+	if ((mode & V9FS_DMLINK))
+		p9mode |= V9FS_DMLINK;
+
+	return p9mode;
+}
+
+/**
+ * p9mode2unixmode- convert plan9 mode bits to unix mode bits
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ * Exactly one file type is chosen, tested in this order: directory,
+ * symlink, socket, named pipe, device (reported as S_IFBLK), and regular
+ * file as the fallback.  All types but directory need extended mode;
+ * socket, pipe and device also need the session's nodev flag to be clear.
+ * Setuid and setgid are carried over in extended mode only.
+ */
+
+static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+{
+	int extended = (v9ses->extended != 0);
+	int specials = extended && (v9ses->nodev == 0);
+	int unixmode = mode & 0777;
+
+	if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+		unixmode |= S_IFDIR;
+	else if (extended && (mode & V9FS_DMSYMLINK))
+		unixmode |= S_IFLNK;
+	else if (specials && (mode & V9FS_DMSOCKET))
+		unixmode |= S_IFSOCK;
+	else if (specials && (mode & V9FS_DMNAMEDPIPE))
+		unixmode |= S_IFIFO;
+	else if (specials && (mode & V9FS_DMDEVICE))
+		unixmode |= S_IFBLK;
+	else
+		unixmode |= S_IFREG;
+
+	if (!extended)
+		return unixmode;
+
+	if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+		unixmode |= S_ISUID;
+
+	if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+		unixmode |= S_ISGID;
+
+	return unixmode;
+}
+
+/**
+ * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * @v9ses: 9P session info (for determining extended mode)
+ * @mistat: structure to initialize
+ *
+ * Sets every numeric field to all ones and points every string field at
+ * the start of mistat->data, which is made an empty string.  The extended
+ * fields are only touched when the session is in extended mode.
+ */
+
+static void
+v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+{
+	/* one shared empty string backs all string fields */
+	*mistat->data = 0;
+	mistat->name = mistat->data;
+	mistat->uid = mistat->data;
+	mistat->gid = mistat->data;
+	mistat->muid = mistat->data;
+
+	mistat->type = ~0;
+	mistat->dev = ~0;
+	mistat->mode = ~0;
+	mistat->atime = ~0;
+	mistat->mtime = ~0;
+	mistat->length = ~0;
+
+	mistat->qid.type = ~0;
+	mistat->qid.version = ~0;
+	*((long long *)&mistat->qid.path) = ~0;
+
+	if (!v9ses->extended)
+		return;
+
+	mistat->n_uid = ~0;
+	mistat->n_gid = ~0;
+	mistat->n_muid = ~0;
+	mistat->extension = mistat->data;
+}
+
+/**
+ * v9fs_mistat2unix - convert mistat to unix stat
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @buf: unix metadata (stat) structure to populate
+ * @sb: superblock
+ *
+ * Owner and group default to the session's uid/gid; in extended mode they
+ * are parsed as hexadecimal from the uid/gid strings when n_uid/n_gid are
+ * set.  For device nodes the extension string ("<type> <major> <minor>")
+ * selects character vs. block device and provides st_rdev.
+ *
+ * NOTE(review): the NULL test on @sb only guards the session lookup; both
+ * @sb and the session are dereferenced unconditionally further down, so
+ * callers have to pass a valid superblock.
+ */
+
+static void
+v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
+		 struct super_block *sb)
+{
+	struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
+	unsigned int id;
+
+	buf->st_nlink = 1;
+
+	buf->st_atime = mistat->atime;
+	buf->st_mtime = mistat->mtime;
+	buf->st_ctime = mistat->mtime;
+
+	buf->st_uid = (unsigned short)-1;
+	buf->st_gid = (unsigned short)-1;
+
+	if (v9ses && v9ses->extended) {
+		/* TODO: string to uid mapping via user-space daemon */
+		/*
+		 * Parse into a real unsigned int first: st_uid/st_gid may be
+		 * narrower than that, and scanning straight into them through
+		 * a cast would overwrite the neighbouring fields.
+		 */
+		if (mistat->n_uid != -1 &&
+		    sscanf(mistat->uid, "%x", &id) == 1)
+			buf->st_uid = id;
+
+		if (mistat->n_gid != -1 &&
+		    sscanf(mistat->gid, "%x", &id) == 1)
+			buf->st_gid = id;
+	}
+
+	if (buf->st_uid == (unsigned short)-1)
+		buf->st_uid = v9ses->uid;
+	if (buf->st_gid == (unsigned short)-1)
+		buf->st_gid = v9ses->gid;
+
+	buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
+	if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
+		char type = 0;
+		/* unsigned to match the %u conversions below */
+		unsigned int major = ~0U;
+		unsigned int minor = ~0U;
+		sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			buf->st_mode &= ~S_IFBLK;
+			buf->st_mode |= S_IFCHR;
+			break;
+		case 'b':
+			break;
+		default:
+			dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+				type, mistat->extension);
+			break;
+		}
+		buf->st_rdev = MKDEV(major, minor);
+	} else
+		buf->st_rdev = 0;
+
+	buf->st_size = mistat->length;
+
+	buf->st_blksize = sb->s_blocksize;
+	buf->st_blocks =
+	    (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ * Allocates a new inode on @sb, fills in the generic fields and installs
+ * the operations matching the file type in @mode.
+ *
+ * Returns the inode, ERR_PTR(-ENOMEM) if no inode could be allocated, or
+ * ERR_PTR(-EINVAL) for an unknown file type or for a type that needs
+ * extended mode when the session is not extended.  The inode is released
+ * again on the -EINVAL paths.
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = NULL;
+	struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+	dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+	inode = new_inode(sb);
+	if (!inode) {
+		eprintk(KERN_WARNING, "Problem allocating inode\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	inode->i_mode = mode;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_blksize = sb->s_blocksize;
+	inode->i_blocks = 0;
+	inode->i_rdev = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	switch (mode & S_IFMT) {
+	case S_IFIFO:
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFSOCK:
+		if(!v9ses->extended) {
+			dprintk(DEBUG_ERROR, "special files without extended mode\n");
+			goto BadMode;
+		}
+		init_special_inode(inode, inode->i_mode,
+				   inode->i_rdev);
+		break;
+	case S_IFREG:
+		inode->i_op = &v9fs_file_inode_operations;
+		inode->i_fop = &v9fs_file_operations;
+		break;
+	case S_IFLNK:
+		if(!v9ses->extended) {
+			dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+			goto BadMode;
+		}
+		inode->i_op = &v9fs_symlink_inode_operations;
+		break;
+	case S_IFDIR:
+		inode->i_nlink++;
+		if(v9ses->extended)
+			inode->i_op = &v9fs_dir_inode_operations_ext;
+		else
+			inode->i_op = &v9fs_dir_inode_operations;
+		inode->i_fop = &v9fs_dir_operations;
+		break;
+	default:
+		dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+			mode, mode & S_IFMT);
+		goto BadMode;
+	}
+
+	return inode;
+
+ BadMode:
+	/* drop the reference from new_inode() instead of leaking the inode */
+	iput(inode);
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * v9fs_create - helper function to create files and directories
+ * @dir: directory inode file is being created in
+ * @file_dentry: dentry file is being created in
+ * @perm: permissions file is being created with
+ * @open_mode: resulting open mode for file
+ *
+ * Clones the parent directory's fid, issues the create on the clone and
+ * attaches a new fid (marked fidcreate) to @file_dentry.  For symlinks,
+ * links, pipes, sockets and devices the function returns right after
+ * that, without instantiating an inode.  For everything else the new file
+ * is stat'ed and an inode is instantiated on the dentry; directories are
+ * additionally clunked and their dentry dropped.
+ *
+ * Returns 0 on success or a negative errno.  On failure the cloned fid is
+ * clunked, its number returned to the pool and the fid structure, if one
+ * was already attached, destroyed.
+ *
+ * NOTE(review): in the directory case the fid structure stays attached to
+ * the dentry although its fid number was clunked and returned to the pool;
+ * confirm that later users cope with that.
+ */
+
+static int
+v9fs_create(struct inode *dir,
+	    struct dentry *file_dentry,
+	    unsigned int perm, unsigned int open_mode)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct super_block *sb = dir->i_sb;
+	struct v9fs_fid *dirfid =
+	    v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
+	struct v9fs_fid *fid = NULL;
+	struct inode *file_inode = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_qid qid;
+	struct stat newstat;
+	int dirfidnum = -1;
+	long newfid = -1;
+	int result = 0;
+	unsigned int iounit = 0;
+
+	perm = unixmode2p9mode(v9ses, perm);
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
+		file_dentry, perm, open_mode);
+
+	if (!dirfid)
+		return -EBADF;
+
+	dirfidnum = dirfid->fid;
+	if (dirfidnum < 0) {
+		dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
+			dir->i_ino);
+		return -EBADF;
+	}
+
+	if (file_dentry->d_inode) {
+		dprintk(DEBUG_ERROR,
+			"Odd. There is an inode for dir %lu, name :%s:\n",
+			dir->i_ino, file_dentry->d_name.name);
+		return -EEXIST;
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "no free fids available\n");
+		return -ENOSPC;
+	}
+
+	result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		/* negative means "nothing to clunk"; 0 may be a real fid */
+		newfid = -1;
+		goto CleanUpFid;
+	}
+
+	kfree(fcall);
+	fcall = NULL;
+
+	result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
+			       perm, open_mode, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+
+		goto CleanUpFid;
+	}
+
+	iounit = fcall->params.rcreate.iounit;
+	qid = fcall->params.rcreate.qid;
+	kfree(fcall);
+	/* the cleanup path frees fcall again, so it must not dangle */
+	fcall = NULL;
+
+	fid = v9fs_fid_create(file_dentry);
+	if (!fid) {
+		result = -ENOMEM;
+		goto CleanUpFid;
+	}
+
+	fid->fid = newfid;
+	fid->fidopen = 0;
+	fid->fidcreate = 1;
+	fid->qid = qid;
+	fid->iounit = iounit;
+	fid->rdir_pos = 0;
+	fid->rdir_fcall = NULL;
+	fid->v9ses = v9ses;
+
+	if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
+	    (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
+	    (perm & V9FS_DMDEVICE))
+		return 0;
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
+			result);
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+
+	file_inode = v9fs_get_inode(sb, newstat.st_mode);
+	if ((!file_inode) || IS_ERR(file_inode)) {
+		dprintk(DEBUG_ERROR, "create inode failed\n");
+		result = -EBADF;
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+	kfree(fcall);
+	fcall = NULL;
+	d_instantiate(file_dentry, file_inode);
+
+	if (perm & V9FS_DMDIR) {
+		if (v9fs_t_clunk(v9ses, newfid, &fcall))
+			dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
+				FCALL_ERROR(fcall));
+
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		kfree(fcall);
+		fid->fidopen = 0;
+		fid->fidcreate = 0;
+		d_drop(file_dentry);
+	}
+
+	return 0;
+
+ CleanUpFid:
+	kfree(fcall);
+	fcall = NULL;
+
+	if (newfid >= 0) {
+		if (v9fs_t_clunk(v9ses, newfid, &fcall))
+			dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+				FCALL_ERROR(fcall));
+
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		kfree(fcall);
+	}
+
+	/* do not leave a fid behind that refers to the clunked number */
+	if (fid)
+		v9fs_fid_destroy(fid);
+
+	return result;
+}
+
+/**
+ * v9fs_remove - helper function to remove files and directories
+ * @dir: inode of the directory the entry is removed from
+ * @file: dentry that is being deleted
+ * @rmdir: removing a directory (only used for the debug message)
+ *
+ * Issues a remove request on the dentry's operational fid.
+ *
+ * Returns the result of v9fs_t_remove(), or -EBADF if the dentry has no
+ * usable fid.  Once the request has been issued the local fid is released
+ * whatever the outcome: the 9P remove message clunks the fid on the server
+ * even when the removal itself fails, so keeping it would leave a fid
+ * behind that the server no longer knows.
+ */
+
+static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+{
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *v9fid = NULL;
+	struct inode *file_inode = NULL;
+	int fid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+		rmdir);
+
+	file_inode = file->d_inode;
+	v9ses = v9fs_inode2v9ses(file_inode);
+	v9fid = v9fs_fid_lookup(file, FID_OP);
+
+	if (!v9fid) {
+		dprintk(DEBUG_ERROR,
+			"no v9fs_fid\n");
+		return -EBADF;
+	}
+
+	fid = v9fid->fid;
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
+			file_inode->i_ino);
+		return -EBADF;
+	}
+
+	result = v9fs_t_remove(v9ses, fid, &fcall);
+	if (result < 0)
+		dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
+			FCALL_ERROR(fcall), result);
+
+	/* the fid is gone on the server side in either case */
+	v9fs_put_idpool(fid, &v9ses->fidpool);
+	v9fs_fid_destroy(v9fid);
+
+	kfree(fcall);
+	return result;
+}
+
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @inode: directory inode the file is created in
+ * @dentry: dentry of the file to create
+ * @perm: create permissions
+ * @nd: path information
+ *
+ * Forwards to v9fs_create() with an open mode of O_RDWR.
+ */
+
+static int
+v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
+		struct nameidata *nd)
+{
+	const unsigned int open_mode = O_RDWR;
+
+	return v9fs_create(inode, dentry, perm, open_mode);
+}
+
+/**
+ * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
+ * @inode: inode of the parent directory
+ * @dentry: dentry of the directory to create
+ * @mode: mode for new directory
+ *
+ * Forwards to v9fs_create() with S_IFDIR added to @mode and an open mode
+ * of O_RDONLY.
+ */
+
+static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
+{
+	const unsigned int open_mode = O_RDONLY;
+
+	return v9fs_create(inode, dentry, mode | S_IFDIR, open_mode);
+}
+
+/**
+ * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
+ * @dir: inode that is being walked from
+ * @dentry: dentry that is being walked to?
+ * @nameidata: path data
+ *
+ * Walks from the parent's fid to the name in @dentry using a newly
+ * allocated fid, stats the result, builds an inode from it and adds the
+ * inode to the dentry.  A walk failing with -ENOENT adds a negative
+ * dentry instead.
+ *
+ * Returns NULL on success (including the negative-dentry case) or an
+ * ERR_PTR() on failure.  After a successful walk every failure path
+ * clunks the new fid and returns its number to the pool.
+ */
+
+static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+				      struct nameidata *nameidata)
+{
+	struct super_block *sb;
+	struct v9fs_session_info *v9ses;
+	struct v9fs_fid *dirfid;
+	struct v9fs_fid *fid;
+	struct inode *inode;
+	struct v9fs_fcall *fcall = NULL;
+	struct stat newstat;
+	int dirfidnum = -1;
+	int newfid = -1;
+	int result = 0;
+
+	dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+		dir, dentry->d_iname, dentry, nameidata);
+
+	sb = dir->i_sb;
+	v9ses = v9fs_inode2v9ses(dir);
+	dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+
+	if (!dirfid) {
+		dprintk(DEBUG_ERROR, "no dirfid\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	dirfidnum = dirfid->fid;
+
+	if (dirfidnum < 0) {
+		dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
+			dir, dir->i_ino);
+		return ERR_PTR(-EBADF);
+	}
+
+	newfid = v9fs_get_idpool(&v9ses->fidpool);
+	if (newfid < 0) {
+		eprintk(KERN_WARNING, "newfid fails!\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
+	result =
+	    v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name,
+			NULL);
+	if (result < 0) {
+		/* the walk failed: only the number has to be given back */
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+		if (result == -ENOENT) {
+			d_add(dentry, NULL);
+			dprintk(DEBUG_ERROR,
+				"Return negative dentry %p count %d\n",
+				dentry, atomic_read(&dentry->d_count));
+			return NULL;
+		}
+		dprintk(DEBUG_ERROR, "walk error:%d\n", result);
+		goto FreeFcall;
+	}
+
+	result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto CleanUpFid;
+	}
+
+	v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+	inode = v9fs_get_inode(sb, newstat.st_mode);
+
+	/* any error pointer must be caught before inode is dereferenced */
+	if (IS_ERR(inode)) {
+		eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
+			PTR_ERR(inode));
+
+		result = PTR_ERR(inode);
+		goto CleanUpFid;
+	}
+
+	inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+
+	fid = v9fs_fid_create(dentry);
+	if (fid == NULL) {
+		dprintk(DEBUG_ERROR, "couldn't insert\n");
+		result = -ENOMEM;
+		/* the inode was never attached to the dentry */
+		iput(inode);
+		goto CleanUpFid;
+	}
+
+	fid->fid = newfid;
+	fid->fidopen = 0;
+	fid->v9ses = v9ses;
+	fid->qid = fcall->params.rstat.stat->qid;
+
+	dentry->d_op = &v9fs_dentry_operations;
+	v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+
+	d_add(dentry, inode);
+	kfree(fcall);
+
+	return NULL;
+
+ CleanUpFid:
+	/* the walk succeeded, so the server holds the fid: release it */
+	if (v9fs_t_clunk(v9ses, newfid, NULL))
+		dprintk(DEBUG_ERROR, "clunk failed\n");
+
+	v9fs_put_idpool(newfid, &v9ses->fidpool);
+
+ FreeFcall:
+	kfree(fcall);
+	return ERR_PTR(result);
+}
+
+/**
+ * v9fs_vfs_unlink - VFS unlink hook to delete an inode
+ * @i: inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ * Forwards to v9fs_remove() with the rmdir flag cleared.
+ */
+
+static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+{
+	const int is_rmdir = 0;
+
+	return v9fs_remove(i, d, is_rmdir);
+}
+
+/**
+ * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
+ * @i: inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ * Forwards to v9fs_remove() with the rmdir flag set.
+ */
+
+static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+{
+	const int is_rmdir = 1;
+
+	return v9fs_remove(i, d, is_rmdir);
+}
+
+/**
+ * v9fs_vfs_rename - VFS hook to rename an inode
+ * @old_dir: old dir inode
+ * @old_dentry: old dentry
+ * @new_dir: new dir inode
+ * @new_dentry: new dentry
+ *
+ * Renames by sending a wstat in which only the name is set; every other
+ * field is left blank by v9fs_blank_mistat().
+ *
+ * Returns the result of v9fs_t_wstat(), or -ENOMEM if the stat buffer
+ * cannot be allocated, -EBADF if a fid is missing, -EPERM if the old and
+ * new parent directories differ, -ENAMETOOLONG if the new name does not
+ * fit into the stat buffer.
+ */
+
+static int
+v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
+	struct v9fs_fid *olddirfid =
+	    v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
+	struct v9fs_fid *newdirfid =
+	    v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fcall *fcall = NULL;
+	int fid = -1;
+	int olddirfidnum = -1;
+	int newdirfidnum = -1;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
+		dprintk(DEBUG_ERROR, "problem with arguments\n");
+		/* leave through the common exit so that mistat is freed */
+		retval = -EBADF;
+		goto FreeFcallnBail;
+	}
+
+	/* 9P can only handle file rename in the same directory */
+	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
+		dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+		retval = -EPERM;
+		goto FreeFcallnBail;
+	}
+
+	fid = oldfid->fid;
+	olddirfidnum = olddirfid->fid;
+	newdirfidnum = newdirfid->fid;
+
+	if (fid < 0) {
+		dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
+			old_inode->i_ino);
+		retval = -EBADF;
+		goto FreeFcallnBail;
+	}
+
+	v9fs_blank_mistat(v9ses, mistat);
+
+	strcpy(mistat->data + 1, v9ses->name);
+	mistat->name = mistat->data + 1 + strlen(v9ses->name);
+
+	if (new_dentry->d_name.len >
+	    (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
+		dprintk(DEBUG_ERROR, "new name too long\n");
+		/* without an error code this path would report success */
+		retval = -ENAMETOOLONG;
+		goto FreeFcallnBail;
+	}
+
+	strcpy(mistat->name, new_dentry->d_name.name);
+	retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+
+ FreeFcallnBail:
+	kfree(mistat);
+
+	if (retval < 0)
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+
+	kfree(fcall);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_getattr - retrieve file metadata
+ * @mnt: mount information
+ * @dentry: file to get attributes on
+ * @stat: metadata structure to populate
+ *
+ * Issues a stat on the dentry's fid, refreshes the inode from the reply
+ * with v9fs_mistat2inode() and fills @stat via generic_fillattr().
+ * Returns -EBADF when no fid is found, otherwise the v9fs_t_stat() result.
+ */
+
+static int
+v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct v9fs_fcall *reply = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+	int err;
+
+	dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"couldn't find fid associated with dentry\n");
+		return -EBADF;
+	}
+
+	err = v9fs_t_stat(v9ses, fid->fid, &reply);
+	if (err < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto out;
+	}
+
+	v9fs_mistat2inode(reply->params.rstat.stat, dentry->d_inode,
+			  dentry->d_inode->i_sb);
+	generic_fillattr(dentry->d_inode, stat);
+
+ out:
+	kfree(reply);
+	return err;
+}
+
+/**
+ * v9fs_vfs_setattr - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ * Builds a blank stat structure, fills in only the fields selected by
+ * @iattr->ia_valid (mode, mtime, atime, size and, on extended sessions,
+ * uid/gid) and sends it with a wstat.  On success the local inode is
+ * updated through inode_setattr().
+ *
+ * Returns -EBADF when no fid is found, -ENOMEM when the stat buffer
+ * cannot be allocated, a negative wstat result, or the value returned by
+ * inode_setattr().
+ */
+
+static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat;
+	int res = -EPERM;
+
+	dprintk(DEBUG_VFS, "\n");
+
+	/* validate the fid before allocating so nothing leaks on failure */
+	if (!fid) {
+		dprintk(DEBUG_ERROR,
+			"Couldn't find fid associated with dentry\n");
+		return -EBADF;
+	}
+
+	mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	if (!mistat)
+		return -ENOMEM;
+
+	v9fs_blank_mistat(v9ses, mistat);
+	if (iattr->ia_valid & ATTR_MODE)
+		mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+
+	if (iattr->ia_valid & ATTR_MTIME)
+		mistat->mtime = iattr->ia_mtime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_ATIME)
+		mistat->atime = iattr->ia_atime.tv_sec;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		mistat->length = iattr->ia_size;
+
+	if (v9ses->extended) {
+		/* uid/gid strings are packed back to back after data[0] */
+		char *ptr = mistat->data+1;
+
+		if (iattr->ia_valid & ATTR_UID) {
+			mistat->uid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
+			mistat->n_uid = iattr->ia_uid;
+		}
+
+		if (iattr->ia_valid & ATTR_GID) {
+			mistat->gid = ptr;
+			ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
+			mistat->n_gid = iattr->ia_gid;
+		}
+	}
+
+	res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+
+	if (res < 0)
+		dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+
+	kfree(mistat);
+	kfree(fcall);
+
+	/* mirror the change locally only once the server accepted it */
+	if (res >= 0)
+		res = inode_setattr(dentry->d_inode, iattr);
+
+	return res;
+}
+
+/**
+ * v9fs_mistat2inode - populate an inode structure with mistat info
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ * Copies times, ownership, mode, device number and size from @mistat into
+ * @inode.  Ownership starts out as -1; on extended sessions it is taken
+ * from the numeric n_uid/n_gid fields, or parsed as hex from the uid/gid
+ * strings when the numeric field is -1.  Anything still -1 afterwards
+ * falls back to the session's uid/gid.  For block and character devices
+ * the extension string is parsed as "<type> <major> <minor>".
+ *
+ * NOTE(review): major/minor are declared int but scanned with %u, and the
+ * sscanf() results are not checked, so a malformed extension leaves them
+ * at -1 before MKDEV() - confirm whether that is intended.
+ */
+
+void
+v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
+ struct super_block *sb)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ inode->i_nlink = 1;
+
+ inode->i_atime.tv_sec = mistat->atime;
+ inode->i_mtime.tv_sec = mistat->mtime;
+ inode->i_ctime.tv_sec = mistat->mtime; /* ctime is filled from mtime */
+
+ inode->i_uid = -1; /* -1 marks "not set yet" for the fallback below */
+ inode->i_gid = -1;
+
+ if (v9ses->extended) {
+ /* TODO: string to uid mapping via user-space daemon */
+ inode->i_uid = mistat->n_uid;
+ inode->i_gid = mistat->n_gid;
+
+ if (mistat->n_uid == -1)
+ sscanf(mistat->uid, "%x", &inode->i_uid);
+
+ if (mistat->n_gid == -1)
+ sscanf(mistat->gid, "%x", &inode->i_gid);
+ }
+
+ if (inode->i_uid == -1)
+ inode->i_uid = v9ses->uid;
+ if (inode->i_gid == -1)
+ inode->i_gid = v9ses->gid;
+
+ inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
+ char type = 0;
+ int major = -1;
+ int minor = -1;
+ sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+ switch (type) {
+ case 'c': /* character device: swap the block bit for the char bit */
+ inode->i_mode &= ~S_IFBLK;
+ inode->i_mode |= S_IFCHR;
+ break;
+ case 'b': /* block device: mode is left as converted */
+ break;
+ default:
+ dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+ type, mistat->extension);
+ };
+ inode->i_rdev = MKDEV(major, minor);
+ } else
+ inode->i_rdev = 0;
+
+ inode->i_size = mistat->length;
+
+ inode->i_blksize = sb->s_blocksize;
+ inode->i_blocks = /* size rounded up, then shifted by the block-size bits */
+ (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_qid2ino - convert qid into inode number
+ * @qid: qid to hash
+ *
+ * The qid path is offset by 2.  When ino_t is as wide as the 64-bit path
+ * the bytes are copied over unchanged; otherwise the upper and lower
+ * halves are XOR-folded into the narrower type.
+ *
+ * BUG: potential for inode number collisions?
+ */
+
+ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+{
+	u64 shifted = qid->path + 2;
+	ino_t ino;
+
+	/* narrower ino_t: fold the two 32-bit halves together */
+	if (sizeof(ino_t) != sizeof(shifted))
+		return (ino_t) (shifted ^ (shifted >> 32));
+
+	ino = 0;
+	memcpy(&ino, &shifted, sizeof(ino_t));
+	return ino;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * Creates the file with v9fs_create(), stores @symname in the stat
+ * extension field with a wstat, clunks the new fid and drops the dentry.
+ * Only available on extended sessions (-EPERM otherwise).
+ *
+ * Returns 0 or the wstat result on success, -ENOMEM, -EPERM,
+ * -ENAMETOOLONG (target does not fit in the stat buffer), -EBADF (no fid
+ * for the new dentry) or a negative create/wstat error.
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		symname);
+
+	if (!mistat)
+		return -ENOMEM;
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	/*
+	 * The target is copied to mistat->data + 1 below; refuse targets that
+	 * would run past the maxdata-sized buffer (offset byte + terminator).
+	 * NOTE(review): uses sizeof(struct v9fs_stat) as the header size, the
+	 * same bound v9fs_vfs_rename() relies on - confirm against 9p.h.
+	 */
+	if (strlen(symname) + 2 + sizeof(struct v9fs_stat) > v9ses->maxdata) {
+		dprintk(DEBUG_ERROR, "symlink target too long\n");
+		retval = -ENAMETOOLONG;
+		goto FreeFcall;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, S_IFLNK, 0);
+	if (retval != 0)
+		goto FreeFcall;
+
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeFcall;
+	}
+
+	/* issue a twstat */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	/* drop the stale pointer so the exit path can never free it twice */
+	kfree(fcall);
+	fcall = NULL;
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeFcall;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+ FreeFcall:
+	kfree(mistat);
+	kfree(fcall);
+
+	return retval;
+}
+
+/**
+ * v9fs_readlink - read a symlink's location (internal version)
+ * @dentry: dentry for symlink
+ * @buffer: buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ * Stats the dentry's fid and copies the extension string of the reply
+ * into @buffer.  At most @buflen bytes are written.  The copy is
+ * NUL-terminated only when the target is shorter than @buflen; callers
+ * that need a terminator in the truncated case must add it themselves.
+ *
+ * Returns the number of target bytes copied (terminator not counted),
+ * -EINVAL for a negative @buflen or a non-symlink, -EBADF when no fid is
+ * found or the session is not extended, -EIO when the stat produced no
+ * reply, or a negative stat error.
+ */
+
+static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+	int retval = -EPERM;
+
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+	const char *target;
+	size_t len;
+
+	/* a negative length would turn huge in the unsigned compare below */
+	if (buflen < 0)
+		return -EINVAL;
+
+	if (!fid) {
+		dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeFcall;
+	}
+
+	if (!v9ses->extended) {
+		retval = -EBADF;
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeFcall;
+	}
+
+	dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
+	retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		goto FreeFcall;
+	}
+
+	if (!fcall)
+		return -EIO;
+
+	if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+		retval = -EINVAL;
+		goto FreeFcall;
+	}
+
+	/* copy extension buffer into buffer */
+	target = fcall->params.rstat.stat->extension;
+	len = strlen(target);
+
+	if (len < (size_t) buflen) {
+		/* whole target fits: copy it along with its terminator */
+		memcpy(buffer, target, len + 1);
+		retval = len;
+	} else {
+		/* truncated: never write more than buflen bytes */
+		memcpy(buffer, target, buflen);
+		retval = buflen;
+	}
+
+ FreeFcall:
+	kfree(fcall);
+
+	return retval;
+}
+
+/**
+ * v9fs_vfs_readlink - read a symlink's location
+ * @dentry: dentry for symlink
+ * @buffer: user-space buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ * Reads the link target into a temporary kernel buffer obtained from
+ * __getname() and copies the result to user space.
+ *
+ * Returns the number of bytes copied, -EINVAL for a negative @buflen,
+ * -ENOMEM when no temporary buffer is available, -EFAULT when the copy
+ * to user space fails, or the error returned by v9fs_readlink().
+ */
+
+static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
+			     int buflen)
+{
+	int retval;
+	char *link;
+
+	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+	if (buflen < 0)
+		return -EINVAL;
+
+	link = __getname();
+	if (!link)
+		return -ENOMEM;
+
+	/*
+	 * Bound the request by the temporary buffer, not by strlen() of
+	 * uninitialised memory.  One byte is held back so that a trailing
+	 * terminator written by v9fs_readlink() always fits.
+	 */
+	if (buflen > PATH_MAX - 1)
+		buflen = PATH_MAX - 1;
+
+	retval = v9fs_readlink(dentry, link, buflen);
+
+	if (retval > 0) {
+		/* copy_to_user() returns the bytes NOT copied, not an errno */
+		if (copy_to_user(buffer, link, retval) != 0) {
+			dprintk(DEBUG_ERROR, "problem copying to user\n");
+			retval = -EFAULT;
+		}
+	}
+
+	putname(link);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ * Reads the link target into a buffer from __getname() and hands it to
+ * the VFS through nd_set_link().  On failure an ERR_PTR is stored there
+ * instead.  A successfully stored buffer is released later by
+ * v9fs_vfs_put_link().  Always returns NULL.
+ */
+
+static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	int len = 0;
+	char *link = __getname();
+
+	dprintk(DEBUG_VFS, "%s\n", dentry->d_name.name);
+
+	if (!link)
+		link = ERR_PTR(-ENOMEM);
+	else {
+		/*
+		 * Pass the real capacity rather than strlen() of an
+		 * uninitialised buffer; one byte is reserved for the
+		 * terminator stored at link[len] below.
+		 */
+		len = v9fs_readlink(dentry, link, PATH_MAX - 1);
+
+		if (len < 0) {
+			putname(link);
+			link = ERR_PTR(len);
+		} else
+			link[len] = 0;
+	}
+	nd_set_link(nd, link);
+
+	return NULL;
+}
+
+/**
+ * v9fs_vfs_put_link - release a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ * @p: cookie from follow_link (not used here)
+ *
+ * Frees the link string stored in @nd unless it is an error pointer.
+ */
+
+static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *link = nd_get_link(nd);
+
+	dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, link);
+
+	/* error pointers were never allocated, so there is nothing to free */
+	if (IS_ERR(link))
+		return;
+
+	putname(link);
+}
+
+/**
+ * v9fs_vfs_link - create a hardlink
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ * Creates a V9FS_DMLINK file and writes "hardlink(<fid>)" into its stat
+ * extension field, where <fid> is the fid of @old_dentry.  The new fid is
+ * then clunked and the dentry dropped.  Only available on extended
+ * sessions (-EPERM otherwise).
+ *
+ * Returns 0 or the wstat result on success, -ENOMEM, -EPERM, -EBADF (a
+ * fid could not be resolved) or a negative create/wstat error.
+ */
+
+/* XXX - lots of code dup'd from symlink and creates,
+ * figure out a better reuse strategy
+ */
+
+static int
+v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int retval = -EPERM;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
+	struct v9fs_fid *newfid = NULL;
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+		old_dentry->d_name.name);
+
+	/* both scratch buffers are written to below; bail out if either failed */
+	if (!mistat || !symname) {
+		retval = -ENOMEM;
+		goto FreeMem;
+	}
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	if (!oldfid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		retval = -EBADF;
+		goto FreeMem;
+	}
+
+	/* get fid of old_dentry */
+	sprintf(symname, "hardlink(%d)\n", oldfid->fid);
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
+	if (retval != 0)
+		goto FreeMem;
+
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+		/* retval is 0 after the create; report the failure instead */
+		retval = -EBADF;
+		goto FreeMem;
+	}
+
+	/* issue a twstat */
+	v9fs_blank_mistat(v9ses, mistat);
+	strcpy(mistat->data + 1, symname);
+	mistat->extension = mistat->data + 1;
+	retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+	if (retval < 0) {
+		dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	/* drop the stale pointer so the exit path can never free it twice */
+	kfree(fcall);
+	fcall = NULL;
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+ FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	if (symname)
+		putname(symname);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_mknod - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ * Creates the file with v9fs_create().  For block and character devices
+ * the device numbers are written as "b <major> <minor>" or
+ * "c <major> <minor>" into the stat extension field with a wstat; FIFOs
+ * need no extension.  The new fid is then clunked and the dentry dropped.
+ * Only available on extended sessions (-EPERM otherwise).
+ *
+ * Returns 0 or the wstat result on success, -ENOMEM, -EPERM, -EINVAL
+ * (invalid device number, unsupported file type or unresolved fid) or a
+ * negative create/wstat error.
+ */
+
+static int
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+	int retval = -EPERM;
+	struct v9fs_fid *newfid;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+	struct v9fs_fcall *fcall = NULL;
+	struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+	char *symname = __getname();
+
+	dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+	/* release whichever buffer did get allocated via the common exit */
+	if (!mistat || !symname) {
+		retval = -ENOMEM;
+		goto FreeMem;
+	}
+
+	if (!new_valid_dev(rdev)) {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!v9ses->extended) {
+		dprintk(DEBUG_ERROR, "not extended\n");
+		goto FreeMem;
+	}
+
+	/* issue a create */
+	retval = v9fs_create(dir, dentry, mode, 0);
+
+	if (retval != 0)
+		goto FreeMem;
+
+	newfid = v9fs_fid_lookup(dentry, FID_OP);
+	if (!newfid) {
+		dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	/* build extension */
+	if (S_ISBLK(mode))
+		sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISCHR(mode))
+		sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+	else if (S_ISFIFO(mode))
+		;	/* DO NOTHING */
+	else {
+		retval = -EINVAL;
+		goto FreeMem;
+	}
+
+	if (!S_ISFIFO(mode)) {
+		/* issue a twstat */
+		v9fs_blank_mistat(v9ses, mistat);
+		strcpy(mistat->data + 1, symname);
+		mistat->extension = mistat->data + 1;
+		retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+		if (retval < 0) {
+			dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+				FCALL_ERROR(fcall));
+			goto FreeMem;
+		}
+	}
+
+	/* need to update dcache so we show up */
+	/* drop the stale pointer so the exit path can never free it twice */
+	kfree(fcall);
+	fcall = NULL;
+
+	if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+		dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+			FCALL_ERROR(fcall));
+		goto FreeMem;
+	}
+
+	d_drop(dentry);		/* FID - will this also clunk? */
+
+ FreeMem:
+	kfree(mistat);
+	kfree(fcall);
+	if (symname)
+		putname(symname);
+
+	return retval;
+}
+
+static struct inode_operations v9fs_dir_inode_operations_ext = { /* directory ops; adds .symlink, .link and .readlink to the plain directory table */
+ .create = v9fs_vfs_create,
+ .lookup = v9fs_vfs_lookup,
+ .symlink = v9fs_vfs_symlink,
+ .link = v9fs_vfs_link,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod,
+ .rename = v9fs_vfs_rename,
+ .readlink = v9fs_vfs_readlink,
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+}; /* NOTE(review): presumably selected for sessions with v9ses->extended set - confirm at the assignment site */
+
+static struct inode_operations v9fs_dir_inode_operations = { /* directory ops without the symlink/link/readlink hooks */
+ .create = v9fs_vfs_create,
+ .lookup = v9fs_vfs_lookup,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod, /* v9fs_vfs_mknod() itself returns -EPERM on non-extended sessions */
+ .rename = v9fs_vfs_rename,
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_file_inode_operations = { /* file inodes: attribute get/set only */
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_symlink_inode_operations = { /* symlink inodes: link reading/following plus attributes */
+ .readlink = v9fs_vfs_readlink,
+ .follow_link = v9fs_vfs_follow_link, /* stores a __getname() buffer in the nameidata */
+ .put_link = v9fs_vfs_put_link, /* releases the buffer stored by follow_link */
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 00000000000..868f350b2c5
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,280 @@
+/*
+ * linux/fs/9p/vfs_super.c
+ *
+ * This file contains superblock ops for 9P2000. It is intended that
+ * you mount this file system on directories.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static void v9fs_clear_inode(struct inode *);
+static struct super_operations v9fs_super_ops;
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ * Starts writeback of the inode's page-cache mapping; the result of
+ * filemap_fdatawrite() is not examined.
+ */
+
+static void v9fs_clear_inode(struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	filemap_fdatawrite(mapping);
+}
+
+/**
+ * v9fs_set_super - set the superblock
+ * @s: super block
+ * @data: file system specific data
+ *
+ * Stores @data in s->s_fs_info and then lets set_anon_super() finish the
+ * setup; its return value is passed back to the caller.
+ */
+
+static int v9fs_set_super(struct super_block *s, void *data)
+{
+	int err;
+
+	s->s_fs_info = data;
+	err = set_anon_super(s, data);
+
+	return err;
+}
+
+/**
+ * v9fs_fill_super - populate superblock with info
+ * @sb: superblock
+ * @v9ses: session information
+ * @flags: mount flags to merge into the superblock flags
+ *
+ * The block size is derived from the session's maxdata:
+ * s_blocksize_bits = fls(maxdata - 1) and s_blocksize = 1 << that value.
+ */
+
+static void
+v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
+		int flags)
+{
+	/* identity and operations */
+	sb->s_magic = V9FS_MAGIC;
+	sb->s_op = &v9fs_super_ops;
+
+	/* size limits; block size follows from the session's maxdata */
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
+	sb->s_blocksize = 1 << sb->s_blocksize_bits;
+
+	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
+		      MS_NODIRATIME | MS_NOATIME;
+}
+
+/**
+ * v9fs_get_sb - mount a superblock
+ * @fs_type: file system type
+ * @flags: mount flags
+ * @dev_name: device name that was mounted
+ * @data: mount options
+ *
+ * Allocates and initialises a session, obtains a superblock with sget(),
+ * builds the root inode and dentry, attaches a fid to the root and fills
+ * the root inode from a stat on the fid returned by v9fs_session_init().
+ *
+ * Returns the superblock on success or an ERR_PTR() on failure.
+ *
+ * NOTE(review): the unwind labels fall through, so release_dentry does a
+ * dput() of the root followed by an iput() of the same inode, and
+ * put_back_sb calls deactivate_super() before closing and freeing the
+ * session that v9fs_kill_super() also closes and frees.  Whether these
+ * can double-release depends on code outside this function - confirm.
+ */
+
+static struct super_block *v9fs_get_sb(struct file_system_type
+				       *fs_type, int flags,
+				       const char *dev_name, void *data)
+{
+	struct super_block *sb = NULL;
+	struct v9fs_fcall *fcall = NULL;
+	struct inode *inode = NULL;
+	struct dentry *root = NULL;
+	struct v9fs_session_info *v9ses = NULL;
+	struct v9fs_fid *root_fid = NULL;
+	int mode = S_IRWXUGO | S_ISVTX;
+	uid_t uid = current->fsuid;
+	gid_t gid = current->fsgid;
+	int stat_result = 0;
+	int newfid = 0;
+	int retval = 0;
+
+	dprintk(DEBUG_VFS, " \n");
+
+	v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+	if (!v9ses)
+		return ERR_PTR(-ENOMEM);
+
+	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
+		dprintk(DEBUG_ERROR, "problem initiating session\n");
+		retval = newfid;
+		goto free_session;
+	}
+
+	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+	if (IS_ERR(sb)) {
+		/* no superblock exists yet: only the session needs undoing */
+		retval = PTR_ERR(sb);
+		goto close_session;
+	}
+
+	v9fs_fill_super(sb, v9ses, flags);
+
+	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	if (IS_ERR(inode)) {
+		retval = PTR_ERR(inode);
+		goto put_back_sb;
+	}
+
+	inode->i_uid = uid;
+	inode->i_gid = gid;
+
+	root = d_alloc_root(inode);
+
+	if (!root) {
+		retval = -ENOMEM;
+		goto release_inode;
+	}
+
+	sb->s_root = root;
+
+	/* Setup the Root Inode */
+	root_fid = v9fs_fid_create(root);
+	if (root_fid == NULL) {
+		retval = -ENOMEM;
+		goto release_dentry;
+	}
+
+	root_fid->fidopen = 0;
+	root_fid->v9ses = v9ses;
+
+	stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
+	if (stat_result < 0) {
+		dprintk(DEBUG_ERROR, "stat error\n");
+		/* give the fid back to the server and to the id pool */
+		v9fs_t_clunk(v9ses, newfid, NULL);
+		v9fs_put_idpool(newfid, &v9ses->fidpool);
+	} else {
+		root_fid->fid = newfid;
+		root_fid->qid = fcall->params.rstat.stat->qid;
+		root->d_inode->i_ino =
+		    v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+		v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+	}
+
+	kfree(fcall);
+
+	if (stat_result < 0) {
+		retval = stat_result;
+		goto release_dentry;
+	}
+
+	return sb;
+
+ release_dentry:
+	dput(sb->s_root);
+
+ release_inode:
+	iput(inode);
+
+ put_back_sb:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+
+ close_session:
+	v9fs_session_close(v9ses);
+
+ free_session:
+	kfree(v9ses);
+
+	return ERR_PTR(retval);
+}
+
+/**
+ * v9fs_kill_super - Kill Superblock
+ * @s: superblock
+ *
+ * Tears the mount down in this order: release the root dentry through
+ * v9fs_dentry_release(), destroy the superblock with kill_anon_super(),
+ * then close and free the session that was stored in s->s_fs_info.
+ * The session pointer is read before kill_anon_super() runs.
+ */
+
+static void v9fs_kill_super(struct super_block *s)
+{
+ struct v9fs_session_info *v9ses = s->s_fs_info;
+
+ dprintk(DEBUG_VFS, " %p\n", s);
+
+ v9fs_dentry_release(s->s_root); /* clunk root */
+
+ kill_anon_super(s);
+
+ v9fs_session_close(v9ses);
+ kfree(v9ses);
+ dprintk(DEBUG_VFS, "exiting kill_super\n");
+}
+
+/**
+ * v9fs_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ *
+ * Options that have a default (debug, port, msize, afid, proto, extend,
+ * nodevmap) are printed only when they differ from the value tested
+ * below; name, aname, uid and gid are always printed.  Always returns 0.
+ */
+
+static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct v9fs_session_info *ses = mnt->mnt_sb->s_fs_info;
+
+	/* conditional options */
+	if (ses->debug != 0)
+		seq_printf(m, ",debug=%u", ses->debug);
+
+	if (ses->port != V9FS_PORT)
+		seq_printf(m, ",port=%u", ses->port);
+
+	if (ses->maxdata != 9000)
+		seq_printf(m, ",msize=%u", ses->maxdata);
+
+	if (ses->afid != ~0)
+		seq_printf(m, ",afid=%u", ses->afid);
+
+	if (ses->proto == PROTO_UNIX)
+		seq_puts(m, ",proto=unix");
+
+	if (ses->extended == 0)
+		seq_puts(m, ",noextend");
+
+	if (ses->nodev == 1)
+		seq_puts(m, ",nodevmap");
+
+	/* unconditional options */
+	seq_printf(m, ",name=%s", ses->name);
+	seq_printf(m, ",aname=%s", ses->remotename);
+	seq_printf(m, ",uid=%u", ses->uid);
+	seq_printf(m, ",gid=%u", ses->gid);
+
+	return 0;
+}
+
+static void
+v9fs_umount_begin(struct super_block *sb)
+{
+	/* the session was stored in s_fs_info by v9fs_set_super() */
+	v9fs_session_cancel(sb->s_fs_info);
+}
+
+static struct super_operations v9fs_super_ops = { /* installed on each superblock by v9fs_fill_super() */
+ .statfs = simple_statfs,
+ .clear_inode = v9fs_clear_inode,
+ .show_options = v9fs_show_options,
+ .umount_begin = v9fs_umount_begin,
+};
+
+struct file_system_type v9fs_fs_type = { /* filesystem type descriptor; non-static, so visible outside this file */
+ .name = "9P",
+ .get_sb = v9fs_get_sb,
+ .kill_sb = v9fs_kill_super,
+ .owner = THIS_MODULE,
+};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5e817902cb3..068ccea2f18 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -462,6 +462,19 @@ config AUTOFS4_FS
local network, you probably do not need an automounter, and can say
N here.
+config FUSE_FS
+ tristate "Filesystem in Userspace support"
+ help
+ With FUSE it is possible to implement a fully functional filesystem
+ in a userspace program.
+
+ There's also a companion library: libfuse. This library along with
+ utilities is available from the FUSE homepage:
+ <http://fuse.sourceforge.net/>
+
+ If you want to develop a userspace FS, or if you want to use
+ a filesystem based on FUSE, answer Y or M.
+
menu "CD-ROM/DVD Filesystems"
config ISO9660_FS
@@ -1703,6 +1716,17 @@ config AFS_FS
config RXRPC
tristate
+config 9P_FS
+ tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+ depends on INET && EXPERIMENTAL
+ help
+ If you say Y here, you will get experimental support for
+ Plan 9 resource sharing via the 9P2000 protocol.
+
+ See <http://v9fs.sf.net> for more information.
+
+ If unsure, say N.
+
endmenu
menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 15158309dee..1972da18627 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,11 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
obj-$(CONFIG_AUTOFS_FS) += autofs/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
+obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_RELAYFS_FS) += relayfs/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_JFS_FS) += jfs/
obj-$(CONFIG_XFS_FS) += xfs/
+obj-$(CONFIG_9P_FS) += 9p/
obj-$(CONFIG_AFS_FS) += afs/
obj-$(CONFIG_BEFS_FS) += befs/
obj-$(CONFIG_HOSTFS) += hostfs/
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f200453..9ebe881c678 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
affs_delete_inode(struct inode *inode)
{
pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
if (S_ISREG(inode->i_mode))
affs_truncate(inode);
diff --git a/fs/aio.c b/fs/aio.c
index 4f641abac3c..38f62680fd6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
#include <linux/highmem.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/rcuref.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
/* Must be done under the lock to serialise against cancellation.
* Call this aio_fput as it duplicates fput via the fput_work.
*/
- if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ioctx;
}
+static int lock_kiocb_action(void *param) /* wait action handed to wait_on_bit_lock() by lock_kiocb() */
+{
+ schedule(); /* yield the CPU while waiting */
+ return 0;
+}
+
+static inline void lock_kiocb(struct kiocb *iocb) /* take the KIF_LOCKED bit lock in iocb->ki_flags */
+{
+ wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
+ TASK_UNINTERRUPTIBLE); /* waits uninterruptibly via lock_kiocb_action() */
+}
+
+static inline void unlock_kiocb(struct kiocb *iocb) /* counterpart of lock_kiocb() */
+{
+ kiocbClearLocked(iocb);
+ wake_up_bit(&iocb->ki_flags, KIF_LOCKED); /* wake waiters blocked on the KIF_LOCKED bit */
+}
+
/*
* use_mm
* Makes the calling kernel thread take on the specified
@@ -786,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx)
* Hold an extra reference while retrying i/o.
*/
iocb->ki_users++; /* grab extra reference */
+ lock_kiocb(iocb);
aio_run_iocb(iocb);
+ unlock_kiocb(iocb);
if (__aio_put_req(ctx, iocb)) /* drop extra ref */
put_ioctx(ctx);
}
@@ -1527,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
- if (likely(list_empty(&ctx->run_list))) {
- aio_run_iocb(req);
- } else {
- list_add_tail(&req->ki_run_list, &ctx->run_list);
+ aio_run_iocb(req);
+ unlock_kiocb(req);
+ if (!list_empty(&ctx->run_list)) {
/* drain the run list */
while (__aio_run_iocbs(ctx))
;
@@ -1661,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
if (NULL != cancel) {
struct io_event tmp;
pr_debug("calling cancel\n");
+ lock_kiocb(kiocb);
memset(&tmp, 0, sizeof(tmp));
tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
tmp.data = kiocb->ki_user_data;
@@ -1672,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
if (copy_to_user(result, &tmp, sizeof(tmp)))
ret = -EFAULT;
}
+ unlock_kiocb(kiocb);
} else
- printk(KERN_DEBUG "iocb has no cancel operation\n");
+ ret = -EINVAL;
put_ioctx(ctx);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272d..990c28da5ae 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
struct file *pipe;
pid_t oz_pgrp;
int catatonic;
+ struct super_block *sb;
unsigned long exp_timeout;
ino_t next_dir_ino;
struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
void autofs_hash_delete(struct autofs_dir_ent *);
struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_dirhash *);
+void autofs_hash_nuke(struct autofs_sb_info *);
/* Expiration-handling functions */
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd079..5ccfcf26310 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
/* Delete everything. This is used on filesystem destruction, so we
make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_dirhash *dh)
+void autofs_hash_nuke(struct autofs_sb_info *sbi)
{
int i;
struct autofs_dir_ent *ent, *nent;
for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = dh->h[i] ; ent ; ent = nent ) {
+ for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
nent = ent->next;
if ( ent->dentry )
dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
kfree(ent);
}
}
+ shrink_dcache_sb(sbi->sb);
}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf..65e5ed42190 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
if ( !sbi->catatonic )
autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
- autofs_hash_nuke(&sbi->dirhash);
+ autofs_hash_nuke(sbi);
for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
if ( test_bit(n, sbi->symlink_bitmap) )
kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs_sops;
s->s_time_gran = 1;
+ sbi->sb = s;
root_inode = iget(s, AUTOFS_ROOT_INO);
root = d_alloc_root(root_inode);
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88be..1fbc53f14ab 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
unsigned long si_lasti;
unsigned long * si_imap;
struct buffer_head * si_sbh; /* buffer header w/superblock */
- struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */
};
/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057f..e240c335eb2 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
* fs/bfs/dir.c
* BFS directory operations.
* Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
*/
#include <linux/time.h>
@@ -20,9 +21,9 @@
#define dprintf(x...)
#endif
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino);
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
static struct buffer_head * bfs_find_entry(struct inode * dir,
- const char * name, int namelen, struct bfs_dirent ** res_dir);
+ const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
{
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
de = (struct bfs_dirent *)(bh->b_data + offset);
if (de->ino) {
int size = strnlen(de->name, BFS_NAMELEN);
- if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) {
+ if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
brelse(bh);
unlock_kernel();
return 0;
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
inode->i_mapping->a_ops = &bfs_aops;
inode->i_mode = mode;
inode->i_ino = ino;
- BFS_I(inode)->i_dsk_ino = ino;
+ BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino);
BFS_I(inode)->i_sblock = 0;
BFS_I(inode)->i_eblock = 0;
insert_inode_hash(inode);
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
lock_kernel();
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
if (bh) {
- unsigned long ino = le32_to_cpu(de->ino);
+ unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
brelse(bh);
inode = iget(dir->i_sb, ino);
if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
inode = dentry->d_inode;
lock_kernel();
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
- if (!bh || de->ino != inode->i_ino)
+ if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
goto out_brelse;
if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len, &old_de);
- if (!old_bh || old_de->ino != old_inode->i_ino)
+ if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
goto end_rename;
error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
.rename = bfs_rename,
};
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino)
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
{
struct buffer_head * bh;
struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
}
dir->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(dir);
- de->ino = ino;
+ de->ino = cpu_to_le16((u16)ino);
for (i=0; i<BFS_NAMELEN; i++)
de->name[i] = (i < namelen) ? name[i] : 0;
mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
return -ENOSPC;
}
-static inline int bfs_namecmp(int len, const char * name, const char * buffer)
+static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
{
if (len < BFS_NAMELEN && buffer[len])
return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
}
static struct buffer_head * bfs_find_entry(struct inode * dir,
- const char * name, int namelen, struct bfs_dirent ** res_dir)
+ const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
{
unsigned long block, offset;
struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
}
de = (struct bfs_dirent *)(bh->b_data + offset);
offset += BFS_DIRENT_SIZE;
- if (de->ino && bfs_namecmp(namelen, name, de->name)) {
+ if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
*res_dir = de;
return bh;
}
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e..807723b65da 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
return 0;
}
-static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end,
- unsigned long where)
+static int bfs_move_blocks(struct super_block *sb, unsigned long start,
+ unsigned long end, unsigned long where)
{
unsigned long i;
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
static int bfs_get_block(struct inode * inode, sector_t block,
struct buffer_head * bh_result, int create)
{
- long phys;
+ unsigned long phys;
int err;
struct super_block *sb = inode->i_sb;
struct bfs_sb_info *info = BFS_SB(sb);
struct bfs_inode_info *bi = BFS_I(inode);
struct buffer_head *sbh = info->si_sbh;
- if (block < 0 || block > info->si_blocks)
+ if (block > info->si_blocks)
return -EIO;
phys = bi->i_sblock + block;
if (!create) {
if (phys <= bi->i_eblock) {
- dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys);
+ dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
}
return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
of blocks allocated for this file, we can grant it */
if (inode->i_size && phys <= bi->i_eblock) {
dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n",
- create, block, phys);
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
return 0;
}
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
/* the rest has to be protected against itself */
lock_kernel();
- /* if the last data block for this file is the last allocated block, we can
- extend the file trivially, without moving it anywhere */
+ /* if the last data block for this file is the last allocated
+ block, we can extend the file trivially, without moving it
+ anywhere */
if (bi->i_eblock == info->si_lf_eblk) {
dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n",
- create, block, phys);
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
info->si_freeb -= phys - bi->i_eblock;
info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
} else
err = 0;
- dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys);
+ dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
+ create, (unsigned long)block, phys);
bi->i_sblock = phys;
phys += block;
info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0..c7b39aa279d 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
* BFS superblock and inode operations.
* Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
* From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
+ *
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
*/
#include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
di = (struct bfs_inode *)bh->b_data + off;
- inode->i_mode = 0x0000FFFF & di->i_mode;
- if (di->i_vtype == BFS_VDIR) {
+ inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode);
+ if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
inode->i_mode |= S_IFDIR;
inode->i_op = &bfs_dir_inops;
inode->i_fop = &bfs_dir_operations;
- } else if (di->i_vtype == BFS_VREG) {
+ } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
inode->i_mode |= S_IFREG;
inode->i_op = &bfs_file_inops;
inode->i_fop = &bfs_file_operations;
inode->i_mapping->a_ops = &bfs_aops;
}
- inode->i_uid = di->i_uid;
- inode->i_gid = di->i_gid;
- inode->i_nlink = di->i_nlink;
+ BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock);
+ BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock);
+ inode->i_uid = le32_to_cpu(di->i_uid);
+ inode->i_gid = le32_to_cpu(di->i_gid);
+ inode->i_nlink = le32_to_cpu(di->i_nlink);
inode->i_size = BFS_FILESIZE(di);
inode->i_blocks = BFS_FILEBLOCKS(di);
+ if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
inode->i_blksize = PAGE_SIZE;
- inode->i_atime.tv_sec = di->i_atime;
- inode->i_mtime.tv_sec = di->i_mtime;
- inode->i_ctime.tv_sec = di->i_ctime;
+ inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
+ inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
+ inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
- BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */
- BFS_I(inode)->i_sblock = di->i_sblock;
- BFS_I(inode)->i_eblock = di->i_eblock;
+ BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
brelse(bh);
}
static int bfs_write_inode(struct inode * inode, int unused)
{
- unsigned long ino = inode->i_ino;
+ unsigned int ino = (u16)inode->i_ino;
+ unsigned long i_sblock;
struct bfs_inode * di;
struct buffer_head * bh;
int block, off;
+ dprintf("ino=%08x\n", ino);
+
if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
- printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino);
+ printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
return -EIO;
}
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
bh = sb_bread(inode->i_sb, block);
if (!bh) {
- printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino);
+ printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
unlock_kernel();
return -EIO;
}
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
di = (struct bfs_inode *)bh->b_data + off;
- if (inode->i_ino == BFS_ROOT_INO)
- di->i_vtype = BFS_VDIR;
+ if (ino == BFS_ROOT_INO)
+ di->i_vtype = cpu_to_le32(BFS_VDIR);
else
- di->i_vtype = BFS_VREG;
-
- di->i_ino = inode->i_ino;
- di->i_mode = inode->i_mode;
- di->i_uid = inode->i_uid;
- di->i_gid = inode->i_gid;
- di->i_nlink = inode->i_nlink;
- di->i_atime = inode->i_atime.tv_sec;
- di->i_mtime = inode->i_mtime.tv_sec;
- di->i_ctime = inode->i_ctime.tv_sec;
- di->i_sblock = BFS_I(inode)->i_sblock;
- di->i_eblock = BFS_I(inode)->i_eblock;
- di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1;
+ di->i_vtype = cpu_to_le32(BFS_VREG);
+
+ di->i_ino = cpu_to_le16(ino);
+ di->i_mode = cpu_to_le32(inode->i_mode);
+ di->i_uid = cpu_to_le32(inode->i_uid);
+ di->i_gid = cpu_to_le32(inode->i_gid);
+ di->i_nlink = cpu_to_le32(inode->i_nlink);
+ di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+ di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+ di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+ i_sblock = BFS_I(inode)->i_sblock;
+ di->i_sblock = cpu_to_le32(i_sblock);
+ di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
+ di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
mark_buffer_dirty(bh);
+ dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
brelse(bh);
unlock_kernel();
return 0;
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
int block, off;
struct super_block * s = inode->i_sb;
struct bfs_sb_info * info = BFS_SB(s);
+ struct bfs_inode_info * bi = BFS_I(inode);
- dprintf("ino=%08lx\n", inode->i_ino);
+ dprintf("ino=%08lx\n", ino);
- if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) {
- printf("invalid ino=%08lx\n", inode->i_ino);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
+ printf("invalid ino=%08lx\n", ino);
return;
}
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
return;
}
off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
- di = (struct bfs_inode *)bh->b_data + off;
- if (di->i_ino) {
- info->si_freeb += BFS_FILEBLOCKS(di);
+ di = (struct bfs_inode *) bh->b_data + off;
+ if (bi->i_dsk_ino) {
+ info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
info->si_freei++;
- clear_bit(di->i_ino, info->si_imap);
+ clear_bit(ino, info->si_imap);
dump_imap("delete_inode", s);
- }
+ }
di->i_ino = 0;
di->i_sblock = 0;
mark_buffer_dirty(bh);
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = {
void dump_imap(const char *prefix, struct super_block * s)
{
-#if 0
+#ifdef DEBUG
int i;
char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
if (!tmpbuf)
return;
for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
- if (i>PAGE_SIZE-100) break;
+ if (i > PAGE_SIZE-100) break;
if (test_bit(i, BFS_SB(s)->si_imap))
strcat(tmpbuf, "1");
else
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct buffer_head * bh;
struct bfs_super_block * bfs_sb;
struct inode * inode;
- int i, imap_len;
+ unsigned i, imap_len;
struct bfs_sb_info * info;
info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
if(!bh)
goto out;
bfs_sb = (struct bfs_super_block *)bh->b_data;
- if (bfs_sb->s_magic != BFS_MAGIC) {
+ if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
printf("No BFS filesystem on %s (magic=%08x)\n",
- s->s_id, bfs_sb->s_magic);
+ s->s_id, le32_to_cpu(bfs_sb->s_magic));
goto out;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
printf("%s is unclean, continuing\n", s->s_id);
s->s_magic = BFS_MAGIC;
- info->si_bfs_sb = bfs_sb;
info->si_sbh = bh;
- info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode)
+ info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
+ BFS_ROOT_INO - 1;
imap_len = info->si_lasti/8 + 1;
@@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
goto out;
}
- info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
- info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS;
+ info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
+ info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS;
info->si_freei = 0;
info->si_lf_eblk = 0;
info->si_lf_sblk = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 1c62203a490..6cbfceabd95 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -40,6 +40,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
+#include <linux/bit_spinlock.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void invalidate_bh_lrus(void);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3217ac5f6bd..2335f14a158 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
}
cifs_sb->tcon = NULL;
- if (ses) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 2);
- }
+ if (ses)
+ schedule_timeout_interruptible(msecs_to_jiffies(500));
if (ses)
sesInfoFree(ses);
diff --git a/fs/compat.c b/fs/compat.c
index 8c665705c6a..ac3fb9ed8ee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1619,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
char *bits;
long timeout;
int size, max_fdset, ret = -EINVAL;
+ struct fdtable *fdt;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -1644,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
- max_fdset = current->files->max_fdset;
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f..e28a74203f3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
r = (void *) &r4;
}
- if (ret)
- return -EFAULT;
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
set_fs (KERNEL_DS);
ret = sys_ioctl (fd, cmd, (unsigned long) r);
set_fs (old_fs);
+out:
if (mysock)
sockfd_put(mysock);
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 5034365b06a..8def89f2c43 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -19,6 +19,7 @@
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
+#include <linux/cramfs_fs.h>
static z_stream stream;
static int initialized;
diff --git a/fs/dcache.c b/fs/dcache.c
index a15a2e1f552..7376b61269f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode)
*/
void d_prune_aliases(struct inode *inode)
{
- struct list_head *tmp, *head = &inode->i_dentry;
+ struct dentry *dentry;
restart:
spin_lock(&dcache_lock);
- tmp = head;
- while ((tmp = tmp->next) != head) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
__dget_locked(dentry);
@@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb)
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
@@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb)
* Pass two ... free the dentries for this superblock.
*/
repeat:
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d..14dd03907cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
static inline void flush_old_files(struct files_struct * files)
{
long j = -1;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
j++;
i = j * __NFDBITS;
- if (i >= files->max_fds || i >= files->max_fdset)
+ fdt = files_fdtable(files);
+ if (i >= fdt->max_fds || i >= fdt->max_fdset)
break;
- set = files->close_on_exec->fds_bits[j];
+ set = fdt->close_on_exec->fds_bits[j];
if (!set)
continue;
- files->close_on_exec->fds_bits[j] = 0;
+ fdt->close_on_exec->fds_bits[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c..c8d07030c89 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -615,6 +615,11 @@ got:
DQUOT_DROP(inode);
goto fail2;
}
+ err = ext2_init_security(inode,dir);
+ if (err) {
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
mark_inode_dirty(inode);
ext2_debug("allocating inode %lu\n", inode->i_ino);
ext2_preread_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c659..fdba4d1d3c6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
*/
void ext2_delete_inode (struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
EXT2_I(inode)->i_dtime = get_seconds();
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b81..67cfeb66e89 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
# endif /* CONFIG_EXT2_FS_XATTR */
+#ifdef CONFIG_EXT2_FS_SECURITY
+extern int ext2_init_security(struct inode *inode, struct inode *dir);
+#else
+static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe59..a2661279847 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/smp_lock.h>
#include <linux/ext2_fs.h>
+#include <linux/security.h>
#include "xattr.h"
static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
value, size, flags);
}
+int
+ext2_init_security(struct inode *inode, struct inode *dir)
+{
+ int err;
+ size_t len;
+ void *value;
+ char *name;
+
+ err = security_inode_init_security(inode, dir, &name, &value, &len);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
+ name, value, len, 0);
+ kfree(name);
+ kfree(value);
+ return err;
+}
+
struct xattr_handler ext2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext2_xattr_security_list,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ed..96552769d03 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -607,6 +607,11 @@ got:
DQUOT_DROP(inode);
goto fail2;
}
+ err = ext3_init_security(handle,inode, dir);
+ if (err) {
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
err = ext3_mark_inode_dirty(handle, inode);
if (err) {
ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5..b5177c90d6f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
{
handle_t *handle;
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82d..2ceae38f3d4 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
#define ext3_xattr_handlers NULL
# endif /* CONFIG_EXT3_FS_XATTR */
+
+#ifdef CONFIG_EXT3_FS_SECURITY
+extern int ext3_init_security(handle_t *handle, struct inode *inode,
+ struct inode *dir);
+#else
+static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e..b9c40c15647 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
#include <linux/smp_lock.h>
#include <linux/ext3_jbd.h>
#include <linux/ext3_fs.h>
+#include <linux/security.h>
#include "xattr.h"
static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
value, size, flags);
}
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+ int err;
+ size_t len;
+ void *value;
+ char *name;
+
+ err = security_inode_init_security(inode, dir, &name, &value, &len);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+ name, value, len, 0);
+ kfree(name);
+ kfree(value);
+ return err;
+}
+
struct xattr_handler ext3_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext3_xattr_security_list,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eb..a7cbe68e225 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode);
static void fat_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (!is_bad_inode(inode)) {
inode->i_size = 0;
fat_truncate(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc3..863b46e0d78 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
+#include <linux/rcupdate.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -24,21 +25,25 @@
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
if (flag)
- FD_SET(fd, files->close_on_exec);
+ FD_SET(fd, fdt->close_on_exec);
else
- FD_CLR(fd, files->close_on_exec);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
}
static inline int get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
int res;
- spin_lock(&files->file_lock);
- res = FD_ISSET(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ res = FD_ISSET(fd, fdt->close_on_exec);
+ rcu_read_unlock();
return res;
}
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files,
unsigned int newfd;
unsigned int start;
int error;
+ struct fdtable *fdt;
error = -EINVAL;
if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
repeat:
+ fdt = files_fdtable(files);
/*
* Someone might have closed fd's in the range
- * orig_start..files->next_fd
+ * orig_start..fdt->next_fd
*/
start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
+ if (start < fdt->next_fd)
+ start = fdt->next_fd;
newfd = start;
- if (start < files->max_fdset) {
- newfd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset, start);
+ if (start < fdt->max_fdset) {
+ newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset, start);
}
error = -EMFILE;
@@ -89,9 +96,15 @@ repeat:
if (error)
goto repeat;
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
-
+ /*
+ * We reacquired files->file_lock, so we are safe as long as
+ * we reacquire the fdtable pointer and use it while holding
+ * the lock; no one can free it during that time.
+ */
+ fdt = files_fdtable(files);
+ if (start <= fdt->next_fd)
+ fdt->next_fd = newfd + 1;
+
error = newfd;
out:
@@ -101,13 +114,16 @@ out:
static int dupfd(struct file *file, unsigned int start)
{
struct files_struct * files = current->files;
+ struct fdtable *fdt;
int fd;
spin_lock(&files->file_lock);
fd = locate_fd(files, file, start);
if (fd >= 0) {
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
+ /* locate_fd() may have expanded fdtable, load the ptr */
+ fdt = files_fdtable(files);
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
fd_install(fd, file);
} else {
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
int err = -EBADF;
struct file * file, *tofree;
struct files_struct * files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
if (!(file = fcheck(oldfd)))
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
/* Yes. It's a race. In user space. Nothing sane to do */
err = -EBUSY;
- tofree = files->fd[newfd];
- if (!tofree && FD_ISSET(newfd, files->open_fds))
+ fdt = files_fdtable(files);
+ tofree = fdt->fd[newfd];
+ if (!tofree && FD_ISSET(newfd, fdt->open_fds))
goto out_fput;
- files->fd[newfd] = file;
- FD_SET(newfd, files->open_fds);
- FD_CLR(newfd, files->close_on_exec);
+ rcu_assign_pointer(fdt->fd[newfd], file);
+ FD_SET(newfd, fdt->open_fds);
+ FD_CLR(newfd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d..2127a7b9dc3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+struct fdtable_defer {
+ spinlock_t lock;
+ struct work_struct wq;
+ struct timer_list timer;
+ struct fdtable *next;
+};
+
+/*
+ * We use this list to defer the freeing of fdtables that have vmalloced
+ * sets/arrays. By keeping a per-cpu list, we avoid having to embed
+ * the work_struct in the fdtable itself, which avoids a 64 byte (i386)
+ * increase in this per-task structure.
+ */
+static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
/*
@@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num)
vfree(array);
}
-/*
- * Expand the fd array in the files_struct. Called with the files
- * spinlock held for write.
- */
+static void __free_fdtable(struct fdtable *fdt)
+{
+ int fdset_size, fdarray_size;
-static int expand_fd_array(struct files_struct *files, int nr)
- __releases(files->file_lock)
- __acquires(files->file_lock)
+ fdset_size = fdt->max_fdset / 8;
+ fdarray_size = fdt->max_fds * sizeof(struct file *);
+ free_fdset(fdt->open_fds, fdset_size);
+ free_fdset(fdt->close_on_exec, fdset_size);
+ free_fd_array(fdt->fd, fdarray_size);
+ kfree(fdt);
+}
+
+static void fdtable_timer(unsigned long data)
{
- struct file **new_fds;
- int error, nfds;
+ struct fdtable_defer *fddef = (struct fdtable_defer *)data;
-
- error = -EMFILE;
- if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
+ spin_lock(&fddef->lock);
+ /*
+ * If someone already emptied the queue, return.
+ */
+ if (!fddef->next)
goto out;
+ if (!schedule_work(&fddef->wq))
+ mod_timer(&fddef->timer, 5);
+out:
+ spin_unlock(&fddef->lock);
+}
- nfds = files->max_fds;
- spin_unlock(&files->file_lock);
+static void free_fdtable_work(struct fdtable_defer *f)
+{
+ struct fdtable *fdt;
- /*
- * Expand to the max in easy steps, and keep expanding it until
- * we have enough for the requested fd array size.
- */
+ spin_lock_bh(&f->lock);
+ fdt = f->next;
+ f->next = NULL;
+ spin_unlock_bh(&f->lock);
+ while(fdt) {
+ struct fdtable *next = fdt->next;
+ __free_fdtable(fdt);
+ fdt = next;
+ }
+}
- do {
-#if NR_OPEN_DEFAULT < 256
- if (nfds < 256)
- nfds = 256;
- else
-#endif
- if (nfds < (PAGE_SIZE / sizeof(struct file *)))
- nfds = PAGE_SIZE / sizeof(struct file *);
- else {
- nfds = nfds * 2;
- if (nfds > NR_OPEN)
- nfds = NR_OPEN;
- }
- } while (nfds <= nr);
+static void free_fdtable_rcu(struct rcu_head *rcu)
+{
+ struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
+ int fdset_size, fdarray_size;
+ struct fdtable_defer *fddef;
- error = -ENOMEM;
- new_fds = alloc_fd_array(nfds);
- spin_lock(&files->file_lock);
- if (!new_fds)
- goto out;
+ BUG_ON(!fdt);
+ fdset_size = fdt->max_fdset / 8;
+ fdarray_size = fdt->max_fds * sizeof(struct file *);
- /* Copy the existing array and install the new pointer */
-
- if (nfds > files->max_fds) {
- struct file **old_fds;
- int i;
-
- old_fds = xchg(&files->fd, new_fds);
- i = xchg(&files->max_fds, nfds);
-
- /* Don't copy/clear the array if we are creating a new
- fd array for fork() */
- if (i) {
- memcpy(new_fds, old_fds, i * sizeof(struct file *));
- /* clear the remainder of the array */
- memset(&new_fds[i], 0,
- (nfds-i) * sizeof(struct file *));
-
- spin_unlock(&files->file_lock);
- free_fd_array(old_fds, i);
- spin_lock(&files->file_lock);
- }
+ if (fdt->free_files) {
+ /*
+ * This fdtable was embedded in the files structure
+ * and the files structure itself was getting destroyed.
+ * It is now safe to free the files structure.
+ */
+ kmem_cache_free(files_cachep, fdt->free_files);
+ return;
+ }
+ if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+ /*
+ * The fdtable was embedded
+ */
+ return;
+ }
+ if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
+ kfree(fdt->open_fds);
+ kfree(fdt->close_on_exec);
+ kfree(fdt->fd);
+ kfree(fdt);
} else {
- /* Somebody expanded the array while we slept ... */
- spin_unlock(&files->file_lock);
- free_fd_array(new_fds, nfds);
- spin_lock(&files->file_lock);
+ fddef = &get_cpu_var(fdtable_defer_list);
+ spin_lock(&fddef->lock);
+ fdt->next = fddef->next;
+ fddef->next = fdt;
+ /*
+ * vmallocs are handled from the workqueue context.
+ * If the per-cpu workqueue is running, then we
+ * defer work scheduling through a timer.
+ */
+ if (!schedule_work(&fddef->wq))
+ mod_timer(&fddef->timer, 5);
+ spin_unlock(&fddef->lock);
+ put_cpu_var(fdtable_defer_list);
}
- error = 0;
-out:
- return error;
+}
+
+void free_fdtable(struct fdtable *fdt)
+{
+ if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
+ fdt->max_fds > NR_OPEN_DEFAULT)
+ call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
+/*
+ * Copy the fd sets, the fd array and next_fd from an existing fdtable into
+ * a new one that is at least as large. Called with the files spinlock held for write.
+ */
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+{
+ int i;
+ int count;
+
+ BUG_ON(nfdt->max_fdset < fdt->max_fdset);
+ BUG_ON(nfdt->max_fds < fdt->max_fds);
+ /* Copy the existing tables and install the new pointers */
+
+ i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+ count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+
+ /*
+ * Don't copy the entire array if the current fdset is
+ * not yet initialised.
+ */
+ if (i) {
+ memcpy (nfdt->open_fds, fdt->open_fds,
+ fdt->max_fdset/8);
+ memcpy (nfdt->close_on_exec, fdt->close_on_exec,
+ fdt->max_fdset/8);
+ memset (&nfdt->open_fds->fds_bits[i], 0, count);
+ memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
+ }
+
+ /* Don't copy/clear the array if we are creating a new
+ fd array for fork() */
+ if (fdt->max_fds) {
+ memcpy(nfdt->fd, fdt->fd,
+ fdt->max_fds * sizeof(struct file *));
+ /* clear the remainder of the array */
+ memset(&nfdt->fd[fdt->max_fds], 0,
+ (nfdt->max_fds - fdt->max_fds) *
+ sizeof(struct file *));
+ }
+ nfdt->next_fd = fdt->next_fd;
}
/*
@@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num)
vfree(array);
}
-/*
- * Expand the fdset in the files_struct. Called with the files spinlock
- * held for write.
- */
-static int expand_fdset(struct files_struct *files, int nr)
- __releases(file->file_lock)
- __acquires(file->file_lock)
+/*
+ * Allocate a new, empty fdtable for descriptor number nr: the fdtable
+ * itself, the two fd_set bitmaps and the fd array.  Both sizes are grown
+ * in steps until they exceed nr (or hit the NR_OPEN cap).  Nothing is
+ * copied from any existing table.
+ *
+ * Returns the new table, or NULL if any allocation failed; in that case
+ * everything allocated so far has been freed again.
+ */
+static struct fdtable *alloc_fdtable(int nr)
{
- fd_set *new_openset = NULL, *new_execset = NULL;
- int error, nfds = 0;
-
- error = -EMFILE;
- if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
- goto out;
+ struct fdtable *fdt = NULL;
+ int nfds = 0;
+ fd_set *new_openset = NULL, *new_execset = NULL;
+ struct file **new_fds;
- nfds = files->max_fdset;
- spin_unlock(&files->file_lock);
+ fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+ if (!fdt)
+ goto out;
+ memset(fdt, 0, sizeof(*fdt));
- /* Expand to the max in easy steps */
- do {
+ nfds = __FD_SETSIZE;
+ /* Expand to the max in easy steps */
+ do {
if (nfds < (PAGE_SIZE * 8))
nfds = PAGE_SIZE * 8;
else {
@@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
}
} while (nfds <= nr);
- error = -ENOMEM;
- new_openset = alloc_fdset(nfds);
- new_execset = alloc_fdset(nfds);
- spin_lock(&files->file_lock);
- if (!new_openset || !new_execset)
+ new_openset = alloc_fdset(nfds);
+ new_execset = alloc_fdset(nfds);
+ if (!new_openset || !new_execset)
+ goto out;
+ fdt->open_fds = new_openset;
+ fdt->close_on_exec = new_execset;
+ fdt->max_fdset = nfds;
+
+ nfds = NR_OPEN_DEFAULT;
+ /*
+ * Expand to the max in easy steps, and keep expanding it until
+ * we have enough for the requested fd array size.
+ */
+ do {
+#if NR_OPEN_DEFAULT < 256
+ if (nfds < 256)
+ nfds = 256;
+ else
+#endif
+ if (nfds < (PAGE_SIZE / sizeof(struct file *)))
+ nfds = PAGE_SIZE / sizeof(struct file *);
+ else {
+ nfds = nfds * 2;
+ if (nfds > NR_OPEN)
+ nfds = NR_OPEN;
+ }
+ } while (nfds <= nr);
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out2;
+ fdt->fd = new_fds;
+ fdt->max_fds = nfds;
+ fdt->free_files = NULL;
+ return fdt;
+out2:
+ /*
+ * nfds now holds the fd array size; the fdsets must be freed with
+ * the size they were allocated with, or free_fdset() is handed a
+ * size that does not match the allocation.
+ */
+ nfds = fdt->max_fdset;
+out:
+ if (new_openset)
+ free_fdset(new_openset, nfds);
+ if (new_execset)
+ free_fdset(new_execset, nfds);
+ kfree(fdt);
+ return NULL;
+}
+
+/*
+ * Expands the file descriptor table - it will allocate a new fdtable and
+ * both fd array and fdset. It is expected to be called with
+ * files->file_lock held; the lock is dropped and re-taken internally.
+ */
+static int expand_fdtable(struct files_struct *files, int nr)
+ __releases(files->file_lock)
+ __acquires(files->file_lock)
+{
+ int error = 0;
+ struct fdtable *fdt;
+ struct fdtable *nfdt = NULL;
+
+ spin_unlock(&files->file_lock);
+ nfdt = alloc_fdtable(nr);
+ if (!nfdt) {
+ error = -ENOMEM;
+ spin_lock(&files->file_lock);
goto out;
+ }
- error = 0;
-
- /* Copy the existing tables and install the new pointers */
- if (nfds > files->max_fdset) {
- int i = files->max_fdset / (sizeof(unsigned long) * 8);
- int count = (nfds - files->max_fdset) / 8;
-
- /*
- * Don't copy the entire array if the current fdset is
- * not yet initialised.
- */
- if (i) {
- memcpy (new_openset, files->open_fds, files->max_fdset/8);
- memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
- memset (&new_openset->fds_bits[i], 0, count);
- memset (&new_execset->fds_bits[i], 0, count);
- }
-
- nfds = xchg(&files->max_fdset, nfds);
- new_openset = xchg(&files->open_fds, new_openset);
- new_execset = xchg(&files->close_on_exec, new_execset);
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ /*
+ * Check again since another task may have expanded the
+ * fd table while we dropped the lock
+ */
+ if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
+ copy_fdtable(nfdt, fdt);
+ } else {
+ /* Somebody expanded while we dropped file_lock */
spin_unlock(&files->file_lock);
- free_fdset (new_openset, nfds);
- free_fdset (new_execset, nfds);
+ __free_fdtable(nfdt);
spin_lock(&files->file_lock);
- return 0;
- }
- /* Somebody expanded the array while we slept ... */
-
+ goto out;
+ }
+ rcu_assign_pointer(files->fdt, nfdt);
+ free_fdtable(fdt);
out:
- spin_unlock(&files->file_lock);
- if (new_openset)
- free_fdset(new_openset, nfds);
- if (new_execset)
- free_fdset(new_execset, nfds);
- spin_lock(&files->file_lock);
return error;
}
@@ -237,18 +351,39 @@ out:
int expand_files(struct files_struct *files, int nr)
{
int err, expand = 0;
+ struct fdtable *fdt;
- if (nr >= files->max_fdset) {
- expand = 1;
- if ((err = expand_fdset(files, nr)))
+ fdt = files_fdtable(files);
+ if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
+ if (fdt->max_fdset >= NR_OPEN ||
+ fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
+ err = -EMFILE;
goto out;
- }
- if (nr >= files->max_fds) {
+ }
expand = 1;
- if ((err = expand_fd_array(files, nr)))
+ if ((err = expand_fdtable(files, nr)))
goto out;
}
err = expand;
out:
return err;
}
+
+/*
+ * Initialise the fdtable_defer_list entry belonging to the given cpu:
+ * its lock, work item and timer.  The 'next' pointer starts out NULL.
+ */
+static void __devinit fdtable_defer_list_init(int cpu)
+{
+	struct fdtable_defer *defer = &per_cpu(fdtable_defer_list, cpu);
+
+	defer->next = NULL;
+	spin_lock_init(&defer->lock);
+	INIT_WORK(&defer->wq, (void (*)(void *))free_fdtable_work, defer);
+
+	/* The timer hands the entry itself to fdtable_timer() */
+	init_timer(&defer->timer);
+	defer->timer.function = fdtable_timer;
+	defer->timer.data = (unsigned long)defer;
+}
+
+/* Initialise the fdtable defer entry of every possible cpu slot */
+void __init files_defer_init(void)
+{
+	int cpu = 0;
+
+	/* Really early - can't use for_each_cpu */
+	while (cpu < NR_CPUS) {
+		fdtable_defer_list_init(cpu);
+		cpu++;
+	}
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e1737de..86ec8ae985b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/eventpoll.h>
+#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
spin_unlock_irqrestore(&filp_count_lock, flags);
}
-static inline void file_free(struct file *f)
+static inline void file_free_rcu(struct rcu_head *head)
{
+ struct file *f = container_of(head, struct file, f_rcuhead);
kmem_cache_free(filp_cachep, f);
}
+/*
+ * Free a struct file through RCU: the object is handed to call_rcu(),
+ * and file_free_rcu() later returns it to filp_cachep.
+ */
+static inline void file_free(struct file *f)
+{
+ call_rcu(&f->f_rcuhead, file_free_rcu);
+}
+
/* Find an unused file structure and return a pointer to it.
* Returns NULL, if there are no more free file structures or
* we run out of memory.
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
void fastcall fput(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count))
+ if (rcuref_dec_and_test(&file->f_count))
__fput(file);
}
@@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
struct file *file;
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
- if (file)
- get_file(file);
- spin_unlock(&files->file_lock);
+ if (file) {
+ if (!rcuref_inc_lf(&file->f_count)) {
+ /* File object ref couldn't be taken */
+ rcu_read_unlock();
+ return NULL;
+ }
+ }
+ rcu_read_unlock();
+
return file;
}
@@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
if (likely((atomic_read(&files->count) == 1))) {
file = fcheck_files(files, fd);
} else {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- get_file(file);
- *fput_needed = 1;
+ if (rcuref_inc_lf(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
}
+
return file;
}
void put_filp(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
+ if (rcuref_dec_and_test(&file->f_count)) {
security_file_free(file);
file_kill(file);
file_free(file);
@@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages)
files_stat.max_files = n;
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
+ files_defer_init();
}
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 00000000000..c3e1f760cac
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the FUSE filesystem.
+#
+
+obj-$(CONFIG_FUSE_FS) += fuse.o
+
+fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 00000000000..d4c869c6d01
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,877 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+
+static kmem_cache_t *fuse_req_cachep;
+
+/*
+ * Return the connection stored in the device file's private_data, or
+ * NULL if there is none or it is not mounted.  Both are read under
+ * fuse_lock.
+ */
+static inline struct fuse_conn *fuse_get_conn(struct file *file)
+{
+	struct fuse_conn *conn;
+
+	spin_lock(&fuse_lock);
+	conn = file->private_data;
+	if (conn != NULL && !conn->mounted)
+		conn = NULL;
+	spin_unlock(&fuse_lock);
+
+	return conn;
+}
+
+/*
+ * Put a request into its pristine state: all fields zero, list head and
+ * wait queue initialised, reference count 1.
+ */
+static inline void fuse_request_init(struct fuse_req *req)
+{
+	memset(req, 0, sizeof(*req));
+
+	atomic_set(&req->count, 1);
+	init_waitqueue_head(&req->waitq);
+	INIT_LIST_HEAD(&req->list);
+}
+
+/*
+ * Allocate a request from fuse_req_cachep and initialise it.
+ * Returns NULL if the allocation fails.
+ */
+struct fuse_req *fuse_request_alloc(void)
+{
+	struct fuse_req *req;
+
+	req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	if (!req)
+		return NULL;
+
+	fuse_request_init(req);
+	return req;
+}
+
+/* Give a request back to the fuse_req_cachep slab cache */
+void fuse_request_free(struct fuse_req *req)
+{
+ kmem_cache_free(fuse_req_cachep, req);
+}
+
+/*
+ * Block every signal except SIGKILL for the current task.  The previous
+ * mask is stored in *oldset so that restore_sigs() can put it back.
+ */
+static inline void block_sigs(sigset_t *oldset)
+{
+	sigset_t all_but_kill;
+
+	siginitsetinv(&all_but_kill, sigmask(SIGKILL));
+	sigprocmask(SIG_BLOCK, &all_but_kill, oldset);
+}
+
+/* Reinstate the signal mask previously saved by block_sigs() */
+static inline void restore_sigs(sigset_t *oldset)
+{
+ sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+/*
+ * Re-initialise a request so it can be reused, preserving only its
+ * 'preallocated' flag.  The caller must hold the only reference.
+ */
+void fuse_reset_request(struct fuse_req *req)
+{
+	int keep_prealloc;
+
+	BUG_ON(atomic_read(&req->count) != 1);
+
+	keep_prealloc = req->preallocated;
+	fuse_request_init(req);
+	req->preallocated = keep_prealloc;
+}
+
+/* Take an additional reference on the request */
+static void __fuse_get_request(struct fuse_req *req)
+{
+ atomic_inc(&req->count);
+}
+
+/*
+ * Drop a reference that is known not to be the last one, so the request
+ * is never released here.  Must be called with > 1 refcount.
+ */
+static void __fuse_put_request(struct fuse_req *req)
+{
+ BUG_ON(atomic_read(&req->count) < 2);
+ atomic_dec(&req->count);
+}
+
+/*
+ * Take the first request off fc->unused_list, re-initialise it, mark it
+ * preallocated and stamp it with the current task's fsuid, fsgid and
+ * pid.  The unused list must not be empty.
+ */
+static struct fuse_req *do_get_request(struct fuse_conn *fc)
+{
+	struct list_head *first;
+	struct fuse_req *req;
+
+	spin_lock(&fuse_lock);
+	BUG_ON(list_empty(&fc->unused_list));
+	first = fc->unused_list.next;
+	req = list_entry(first, struct fuse_req, list);
+	list_del_init(first);
+	spin_unlock(&fuse_lock);
+
+	fuse_request_init(req);
+	req->preallocated = 1;
+	req->in.h.pid = current->pid;
+	req->in.h.uid = current->fsuid;
+	req->in.h.gid = current->fsgid;
+
+	return req;
+}
+
+/*
+ * Acquire a slot on fc->outstanding_sem and return a fresh request.
+ * All signals but SIGKILL are blocked while waiting, so this can return
+ * NULL, but only in case it's interrupted by a SIGKILL.
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc)
+{
+	sigset_t saved_mask;
+	int interrupted;
+
+	block_sigs(&saved_mask);
+	interrupted = down_interruptible(&fc->outstanding_sem);
+	restore_sigs(&saved_mask);
+
+	if (interrupted)
+		return NULL;
+	return do_get_request(fc);
+}
+
+/*
+ * Dispose of a request whose last reference is gone: preallocated
+ * requests go back on fc->unused_list, others are freed.  Then either
+ * pay off one unit of outstanding_debt or release outstanding_sem.
+ */
+static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+
+	if (!req->preallocated)
+		fuse_request_free(req);
+	else
+		list_add(&req->list, &fc->unused_list);
+
+	/* If we are in debt decrease that first */
+	if (!fc->outstanding_debt)
+		up(&fc->outstanding_sem);
+	else
+		fc->outstanding_debt--;
+
+	spin_unlock(&fuse_lock);
+}
+
+/* Drop one reference; the final put hands the request back */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	if (!atomic_dec_and_test(&req->count))
+		return;
+
+	fuse_putback_request(fc, req);
+}
+
+/*
+ * Release what a background request holds: both inodes (iput), the
+ * file if one is set (fput), and its bg_entry list linkage, which is
+ * removed under fuse_lock.
+ */
+void fuse_release_background(struct fuse_req *req)
+{
+ iput(req->inode);
+ iput(req->inode2);
+ if (req->file)
+ fput(req->file);
+ spin_lock(&fuse_lock);
+ list_del(&req->bg_entry);
+ spin_unlock(&fuse_lock);
+}
+
+/*
+ * This function is called when a request is finished. Either a reply
+ * has arrived or it was interrupted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file was
+ * closed. It decreases the reference count for the request. In case
+ * of a background request the references to the stored objects are
+ * released. The requester thread is woken up (if still waiting), and
+ * finally the request is either freed or put on the unused_list
+ *
+ * Called with fuse_lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int putback;
+ req->finished = 1;
+ /* Remember whether this was the last reference before unlocking */
+ putback = atomic_dec_and_test(&req->count);
+ spin_unlock(&fuse_lock);
+ if (req->background) {
+ /* Only release the held objects while still mounted */
+ down_read(&fc->sbput_sem);
+ if (fc->mounted)
+ fuse_release_background(req);
+ up_read(&fc->sbput_sem);
+ }
+ wake_up(&req->waitq);
+ if (req->in.h.opcode == FUSE_INIT) {
+ int i;
+
+ /* A mismatching major version marks the connection as failed */
+ if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
+ fc->conn_error = 1;
+
+ /* After INIT reply is received other requests can go
+ out. So do (FUSE_MAX_OUTSTANDING - 1) number of
+ up()s on outstanding_sem. The last up() is done in
+ fuse_putback_request() */
+ for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+ up(&fc->outstanding_sem);
+ }
+ if (putback)
+ fuse_putback_request(fc, req);
+}
+
+/*
+ * Unfortunately request interruption does not just solve the deadlock
+ * problem, it causes problems too. These stem from the fact that an
+ * interrupted request continues to be processed in userspace,
+ * while all the locks and object references (inode and file) held
+ * during the operation are released.
+ *
+ * To release the locks is exactly why there's a need to interrupt the
+ * request, so there's not a lot that can be done about this, except
+ * introduce additional locking in userspace.
+ *
+ * More important is to keep inode and file references until userspace
+ * has replied, otherwise FORGET and RELEASE could be sent while the
+ * inode/file is still used by the filesystem.
+ *
+ * For this reason the concept of "background" request is introduced.
+ * An interrupted request is backgrounded if it has been already sent
+ * to userspace. Backgrounding involves getting an extra reference to
+ * inode(s) or file used in the request, and adding the request to
+ * fc->background list. When a reply is received for a background
+ * request, the object references are released, and the request is
+ * removed from the list. If the filesystem is unmounted while there
+ * are still background requests, the list is walked and references
+ * are released as if a reply was received.
+ *
+ * There's one more use for a background request. The RELEASE message is
+ * always sent as background, since it doesn't return an error or
+ * data.
+ */
+static void background_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->background = 1;
+ list_add(&req->bg_entry, &fc->background);
+ /* igrab() result replaces the pointer, so a failed grab leaves NULL */
+ if (req->inode)
+ req->inode = igrab(req->inode);
+ if (req->inode2)
+ req->inode2 = igrab(req->inode2);
+ if (req->file)
+ get_file(req->file);
+}
+
+/*
+ * Wait until the request is finished, or until the wait is interrupted
+ * (all signals except SIGKILL are blocked while waiting).  On
+ * interruption the request is marked interrupted with error -EINTR and
+ * is then either removed from the list it is still queued on (if not yet
+ * sent) or turned into a background request (if already sent).
+ *
+ * Called with fuse_lock held. Releases, and then reacquires it.
+ */
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+{
+ sigset_t oldset;
+
+ spin_unlock(&fuse_lock);
+ block_sigs(&oldset);
+ wait_event_interruptible(req->waitq, req->finished);
+ restore_sigs(&oldset);
+ spin_lock(&fuse_lock);
+ if (req->finished)
+ return;
+
+ /* Not finished, so the wait was interrupted */
+ req->out.h.error = -EINTR;
+ req->interrupted = 1;
+ if (req->locked) {
+ /* This is uninterruptible sleep, because data is
+ being copied to/from the buffers of req. During
+ locked state, there mustn't be any filesystem
+ operation (e.g. page fault), since that could lead
+ to deadlock */
+ spin_unlock(&fuse_lock);
+ wait_event(req->waitq, !req->locked);
+ spin_lock(&fuse_lock);
+ }
+ if (!req->sent && !list_empty(&req->list)) {
+ /* Still queued and never sent: unqueue, drop the extra ref */
+ list_del(&req->list);
+ __fuse_put_request(req);
+ } else if (!req->finished && req->sent)
+ background_request(fc, req);
+}
+
+/* Return the sum of the sizes of the first numargs entries of args */
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+	struct fuse_arg *arg;
+	unsigned total = 0;
+
+	for (arg = args; arg != args + numargs; arg++)
+		total += arg->size;
+
+	return total;
+}
+
+/*
+ * Give the request the next unique ID (never zero), fill in the total
+ * length of its input, append it to fc->pending and wake up a reader
+ * waiting on fc->waitq.
+ */
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+	unsigned arglen;
+
+	/* zero is special */
+	if (++fc->reqctr == 0)
+		fc->reqctr = 1;
+	req->in.h.unique = fc->reqctr;
+
+	arglen = len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+	req->in.h.len = sizeof(struct fuse_in_header) + arglen;
+
+	/* If request is not preallocated (either FORGET or
+	   RELEASE), then still decrease outstanding_sem, so
+	   user can't open infinite number of files while not
+	   processing the RELEASE requests.  However for
+	   efficiency do it without blocking, so if down()
+	   would block, just increase the debt instead */
+	if (!req->preallocated && down_trylock(&fc->outstanding_sem))
+		fc->outstanding_debt++;
+
+	list_add_tail(&req->list, &fc->pending);
+	wake_up(&fc->waitq);
+}
+
+/*
+ * Send a request that expects a reply and wait for the answer.  If the
+ * connection is gone or has failed, only the error is recorded in
+ * req->out.h.error.
+ *
+ * This can only be interrupted by a SIGKILL
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->isreply = 1;
+
+	spin_lock(&fuse_lock);
+	if (!fc->connected) {
+		req->out.h.error = -ENOTCONN;
+		goto out_unlock;
+	}
+	if (fc->conn_error) {
+		req->out.h.error = -ECONNREFUSED;
+		goto out_unlock;
+	}
+
+	queue_request(fc, req);
+	/* acquire extra reference, since request is still needed
+	   after request_end() */
+	__fuse_get_request(req);
+	request_wait_answer(fc, req);
+
+ out_unlock:
+	spin_unlock(&fuse_lock);
+}
+
+/*
+ * Queue a request without waiting for it.  If the connection is not
+ * connected the request is ended immediately with -ENOTCONN.
+ */
+static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+	spin_lock(&fuse_lock);
+	if (!fc->connected) {
+		req->out.h.error = -ENOTCONN;
+		/* request_end() drops fuse_lock for us */
+		request_end(fc, req);
+		return;
+	}
+
+	queue_request(fc, req);
+	spin_unlock(&fuse_lock);
+}
+
+/* Queue a request for which no reply is expected (isreply = 0) */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 0;
+ request_send_nowait(fc, req);
+}
+
+/*
+ * Queue a request that expects a reply but is not waited for: it is
+ * first turned into a background request (under fuse_lock) and then
+ * queued without waiting.
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ spin_lock(&fuse_lock);
+ background_request(fc, req);
+ spin_unlock(&fuse_lock);
+ request_send_nowait(fc, req);
+}
+
+/*
+ * Build a FUSE_INIT request carrying the kernel's protocol version and
+ * send it in the background.  The same fuse_init_in_out buffer inside
+ * the request serves as both the input and the output argument.
+ */
+void fuse_send_init(struct fuse_conn *fc)
+{
+	/* This is called from fuse_read_super() so there's guaranteed
+	   to be a request available */
+	struct fuse_req *req = do_get_request(fc);
+	struct fuse_init_in_out *init = &req->misc.init_in_out;
+
+	init->major = FUSE_KERNEL_VERSION;
+	init->minor = FUSE_KERNEL_MINOR_VERSION;
+
+	req->in.h.opcode = FUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].value = init;
+	req->in.args[0].size = sizeof(*init);
+
+	req->out.numargs = 1;
+	req->out.args[0].value = init;
+	req->out.args[0].size = sizeof(*init);
+
+	request_send_background(fc, req);
+}
+
+/*
+ * Lock the request.  Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault.  If the request was already
+ * interrupted bail out with -ENOENT.  A NULL request is accepted and
+ * yields 0.
+ */
+static inline int lock_request(struct fuse_req *req)
+{
+	int ret = 0;
+
+	if (!req)
+		return 0;
+
+	spin_lock(&fuse_lock);
+	if (!req->interrupted)
+		req->locked = 1;
+	else
+		ret = -ENOENT;
+	spin_unlock(&fuse_lock);
+
+	return ret;
+}
+
+/*
+ * Unlock request.  If it was interrupted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.  A NULL request is ignored.
+ */
+static inline void unlock_request(struct fuse_req *req)
+{
+	if (!req)
+		return;
+
+	spin_lock(&fuse_lock);
+	req->locked = 0;
+	if (req->interrupted)
+		wake_up(&req->waitq);
+	spin_unlock(&fuse_lock);
+}
+
+/*
+ * Cursor used while copying a request to or from the iovec supplied by
+ * the userspace filesystem, one pinned page at a time.
+ */
+struct fuse_copy_state {
+ /* non-zero: copy into the userspace buffer; zero: copy out of it */
+ int write;
+ /* request that is locked/unlocked around the copy, may be NULL */
+ struct fuse_req *req;
+ /* next iovec segment to use, and how many segments are left */
+ const struct iovec *iov;
+ unsigned long nr_segs;
+ /* bytes left in the current segment, and the next user address */
+ unsigned long seglen;
+ unsigned long addr;
+ /* currently pinned user page and its kmap_atomic() mapping */
+ struct page *pg;
+ void *mapaddr;
+ /* current position inside the mapped page, and bytes left in it */
+ void *buf;
+ unsigned len;
+};
+
+/*
+ * Reset the copy state and record the direction, the request (may be
+ * NULL) and the iovec to work on.  No page is mapped yet.
+ */
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
+			   struct fuse_req *req, const struct iovec *iov,
+			   unsigned long nr_segs)
+{
+	memset(cs, 0, sizeof(*cs));
+
+	cs->iov = iov;
+	cs->nr_segs = nr_segs;
+	cs->req = req;
+	cs->write = write;
+}
+
+/*
+ * Unmap and put previous page of userspace buffer, if one is mapped.
+ * A page that was written to is flushed and marked dirty first.
+ */
+static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+	if (!cs->mapaddr)
+		return;
+
+	kunmap_atomic(cs->mapaddr, KM_USER0);
+	if (cs->write) {
+		flush_dcache_page(cs->pg);
+		set_page_dirty_lock(cs->pg);
+	}
+	put_page(cs->pg);
+	cs->mapaddr = NULL;
+}
+
+/*
+ * Get another pagefull of userspace buffer, and map it to kernel
+ * address space, and lock request
+ *
+ * The request is unlocked and the previous page released first, because
+ * pinning the next page may fault.  Returns 0 on success, a negative
+ * error from get_user_pages(), or the result of lock_request().
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+ unsigned long offset;
+ int err;
+
+ unlock_request(cs->req);
+ fuse_copy_finish(cs);
+ /* Current segment used up: move on to the next iovec entry */
+ if (!cs->seglen) {
+ BUG_ON(!cs->nr_segs);
+ cs->seglen = cs->iov[0].iov_len;
+ cs->addr = (unsigned long) cs->iov[0].iov_base;
+ cs->iov ++;
+ cs->nr_segs --;
+ }
+ /* Pin exactly one user page, writable if we copy into it */
+ down_read(&current->mm->mmap_sem);
+ err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
+ &cs->pg, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (err < 0)
+ return err;
+ BUG_ON(err != 1);
+ offset = cs->addr % PAGE_SIZE;
+ cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+ cs->buf = cs->mapaddr + offset;
+ /* Usable bytes: up to the end of the page or of the segment */
+ cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->seglen -= cs->len;
+ cs->addr += cs->len;
+
+ return lock_request(cs->req);
+}
+
+/*
+ * Do as much copy to/from userspace buffer as we can: the smaller of
+ * *size and what is left in the mapped page.  If val is NULL the bytes
+ * are only skipped.  *val, *size and the copy state are advanced by the
+ * number of bytes handled, which is also returned.
+ */
+static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+			       unsigned *size)
+{
+	unsigned ncpy = min(*size, cs->len);
+
+	if (val) {
+		if (!cs->write)
+			memcpy(*val, cs->buf, ncpy);
+		else
+			memcpy(cs->buf, *val, ncpy);
+		*val += ncpy;
+	}
+
+	cs->buf += ncpy;
+	cs->len -= ncpy;
+	*size -= ncpy;
+
+	return ncpy;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer. Must be
+ * done atomically
+ *
+ * 'count' bytes starting at 'offset' within 'page' are transferred.  If
+ * 'page' is NULL the bytes are only skipped in the userspace buffer.
+ * With 'zeroing' set and less than a full page to copy, the whole page
+ * is cleared first.  Returns 0 or the error from fuse_copy_fill().
+ */
+static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+ unsigned offset, unsigned count, int zeroing)
+{
+ if (page && zeroing && count < PAGE_SIZE) {
+ void *mapaddr = kmap_atomic(page, KM_USER1);
+ memset(mapaddr, 0, PAGE_SIZE);
+ kunmap_atomic(mapaddr, KM_USER1);
+ }
+ while (count) {
+ int err;
+ /* Mapped user page exhausted: fetch the next one */
+ if (!cs->len && (err = fuse_copy_fill(cs)))
+ return err;
+ if (page) {
+ void *mapaddr = kmap_atomic(page, KM_USER1);
+ void *buf = mapaddr + offset;
+ offset += fuse_copy_do(cs, &buf, &count);
+ kunmap_atomic(mapaddr, KM_USER1);
+ } else
+ offset += fuse_copy_do(cs, NULL, &count);
+ }
+ /* The request page was written to, so flush it */
+ if (page && !cs->write)
+ flush_dcache_page(page);
+ return 0;
+}
+
+/*
+ * Copy pages in the request to/from userspace buffer
+ *
+ * nbytes are spread over req->pages: the first page starts at
+ * req->page_offset, the following ones at offset 0.  With 'zeroing' set
+ * the loop continues over the remaining pages even after nbytes is used
+ * up.  Returns 0 or the first error from fuse_copy_page().
+ */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
+{
+ unsigned i;
+ struct fuse_req *req = cs->req;
+ unsigned offset = req->page_offset;
+ /* The first page can only take what fits after page_offset */
+ unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+ for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+ struct page *page = req->pages[i];
+ int err = fuse_copy_page(cs, page, offset, count, zeroing);
+ if (err)
+ return err;
+
+ nbytes -= count;
+ count = min(nbytes, (unsigned) PAGE_SIZE);
+ offset = 0;
+ }
+ return 0;
+}
+
+/*
+ * Copy a single argument in the request to/from userspace buffer,
+ * fetching further user pages as needed.  Returns 0 or the error from
+ * fuse_copy_fill().
+ */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+	while (size) {
+		if (!cs->len) {
+			int err = fuse_copy_fill(cs);
+
+			if (err)
+				return err;
+		}
+		fuse_copy_do(cs, &val, &size);
+	}
+
+	return 0;
+}
+
+/*
+ * Copy request arguments to/from userspace buffer.  When argpages is
+ * set the last argument lives in the request's pages; all others are
+ * plain values.  Stops at, and returns, the first error.
+ */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+			  unsigned argpages, struct fuse_arg *args,
+			  int zeroing)
+{
+	unsigned i;
+
+	for (i = 0; i < numargs; i++) {
+		struct fuse_arg *arg = &args[i];
+		int err;
+
+		if (argpages && i == numargs - 1)
+			err = fuse_copy_pages(cs, arg->size, zeroing);
+		else
+			err = fuse_copy_one(cs, arg->value, arg->size);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait until a request is available on the pending list
+ *
+ * Also returns when the connection is no longer mounted or a signal is
+ * pending, so the caller has to re-check.  fuse_lock is dropped around
+ * schedule() and held again on return.
+ */
+static void request_wait(struct fuse_conn *fc)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&fc->waitq, &wait);
+ while (fc->mounted && list_empty(&fc->pending)) {
+ /* Set the state before testing, so a wake-up is not lost */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ break;
+
+ spin_unlock(&fuse_lock);
+ schedule();
+ spin_lock(&fuse_lock);
+ }
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer. This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer. If
+ * no reply is needed (FORGET) or request has been interrupted or
+ * there was an error during the copying then it's finished by calling
+ * request_end(). Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ *
+ * Returns the size of the request on success, or a negative error:
+ * -EPERM if the file has no connection, -ENODEV if it is not mounted,
+ * -ERESTARTSYS if the wait ended without a pending request, -EINVAL if
+ * the buffer is smaller than the request, -ENOENT if the request was
+ * interrupted, or an error from the copy itself.
+ */
+static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *off)
+{
+ int err;
+ struct fuse_conn *fc;
+ struct fuse_req *req;
+ struct fuse_in *in;
+ struct fuse_copy_state cs;
+ unsigned reqsize;
+
+ spin_lock(&fuse_lock);
+ fc = file->private_data;
+ err = -EPERM;
+ if (!fc)
+ goto err_unlock;
+ request_wait(fc);
+ err = -ENODEV;
+ if (!fc->mounted)
+ goto err_unlock;
+ err = -ERESTARTSYS;
+ if (list_empty(&fc->pending))
+ goto err_unlock;
+
+ req = list_entry(fc->pending.next, struct fuse_req, list);
+ list_del_init(&req->list);
+ spin_unlock(&fuse_lock);
+
+ in = &req->in;
+ reqsize = req->in.h.len;
+ /* write = 1: data flows into the userspace buffer */
+ fuse_copy_init(&cs, 1, req, iov, nr_segs);
+ err = -EINVAL;
+ if (iov_length(iov, nr_segs) >= reqsize) {
+ err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+ if (!err)
+ err = fuse_copy_args(&cs, in->numargs, in->argpages,
+ (struct fuse_arg *) in->args, 0);
+ }
+ fuse_copy_finish(&cs);
+
+ spin_lock(&fuse_lock);
+ req->locked = 0;
+ if (!err && req->interrupted)
+ err = -ENOENT;
+ if (err) {
+ if (!req->interrupted)
+ req->out.h.error = -EIO;
+ /* request_end() releases fuse_lock */
+ request_end(fc, req);
+ return err;
+ }
+ if (!req->isreply)
+ request_end(fc, req);
+ else {
+ req->sent = 1;
+ list_add_tail(&req->list, &fc->processing);
+ spin_unlock(&fuse_lock);
+ }
+ return reqsize;
+
+ err_unlock:
+ spin_unlock(&fuse_lock);
+ return err;
+}
+
+/* read(): wrap the buffer in a one-element iovec and use the readv path */
+static ssize_t fuse_dev_read(struct file *file, char __user *buf,
+			     size_t nbytes, loff_t *off)
+{
+	struct iovec single = {
+		.iov_base = buf,
+		.iov_len = nbytes,
+	};
+
+	return fuse_dev_readv(file, &single, 1, off);
+}
+
+/*
+ * Look up request on processing list by unique ID.  Returns the first
+ * match, or NULL if no request on the list carries that ID.
+ */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+	struct fuse_req *candidate;
+
+	list_for_each_entry(candidate, &fc->processing, list) {
+		if (candidate->in.h.unique == unique)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Validate the size of a reply against what the request expects and
+ * copy the output arguments from the userspace buffer.
+ *
+ * An error reply must consist of the header only.  Otherwise the reply
+ * must match the expected size exactly, except that a request with
+ * 'argvar' set may receive less: the last argument is then shortened by
+ * the difference.  Returns -EINVAL on a size mismatch, else the result
+ * of fuse_copy_args().
+ */
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+			 unsigned nbytes)
+{
+	unsigned expected = sizeof(struct fuse_out_header);
+
+	if (out->h.error)
+		return nbytes != expected ? -EINVAL : 0;
+
+	expected += len_args(out->numargs, out->args);
+
+	if (expected < nbytes)
+		return -EINVAL;
+
+	if (expected > nbytes) {
+		struct fuse_arg *lastarg;
+		unsigned shortfall;
+
+		if (!out->argvar)
+			return -EINVAL;
+
+		lastarg = &out->args[out->numargs-1];
+		shortfall = expected - nbytes;
+		if (shortfall > lastarg->size)
+			return -EINVAL;
+		lastarg->size -= shortfall;
+	}
+
+	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+			      out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request. First the header is copied from
+ * the write buffer. The request is then searched on the processing
+ * list by the unique ID found in the header. If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ *
+ * Returns the number of bytes written on success, or a negative error:
+ * -ENODEV without a mounted connection, -EINVAL for a malformed header
+ * or unknown ID, -ENOENT if the request was interrupted, or an error
+ * from the copy itself.
+ */
+static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *off)
+{
+ int err;
+ unsigned nbytes = iov_length(iov, nr_segs);
+ struct fuse_req *req;
+ struct fuse_out_header oh;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -ENODEV;
+
+ /* write = 0: data flows out of the userspace buffer; no req yet */
+ fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+ if (nbytes < sizeof(struct fuse_out_header))
+ return -EINVAL;
+
+ err = fuse_copy_one(&cs, &oh, sizeof(oh));
+ if (err)
+ goto err_finish;
+ err = -EINVAL;
+ /* Sanity-check the header before trusting any of it */
+ if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
+ oh.len != nbytes)
+ goto err_finish;
+
+ spin_lock(&fuse_lock);
+ req = request_find(fc, oh.unique);
+ err = -EINVAL;
+ if (!req)
+ goto err_unlock;
+
+ list_del_init(&req->list);
+ if (req->interrupted) {
+ /* request_end() releases fuse_lock */
+ request_end(fc, req);
+ fuse_copy_finish(&cs);
+ return -ENOENT;
+ }
+ req->out.h = oh;
+ req->locked = 1;
+ cs.req = req;
+ spin_unlock(&fuse_lock);
+
+ err = copy_out_args(&cs, &req->out, nbytes);
+ fuse_copy_finish(&cs);
+
+ spin_lock(&fuse_lock);
+ req->locked = 0;
+ if (!err) {
+ if (req->interrupted)
+ err = -ENOENT;
+ } else if (!req->interrupted)
+ req->out.h.error = -EIO;
+ request_end(fc, req);
+
+ return err ? err : nbytes;
+
+ err_unlock:
+ spin_unlock(&fuse_lock);
+ err_finish:
+ fuse_copy_finish(&cs);
+ return err;
+}
+
+/* write(): wrap the buffer in a one-element iovec and use the writev path */
+static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
+			      size_t nbytes, loff_t *off)
+{
+	struct iovec single = {
+		.iov_base = (char __user *) buf,
+		.iov_len = nbytes,
+	};
+
+	return fuse_dev_writev(file, &single, 1, off);
+}
+
+/*
+ * Poll the fuse device.  Writing is always reported as possible; reading
+ * is reported as possible when a request is waiting on fc->pending.
+ *
+ * The return value is a mask of poll events, not an error code: if the
+ * file has no mounted connection POLLERR is reported.  (Returning a
+ * negative errno here would be converted to unsigned and read by the
+ * caller as a mask with almost every event bit set.)
+ */
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct fuse_conn *fc = fuse_get_conn(file);
+	unsigned mask = POLLOUT | POLLWRNORM;
+
+	if (!fc)
+		return POLLERR;
+
+	poll_wait(file, &fc->waitq, wait);
+
+	/* The pending list is protected by fuse_lock */
+	spin_lock(&fuse_lock);
+	if (!list_empty(&fc->pending))
+		mask |= POLLIN | POLLRDNORM;
+	spin_unlock(&fuse_lock);
+
+	return mask;
+}
+
+/*
+ * Abort all requests on the given list (pending or processing)
+ *
+ * Each request is ended with -ECONNABORTED.  Called with fuse_lock held
+ * and returns with it held: request_end() drops the lock, so it is
+ * re-taken after every request.
+ */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct fuse_req *req;
+ req = list_entry(head->next, struct fuse_req, list);
+ list_del_init(&req->list);
+ req->out.h.error = -ECONNABORTED;
+ request_end(fc, req);
+ spin_lock(&fuse_lock);
+ }
+}
+
+/*
+ * The device file is being closed.  If a connection is attached, mark
+ * it disconnected, abort every pending and processing request and
+ * release the connection.  Always returns 0.
+ */
+static int fuse_dev_release(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc;
+
+ spin_lock(&fuse_lock);
+ fc = file->private_data;
+ if (fc) {
+ /* Clear 'connected' first so new requests fail with -ENOTCONN */
+ fc->connected = 0;
+ end_requests(fc, &fc->pending);
+ end_requests(fc, &fc->processing);
+ fuse_release_conn(fc);
+ }
+ spin_unlock(&fuse_lock);
+ return 0;
+}
+
+/*
+ * File operations of the fuse device: the userspace filesystem reads
+ * requests from it and writes replies to it.  Seeking is not supported.
+ */
+struct file_operations fuse_dev_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = fuse_dev_read,
+ .readv = fuse_dev_readv,
+ .write = fuse_dev_write,
+ .writev = fuse_dev_writev,
+ .poll = fuse_dev_poll,
+ .release = fuse_dev_release,
+};
+
+/* Misc device "fuse" with minor FUSE_MINOR, served by fuse_dev_operations */
+static struct miscdevice fuse_miscdevice = {
+ .minor = FUSE_MINOR,
+ .name = "fuse",
+ .fops = &fuse_dev_operations,
+};
+
+/*
+ * Create the request slab cache and register the fuse misc device.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * error from misc_register() (the cache is destroyed again in that
+ * case).
+ */
+int __init fuse_dev_init(void)
+{
+	int err;
+
+	fuse_req_cachep = kmem_cache_create("fuse_request",
+					    sizeof(struct fuse_req),
+					    0, 0, NULL, NULL);
+	if (!fuse_req_cachep)
+		return -ENOMEM;
+
+	err = misc_register(&fuse_miscdevice);
+	if (err) {
+		kmem_cache_destroy(fuse_req_cachep);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Undo fuse_dev_init(): deregister the misc device, then destroy the
+ * request slab cache.
+ */
+void fuse_dev_cleanup(void)
+{
+ misc_deregister(&fuse_miscdevice);
+ kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 00000000000..e79e49b3eec
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,982 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+/*
+ * Turn a relative timeout given as seconds plus nanoseconds into an
+ * absolute point in time, expressed in jiffies and counted from the
+ * current value of jiffies.
+ */
+static inline unsigned long time_to_jiffies(unsigned long sec,
+					    unsigned long nsec)
+{
+	struct timespec timeout = {
+		.tv_sec = sec,
+		.tv_nsec = nsec,
+	};
+
+	return jiffies + timespec_to_jiffies(&timeout);
+}
+
+/*
+ * Prepare (but do not send) a FUSE_LOOKUP request for 'entry' in the
+ * directory 'dir'.
+ *
+ * The single input argument is the entry name, d_name.len + 1 bytes
+ * long (presumably the name plus its terminating NUL); the single
+ * output argument is the caller-provided 'outarg'.
+ */
+static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
+			     struct dentry *entry,
+			     struct fuse_entry_out *outarg)
+{
+	/* Where the reply goes */
+	req->out.numargs = 1;
+	req->out.args[0].value = outarg;
+	req->out.args[0].size = sizeof(struct fuse_entry_out);
+
+	/* What is being asked, and of which node */
+	req->inode = dir;
+	req->in.h.nodeid = get_node_id(dir);
+	req->in.h.opcode = FUSE_LOOKUP;
+
+	/* The name to look up */
+	req->in.numargs = 1;
+	req->in.args[0].value = entry->d_name.name;
+	req->in.args[0].size = entry->d_name.len + 1;
+}
+
+/*
+ * d_revalidate handler: decide whether a cached dentry may still be used.
+ *
+ * Returns 0 (not valid) for negative dentries, for bad inodes, when no
+ * request can be obtained, when the repeated lookup fails, or when the
+ * looked-up node differs from the cached inode in node id or file type.
+ * Returns 1 when the entry timeout has not expired yet, or when the
+ * repeated lookup confirmed the entry; in the latter case the inode
+ * attributes and both timeouts are refreshed from the reply.
+ */
+static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+{
+ if (!entry->d_inode || is_bad_inode(entry->d_inode))
+ return 0;
+ else if (time_after(jiffies, entry->d_time)) {
+ /* Entry timeout expired: repeat the lookup in the parent */
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode = entry->d_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return 0;
+
+ fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (!err) {
+ if (outarg.nodeid != get_node_id(inode)) {
+ /* The name now refers to a different node.
+ NOTE(review): this path returns without
+ fuse_put_request(); presumably
+ fuse_send_forget() takes over the request -
+ confirm */
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return 0;
+ }
+ /* Same node: count this successful lookup */
+ fi->nlookup ++;
+ }
+ fuse_put_request(fc, req);
+ /* Lookup failed or the file type changed: entry is stale */
+ if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ return 0;
+
+ fuse_change_attributes(inode, &outarg.attr);
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+ return 1;
+}
+
+/* Dentry operations; attached to dentries in fuse_lookup_iget() */
+static struct dentry_operations fuse_dentry_operations = {
+ .d_revalidate = fuse_dentry_revalidate,
+};
+
+/*
+ * Look up 'entry' in 'dir' and obtain the matching inode.
+ *
+ * On success returns 0 with *inodep set to the inode, or to NULL when
+ * the lookup reported -ENOENT (negative entry); the dentry operations
+ * are installed on 'entry' in both cases.
+ *
+ * Returns -ENAMETOOLONG for names longer than FUSE_NAME_MAX, -EINTR
+ * when no request could be obtained, -ENOMEM when fuse_iget() fails and
+ * any other lookup error unchanged.  *inodep is not written on error.
+ */
+static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
+ struct inode **inodep)
+{
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode = NULL;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req;
+
+ if (entry->d_name.len > FUSE_NAME_MAX)
+ return -ENAMETOOLONG;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ fuse_lookup_init(req, dir, entry, &outarg);
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (!err) {
+ inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr);
+ if (!inode) {
+ /* NOTE(review): returns without fuse_put_request();
+ presumably fuse_send_forget() takes over the
+ request - confirm */
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return -ENOMEM;
+ }
+ }
+ fuse_put_request(fc, req);
+ /* -ENOENT is not a failure here: it yields a negative entry */
+ if (err && err != -ENOENT)
+ return err;
+
+ if (inode) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+
+ entry->d_op = &fuse_dentry_operations;
+ *inodep = inode;
+ return 0;
+}
+
+/* Expire the cached attributes of 'inode' by moving its attribute
+ timeout (i_time) just behind the current jiffies, so that the
+ time_before_eq() check in fuse_revalidate() no longer holds */
+void fuse_invalidate_attr(struct inode *inode)
+{
+ get_fuse_inode(inode)->i_time = jiffies - 1;
+}
+
+/* Invalidate a dentry: call d_invalidate() on it and move its entry
+ timeout (d_time) just behind the current jiffies, so that the
+ time_after() check in fuse_dentry_revalidate() fires */
+static void fuse_invalidate_entry(struct dentry *entry)
+{
+ d_invalidate(entry);
+ entry->d_time = jiffies - 1;
+}
+
+/*
+ * Common tail of the entry-creating operations (called from fuse_mknod(),
+ * fuse_mkdir(), fuse_symlink() and fuse_link()).
+ *
+ * The caller supplies a request with opcode and input arguments filled
+ * in.  This function adds the parent node id and the reply buffer, sends
+ * the request, obtains the new inode with fuse_iget() and instantiates
+ * 'entry' with it.  'mode' is only compared against the file type
+ * (S_IFMT bits) of the new inode.
+ *
+ * Returns 0 on success, the error reported in the reply, -ENOMEM if
+ * fuse_iget() fails, or -EIO if the new inode has a different file type
+ * than 'mode'.
+ */
+static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+ struct inode *dir, struct dentry *entry,
+ int mode)
+{
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct fuse_inode *fi;
+ int err;
+
+ req->in.h.nodeid = get_node_id(dir);
+ req->inode = dir;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (err) {
+ fuse_put_request(fc, req);
+ return err;
+ }
+ inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr);
+ if (!inode) {
+ /* NOTE(review): returns without fuse_put_request();
+ presumably fuse_send_forget() takes over the request -
+ confirm */
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return -ENOMEM;
+ }
+ fuse_put_request(fc, req);
+
+ /* Don't allow userspace to do really stupid things... */
+ if ((inode->i_mode ^ mode) & S_IFMT) {
+ iput(inode);
+ return -EIO;
+ }
+
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+
+ fi = get_fuse_inode(inode);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+
+ d_instantiate(entry, inode);
+ /* The parent directory gained an entry: its attributes are stale */
+ fuse_invalidate_attr(dir);
+ return 0;
+}
+
+/*
+ * mknod handler: build a FUSE_MKNOD request carrying the mode, the
+ * encoded device number and the entry name, then let
+ * create_new_entry() send it and instantiate the dentry.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the result
+ * of create_new_entry().
+ */
+static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+		      dev_t rdev)
+{
+	struct fuse_conn *conn = get_fuse_conn(dir);
+	struct fuse_mknod_in args;
+	struct fuse_req *rq;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.rdev = new_encode_dev(rdev);
+	args.mode = mode;
+
+	rq->in.h.opcode = FUSE_MKNOD;
+	rq->in.numargs = 2;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = entry->d_name.name;
+	rq->in.args[1].size = entry->d_name.len + 1;
+
+	return create_new_entry(conn, rq, dir, entry, mode);
+}
+
+/* create handler: implemented as fuse_mknod() with a device number of
+ 0; the nameidata argument is not used */
+static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+ struct nameidata *nd)
+{
+ return fuse_mknod(dir, entry, mode, 0);
+}
+
+/*
+ * mkdir handler: build a FUSE_MKDIR request carrying the mode and the
+ * entry name, then let create_new_entry() send it.  The created inode
+ * is required to be a directory (S_IFDIR).
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the result
+ * of create_new_entry().
+ */
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+{
+	struct fuse_conn *conn = get_fuse_conn(dir);
+	struct fuse_mkdir_in args;
+	struct fuse_req *rq;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.mode = mode;
+
+	rq->in.h.opcode = FUSE_MKDIR;
+	rq->in.numargs = 2;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = entry->d_name.name;
+	rq->in.args[1].size = entry->d_name.len + 1;
+
+	return create_new_entry(conn, rq, dir, entry, S_IFDIR);
+}
+
+/*
+ * symlink handler: build a FUSE_SYMLINK request whose first argument is
+ * the entry name and whose second argument is the link target
+ * (including its terminating NUL), then let create_new_entry() send it.
+ * The created inode is required to be a symlink (S_IFLNK).
+ *
+ * Returns -ENAMETOOLONG if the target, counted with its NUL, exceeds
+ * FUSE_SYMLINK_MAX, -EINTR if no request could be obtained, otherwise
+ * the result of create_new_entry().
+ */
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+			const char *link)
+{
+	struct fuse_conn *conn = get_fuse_conn(dir);
+	unsigned target_size = strlen(link) + 1;
+	struct fuse_req *rq;
+
+	if (target_size > FUSE_SYMLINK_MAX)
+		return -ENAMETOOLONG;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	rq->in.h.opcode = FUSE_SYMLINK;
+	rq->in.numargs = 2;
+	rq->in.args[0].value = entry->d_name.name;
+	rq->in.args[0].size = entry->d_name.len + 1;
+	rq->in.args[1].value = link;
+	rq->in.args[1].size = target_size;
+
+	return create_new_entry(conn, rq, dir, entry, S_IFLNK);
+}
+
+/*
+ * unlink handler: send FUSE_UNLINK for 'entry' in 'dir'.
+ *
+ * On success the link count of the inode is zeroed and the cached
+ * attributes of both the inode and the directory are expired.  If the
+ * request was interrupted (-EINTR) the dentry is invalidated instead.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the error
+ * code from the reply (0 on success).
+ */
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{
+	struct fuse_conn *conn = get_fuse_conn(dir);
+	struct fuse_req *rq = fuse_get_request(conn);
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	rq->inode = dir;
+	rq->in.h.nodeid = get_node_id(dir);
+	rq->in.h.opcode = FUSE_UNLINK;
+	rq->in.numargs = 1;
+	rq->in.args[0].value = entry->d_name.name;
+	rq->in.args[0].size = entry->d_name.len + 1;
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+
+	if (rc == -EINTR) {
+		fuse_invalidate_entry(entry);
+	} else if (rc == 0) {
+		struct inode *victim = entry->d_inode;
+
+		/* Set nlink to zero so the inode can be cleared, if
+		   the inode does have more links this will be
+		   discovered at the next lookup/getattr */
+		victim->i_nlink = 0;
+		fuse_invalidate_attr(victim);
+		fuse_invalidate_attr(dir);
+	}
+	return rc;
+}
+
+/*
+ * rmdir handler: send FUSE_RMDIR for 'entry' in 'dir'.
+ *
+ * On success the link count of the removed directory's inode is zeroed
+ * and the cached attributes of the parent are expired.  If the request
+ * was interrupted (-EINTR) the dentry is invalidated instead.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the error
+ * code from the reply (0 on success).
+ */
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{
+	struct fuse_conn *conn = get_fuse_conn(dir);
+	struct fuse_req *rq = fuse_get_request(conn);
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	rq->inode = dir;
+	rq->in.h.nodeid = get_node_id(dir);
+	rq->in.h.opcode = FUSE_RMDIR;
+	rq->in.numargs = 1;
+	rq->in.args[0].value = entry->d_name.name;
+	rq->in.args[0].size = entry->d_name.len + 1;
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+
+	if (rc == -EINTR) {
+		fuse_invalidate_entry(entry);
+	} else if (rc == 0) {
+		entry->d_inode->i_nlink = 0;
+		fuse_invalidate_attr(dir);
+	}
+	return rc;
+}
+
+/*
+ * rename handler: send FUSE_RENAME with the node id of the new
+ * directory plus the old and the new name.
+ *
+ * On success the cached attributes of both directories are expired.
+ * If the request was interrupted (-EINTR) the old dentry, and the new
+ * one if it is positive, are invalidated.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the error
+ * code from the reply (0 on success).
+ */
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+		       struct inode *newdir, struct dentry *newent)
+{
+	struct fuse_conn *conn = get_fuse_conn(olddir);
+	struct fuse_req *rq = fuse_get_request(conn);
+	struct fuse_rename_in args;
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.newdir = get_node_id(newdir);
+
+	rq->inode = olddir;
+	rq->inode2 = newdir;
+	rq->in.h.nodeid = get_node_id(olddir);
+	rq->in.h.opcode = FUSE_RENAME;
+	rq->in.numargs = 3;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = oldent->d_name.name;
+	rq->in.args[1].size = oldent->d_name.len + 1;
+	rq->in.args[2].value = newent->d_name.name;
+	rq->in.args[2].size = newent->d_name.len + 1;
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+
+	if (rc == -EINTR) {
+		/* If request was interrupted, DEITY only knows if the
+		   rename actually took place.  If the invalidation
+		   fails (e.g. some process has CWD under the renamed
+		   directory), then there can be inconsistency between
+		   the dcache and the real filesystem.  Tough luck. */
+		fuse_invalidate_entry(oldent);
+		if (newent->d_inode)
+			fuse_invalidate_entry(newent);
+	} else if (rc == 0) {
+		fuse_invalidate_attr(olddir);
+		if (olddir != newdir)
+			fuse_invalidate_attr(newdir);
+	}
+
+	return rc;
+}
+
+/*
+ * link handler: build a FUSE_LINK request carrying the node id of the
+ * existing inode and the new name, then let create_new_entry() send it
+ * on behalf of 'newdir'.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the result
+ * of create_new_entry().
+ */
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+		     struct dentry *newent)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_req *rq = fuse_get_request(conn);
+	struct fuse_link_in args;
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.oldnodeid = get_node_id(inode);
+
+	rq->inode2 = inode;
+	rq->in.h.opcode = FUSE_LINK;
+	rq->in.numargs = 2;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = newent->d_name.name;
+	rq->in.args[1].size = newent->d_name.len + 1;
+	rc = create_new_entry(conn, rq, newdir, newent, inode->i_mode);
+
+	/* Contrary to "normal" filesystems it can happen that link
+	   makes two "logical" inodes point to the same "physical"
+	   inode.  We invalidate the attributes of the old one, so it
+	   will reflect changes in the backing inode (link count,
+	   etc.) */
+	if (rc == 0 || rc == -EINTR)
+		fuse_invalidate_attr(inode);
+	return rc;
+}
+
+/*
+ * Fetch fresh attributes for 'inode' with a FUSE_GETATTR request.
+ *
+ * On success the inode attributes and the attribute timeout are updated
+ * from the reply.  If the reply describes a different file type than
+ * the inode has, the inode is marked bad and -EIO is returned.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise 0 or the
+ * error code from the reply.
+ */
+int fuse_do_getattr(struct inode *inode)
+{
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_req *rq = fuse_get_request(conn);
+	struct fuse_attr_out reply;
+	struct fuse_inode *fi;
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	rq->inode = inode;
+	rq->in.h.nodeid = get_node_id(inode);
+	rq->in.h.opcode = FUSE_GETATTR;
+	rq->out.numargs = 1;
+	rq->out.args[0].value = &reply;
+	rq->out.args[0].size = sizeof(reply);
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+	if (rc)
+		return rc;
+
+	if ((inode->i_mode ^ reply.attr.mode) & S_IFMT) {
+		/* File type changed underneath us */
+		make_bad_inode(inode);
+		return -EIO;
+	}
+
+	fi = get_fuse_inode(inode);
+	fuse_change_attributes(inode, &reply.attr);
+	fi->i_time = time_to_jiffies(reply.attr_valid,
+				     reply.attr_valid_nsec);
+	return 0;
+}
+
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the requester process.  This
+ * means that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways.  For example
+ * it can delay the operation for an arbitrary length of time, allowing
+ * DoS against the requester.
+ *
+ * For this reason only those processes can call into the filesystem,
+ * for which the owner of the mount has ptrace privilege.  This
+ * excludes processes started by other users, suid or sgid processes.
+ *
+ * Returns 1 if 'task' may use the filesystem: either FUSE_ALLOW_OTHER
+ * is set on the connection, or all of the task's real, effective and
+ * saved user and group ids equal those recorded in the connection.
+ * Returns 0 otherwise.
+ */
+static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+{
+	if (fc->flags & FUSE_ALLOW_OTHER)
+		return 1;
+
+	return task->euid == fc->user_id &&
+	       task->suid == fc->user_id &&
+	       task->uid == fc->user_id &&
+	       task->egid == fc->group_id &&
+	       task->sgid == fc->group_id &&
+	       task->gid == fc->group_id;
+}
+
+/*
+ * Make sure the cached attributes of the inode behind 'entry' are
+ * current.
+ *
+ * Returns -EACCES if the current task is not allowed to use the
+ * filesystem.  Attributes are re-fetched with fuse_do_getattr() for the
+ * root node always, and for other nodes once the attribute timeout has
+ * passed; otherwise 0 is returned without a request.
+ */
+static int fuse_revalidate(struct dentry *entry)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *conn = get_fuse_conn(inode);
+
+	if (!fuse_allow_task(conn, current))
+		return -EACCES;
+
+	if (get_node_id(inode) == FUSE_ROOT_ID ||
+	    !time_before_eq(jiffies, fi->i_time))
+		return fuse_do_getattr(inode);
+
+	return 0;
+}
+
+/*
+ * permission handler.
+ *
+ * Returns -EACCES if the current task is not allowed to use the
+ * filesystem.  With FUSE_DEFAULT_PERMISSIONS the decision is left to
+ * generic_permission(), retried once after refreshing the attributes if
+ * it first denies access.  Without that flag only two local checks are
+ * made: writes to regular files, directories and symlinks on a
+ * read-only inode give -EROFS, and executing a non-directory without
+ * any execute bit gives -EACCES.
+ */
+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	int mode;
+	int rc;
+
+	if (!fuse_allow_task(conn, current))
+		return -EACCES;
+
+	if (conn->flags & FUSE_DEFAULT_PERMISSIONS) {
+		rc = generic_permission(inode, mask, NULL);
+		if (rc != -EACCES)
+			return rc;
+
+		/* If permission is denied, try to refresh file
+		   attributes.  This is also needed, because the root
+		   node will at first have no permissions */
+		rc = fuse_do_getattr(inode);
+		if (rc)
+			return rc;
+
+		/* FIXME: Need some mechanism to revoke permissions:
+		   currently if the filesystem suddenly changes the
+		   file mode, we will not be informed about it, and
+		   continue to allow access to the file/directory.
+
+		   This is actually not so grave, since the user can
+		   simply keep access to the file/directory anyway by
+		   keeping it open... */
+
+		return generic_permission(inode, mask, NULL);
+	}
+
+	mode = inode->i_mode;
+	if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
+	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+		return -EROFS;
+	if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
+		return -EACCES;
+	return 0;
+}
+
+/*
+ * Walk the directory records in 'buf' ('nbytes' bytes long) and feed
+ * each one to 'filldir'.
+ *
+ * Parsing stops when fewer than FUSE_NAME_OFFSET bytes remain, when the
+ * next record does not fit into the remaining bytes, or when filldir
+ * returns non-zero.  After every record that filldir accepted,
+ * file->f_pos is set to the offset stored in that record.
+ *
+ * Returns -EIO if a record has an empty name or a name longer than
+ * FUSE_NAME_MAX, otherwise 0.
+ */
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+			 void *dstbuf, filldir_t filldir)
+{
+	for (;;) {
+		struct fuse_dirent *de;
+		size_t len;
+
+		if (nbytes < FUSE_NAME_OFFSET)
+			break;
+
+		de = (struct fuse_dirent *) buf;
+		len = FUSE_DIRENT_SIZE(de);
+		if (de->namelen == 0 || de->namelen > FUSE_NAME_MAX)
+			return -EIO;
+		if (len > nbytes)
+			break;
+
+		if (filldir(dstbuf, de->name, de->namelen,
+			    file->f_pos, de->ino, de->type))
+			break;
+
+		/* Record consumed: advance buffer and file position */
+		file->f_pos = de->off;
+		buf += len;
+		nbytes -= len;
+	}
+
+	return 0;
+}
+
+/* Thin wrapper around fuse_send_read_common() that passes 1 as the
+ last argument and returns its result.
+ NOTE(review): presumably that argument selects the directory variant
+ of the read - confirm against fuse_send_read_common() */
+static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos,
+ size_t count)
+{
+ return fuse_send_read_common(req, file, inode, pos, count, 1);
+}
+
+/*
+ * readdir handler: read up to one page of directory records starting
+ * at file->f_pos and pass them to 'filldir' via parse_dirfile().
+ *
+ * Returns -EINTR if no request could be obtained, -ENOMEM if the page
+ * cannot be allocated, the error from the reply, or the result of
+ * parse_dirfile().
+ */
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_req *rq = fuse_get_request(conn);
+	struct page *buf_page;
+	size_t got;
+	int rc;
+
+	if (rq == NULL)
+		return -EINTR;
+
+	buf_page = alloc_page(GFP_KERNEL);
+	if (buf_page == NULL) {
+		fuse_put_request(conn, rq);
+		return -ENOMEM;
+	}
+
+	rq->pages[0] = buf_page;
+	rq->num_pages = 1;
+	got = fuse_send_readdir(rq, file, inode, file->f_pos, PAGE_SIZE);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+	if (rc == 0)
+		rc = parse_dirfile(page_address(buf_page), got, file,
+				   dstbuf, filldir);
+
+	__free_page(buf_page);
+	fuse_invalidate_attr(inode); /* atime changed */
+	return rc;
+}
+
+/*
+ * Read the target of the symlink behind 'dentry' with a FUSE_READLINK
+ * request.
+ *
+ * Returns a freshly allocated page holding the NUL-terminated target
+ * (to be released with free_link()), or an ERR_PTR: -EINTR if no
+ * request could be obtained, -ENOMEM if the page cannot be allocated,
+ * or the error from the reply.
+ */
+static char *read_link(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_req *rq = fuse_get_request(conn);
+	char *target;
+
+	if (rq == NULL)
+		return ERR_PTR(-EINTR);
+
+	target = (char *) __get_free_page(GFP_KERNEL);
+	if (target == NULL) {
+		target = ERR_PTR(-ENOMEM);
+	} else {
+		rq->inode = inode;
+		rq->in.h.nodeid = get_node_id(inode);
+		rq->in.h.opcode = FUSE_READLINK;
+		/* Variable-length reply; one byte is kept for the NUL */
+		rq->out.argvar = 1;
+		rq->out.numargs = 1;
+		rq->out.args[0].value = target;
+		rq->out.args[0].size = PAGE_SIZE - 1;
+		request_send(conn, rq);
+		if (!rq->out.h.error) {
+			/* NOTE(review): args[0].size presumably holds
+			   the actual reply length by now - confirm */
+			target[rq->out.args[0].size] = '\0';
+		} else {
+			free_page((unsigned long) target);
+			target = ERR_PTR(rq->out.h.error);
+		}
+	}
+
+	fuse_put_request(conn, rq);
+	fuse_invalidate_attr(inode); /* atime changed */
+	return target;
+}
+
+/* Release a buffer returned by read_link(); error pointers are
+ ignored since no page is attached to them */
+static void free_link(char *link)
+{
+ if (!IS_ERR(link))
+ free_page((unsigned long) link);
+}
+
+/* follow_link handler: store the result of read_link() (a page with
+ the target, or an ERR_PTR) in the nameidata; always returns NULL */
+static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ nd_set_link(nd, read_link(dentry));
+ return NULL;
+}
+
+/* put_link handler: free the link buffer that fuse_follow_link()
+ stored in the nameidata; the cookie 'c' is not used */
+static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+ free_link(nd_get_link(nd));
+}
+
+/* open handler for directories: fuse_open_common() with isdir = 1 */
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{
+ return fuse_open_common(inode, file, 1);
+}
+
+/* release handler for directories: fuse_release_common() with 1 as
+ the last argument (presumably the "is a directory" flag - confirm) */
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+ return fuse_release_common(inode, file, 1);
+}
+
+/*
+ * fsync handler for directories.  Does nothing and returns 0 when
+ * called without a file; otherwise forwards to fuse_fsync_common()
+ * with 1 as the last argument.
+ */
+static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	/* nfsd can call this with no file */
+	if (!file)
+		return 0;
+
+	return fuse_fsync_common(file, de, datasync, 1);
+}
+
+/*
+ * Translate the attributes selected in 'iattr' into 'fattr'.
+ *
+ * 'fattr' is zeroed first; then mode, uid, gid and size are copied if
+ * their ATTR_* bit is set.  atime and mtime (seconds only) are copied
+ * only when both ATTR_ATIME and ATTR_MTIME are set.
+ *
+ * Returns the mask of FATTR_* bits describing what was filled in.
+ */
+static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
+{
+	const unsigned both_times = ATTR_ATIME | ATTR_MTIME;
+	unsigned requested = iattr->ia_valid;
+	unsigned filled = 0;
+
+	memset(fattr, 0, sizeof(*fattr));
+
+	if (requested & ATTR_MODE) {
+		filled |= FATTR_MODE;
+		fattr->mode = iattr->ia_mode;
+	}
+	if (requested & ATTR_UID) {
+		filled |= FATTR_UID;
+		fattr->uid = iattr->ia_uid;
+	}
+	if (requested & ATTR_GID) {
+		filled |= FATTR_GID;
+		fattr->gid = iattr->ia_gid;
+	}
+	if (requested & ATTR_SIZE) {
+		filled |= FATTR_SIZE;
+		fattr->size = iattr->ia_size;
+	}
+	/* You can only _set_ these together (they may change by themselves) */
+	if ((requested & both_times) == both_times) {
+		filled |= FATTR_ATIME | FATTR_MTIME;
+		fattr->atime = iattr->ia_atime.tv_sec;
+		fattr->mtime = iattr->ia_mtime.tv_sec;
+	}
+
+	return filled;
+}
+
+/*
+ * setattr handler: forward an attribute change in a FUSE_SETATTR
+ * request.
+ *
+ * With FUSE_DEFAULT_PERMISSIONS set, inode_change_ok() is consulted
+ * first.  A size change beyond the caller's RLIMIT_FSIZE raises SIGXFSZ
+ * and fails with -EFBIG before anything is sent.  On success the inode
+ * is updated from the attributes in the reply; a reply whose file type
+ * differs from the inode's marks the inode bad and yields -EIO.  An
+ * interrupted request (-EINTR) expires the cached attributes.
+ *
+ * Returns -EINTR as well when no request could be obtained, otherwise
+ * 0 or the error described above / reported in the reply.
+ */
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ struct fuse_setattr_in inarg;
+ struct fuse_attr_out outarg;
+ int err;
+ int is_truncate = 0;
+
+ if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ err = inode_change_ok(inode, attr);
+ if (err)
+ return err;
+ }
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ unsigned long limit;
+ is_truncate = 1;
+ /* Enforce the file size resource limit of the caller */
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ }
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.valid = iattr_to_fattr(attr, &inarg.attr);
+ req->in.h.opcode = FUSE_SETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ } else {
+ if (is_truncate) {
+ /* Adopt the size from the reply; vmtruncate() is
+ only called when the file actually shrank */
+ loff_t origsize = i_size_read(inode);
+ i_size_write(inode, outarg.attr.size);
+ if (origsize > outarg.attr.size)
+ vmtruncate(inode, outarg.attr.size);
+ }
+ fuse_change_attributes(inode, &outarg.attr);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+ } else if (err == -EINTR)
+ fuse_invalidate_attr(inode);
+
+ return err;
+}
+
+/*
+ * getattr handler: revalidate the cached attributes with
+ * fuse_revalidate() and, if that succeeds, fill 'stat' from the inode.
+ *
+ * Returns 0 on success or the error from fuse_revalidate().
+ */
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+			struct kstat *stat)
+{
+	struct inode *inode = entry->d_inode;
+	int rc;
+
+	rc = fuse_revalidate(entry);
+	if (rc)
+		return rc;
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
+/*
+ * lookup handler: resolve 'entry' in 'dir' via fuse_lookup_iget() and
+ * attach the resulting inode (or a negative entry) with
+ * d_splice_alias().
+ *
+ * A directory inode that already has a connected alias is refused with
+ * -EIO, since that would create a second name for the directory.
+ *
+ * Returns the result of d_splice_alias(), or an ERR_PTR carrying the
+ * error from fuse_lookup_iget() or -EIO.
+ */
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+				  struct nameidata *nd)
+{
+	struct inode *inode;
+	int err = fuse_lookup_iget(dir, entry, &inode);
+	if (err)
+		return ERR_PTR(err);
+	if (inode && S_ISDIR(inode->i_mode)) {
+		/* Don't allow creating an alias to a directory */
+		struct dentry *alias = d_find_alias(inode);
+		if (alias) {
+			int connected = !(alias->d_flags & DCACHE_DISCONNECTED);
+
+			/* d_find_alias() returned a referenced dentry:
+			   drop that reference on both paths, not only
+			   when the lookup is refused, otherwise a
+			   disconnected alias leaks a reference */
+			dput(alias);
+			if (connected) {
+				iput(inode);
+				return ERR_PTR(-EIO);
+			}
+		}
+	}
+	return d_splice_alias(inode, entry);
+}
+
+/*
+ * setxattr handler: send FUSE_SETXATTR with the size and flags, the
+ * attribute name (with its NUL) and the value.
+ *
+ * Returns -E2BIG if 'size' exceeds FUSE_XATTR_SIZE_MAX, -EOPNOTSUPP if
+ * the operation is known to be unimplemented, -EINTR if no request
+ * could be obtained, otherwise the error code from the reply.  A reply
+ * of -ENOSYS is remembered in the connection and reported as
+ * -EOPNOTSUPP.
+ */
+static int fuse_setxattr(struct dentry *entry, const char *name,
+			 const void *value, size_t size, int flags)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_setxattr_in args;
+	struct fuse_req *rq;
+	int rc;
+
+	if (size > FUSE_XATTR_SIZE_MAX)
+		return -E2BIG;
+
+	if (conn->no_setxattr)
+		return -EOPNOTSUPP;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.flags = flags;
+	args.size = size;
+
+	rq->inode = inode;
+	rq->in.h.nodeid = get_node_id(inode);
+	rq->in.h.opcode = FUSE_SETXATTR;
+	rq->in.numargs = 3;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = name;
+	rq->in.args[1].size = strlen(name) + 1;
+	rq->in.args[2].value = value;
+	rq->in.args[2].size = size;
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+
+	if (rc == -ENOSYS) {
+		/* Not implemented: don't ask again on this connection */
+		conn->no_setxattr = 1;
+		rc = -EOPNOTSUPP;
+	}
+	return rc;
+}
+
+/*
+ * getxattr handler: send FUSE_GETXATTR for the attribute 'name'.
+ *
+ * With a non-zero 'size' the reply is received directly into 'value'
+ * and the length of the reply argument is returned.  With 'size' zero
+ * the reply is a struct fuse_getxattr_out and its 'size' field is
+ * returned.
+ *
+ * Returns -EOPNOTSUPP if the operation is known to be unimplemented,
+ * -EINTR if no request could be obtained, or the error code from the
+ * reply.  A reply of -ENOSYS is remembered in the connection and
+ * reported as -EOPNOTSUPP.
+ */
+static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
+			     void *value, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_getxattr_in args;
+	struct fuse_getxattr_out size_reply;
+	struct fuse_req *rq;
+	ssize_t ret;
+
+	if (conn->no_getxattr)
+		return -EOPNOTSUPP;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.size = size;
+
+	rq->inode = inode;
+	rq->in.h.nodeid = get_node_id(inode);
+	rq->in.h.opcode = FUSE_GETXATTR;
+	rq->in.numargs = 2;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+	rq->in.args[1].value = name;
+	rq->in.args[1].size = strlen(name) + 1;
+
+	/* This is really two different operations rolled into one */
+	rq->out.numargs = 1;
+	if (!size) {
+		rq->out.args[0].value = &size_reply;
+		rq->out.args[0].size = sizeof(size_reply);
+	} else {
+		rq->out.argvar = 1;
+		rq->out.args[0].value = value;
+		rq->out.args[0].size = size;
+	}
+	request_send(conn, rq);
+
+	ret = rq->out.h.error;
+	if (ret == -ENOSYS) {
+		/* Not implemented: don't ask again on this connection */
+		conn->no_getxattr = 1;
+		ret = -EOPNOTSUPP;
+	} else if (!ret) {
+		ret = size ? rq->out.args[0].size : size_reply.size;
+	}
+	fuse_put_request(conn, rq);
+	return ret;
+}
+
+/*
+ * listxattr handler: send FUSE_LISTXATTR for the inode behind 'entry'.
+ *
+ * With a non-zero 'size' the reply is received directly into 'list'
+ * and the length of the reply argument is returned.  With 'size' zero
+ * the reply is a struct fuse_getxattr_out and its 'size' field is
+ * returned.
+ *
+ * Returns -EOPNOTSUPP if the operation is known to be unimplemented,
+ * -EINTR if no request could be obtained, or the error code from the
+ * reply.  A reply of -ENOSYS is remembered in the connection and
+ * reported as -EOPNOTSUPP.
+ */
+static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_getxattr_in args;
+	struct fuse_getxattr_out size_reply;
+	struct fuse_req *rq;
+	ssize_t ret;
+
+	if (conn->no_listxattr)
+		return -EOPNOTSUPP;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	memset(&args, 0, sizeof(args));
+	args.size = size;
+
+	rq->inode = inode;
+	rq->in.h.nodeid = get_node_id(inode);
+	rq->in.h.opcode = FUSE_LISTXATTR;
+	rq->in.numargs = 1;
+	rq->in.args[0].value = &args;
+	rq->in.args[0].size = sizeof(args);
+
+	/* This is really two different operations rolled into one */
+	rq->out.numargs = 1;
+	if (!size) {
+		rq->out.args[0].value = &size_reply;
+		rq->out.args[0].size = sizeof(size_reply);
+	} else {
+		rq->out.argvar = 1;
+		rq->out.args[0].value = list;
+		rq->out.args[0].size = size;
+	}
+	request_send(conn, rq);
+
+	ret = rq->out.h.error;
+	if (ret == -ENOSYS) {
+		/* Not implemented: don't ask again on this connection */
+		conn->no_listxattr = 1;
+		ret = -EOPNOTSUPP;
+	} else if (!ret) {
+		ret = size ? rq->out.args[0].size : size_reply.size;
+	}
+	fuse_put_request(conn, rq);
+	return ret;
+}
+
+/*
+ * removexattr handler: send FUSE_REMOVEXATTR with the attribute name
+ * (with its NUL).
+ *
+ * Returns -EOPNOTSUPP if the operation is known to be unimplemented,
+ * -EINTR if no request could be obtained, otherwise the error code
+ * from the reply.  A reply of -ENOSYS is remembered in the connection
+ * and reported as -EOPNOTSUPP.
+ */
+static int fuse_removexattr(struct dentry *entry, const char *name)
+{
+	struct inode *inode = entry->d_inode;
+	struct fuse_conn *conn = get_fuse_conn(inode);
+	struct fuse_req *rq;
+	int rc;
+
+	if (conn->no_removexattr)
+		return -EOPNOTSUPP;
+
+	rq = fuse_get_request(conn);
+	if (rq == NULL)
+		return -EINTR;
+
+	rq->inode = inode;
+	rq->in.h.nodeid = get_node_id(inode);
+	rq->in.h.opcode = FUSE_REMOVEXATTR;
+	rq->in.numargs = 1;
+	rq->in.args[0].value = name;
+	rq->in.args[0].size = strlen(name) + 1;
+	request_send(conn, rq);
+	rc = rq->out.h.error;
+	fuse_put_request(conn, rq);
+
+	if (rc == -ENOSYS) {
+		/* Not implemented: don't ask again on this connection */
+		conn->no_removexattr = 1;
+		rc = -EOPNOTSUPP;
+	}
+	return rc;
+}
+
+/* Inode operations for directories; installed by fuse_init_dir() */
+static struct inode_operations fuse_dir_inode_operations = {
+ .lookup = fuse_lookup,
+ .mkdir = fuse_mkdir,
+ .symlink = fuse_symlink,
+ .unlink = fuse_unlink,
+ .rmdir = fuse_rmdir,
+ .rename = fuse_rename,
+ .link = fuse_link,
+ .setattr = fuse_setattr,
+ .create = fuse_create,
+ .mknod = fuse_mknod,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+/* File operations for open directories; installed by fuse_init_dir() */
+static struct file_operations fuse_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = fuse_readdir,
+ .open = fuse_dir_open,
+ .release = fuse_dir_release,
+ .fsync = fuse_dir_fsync,
+};
+
+/* Inode operations installed by fuse_init_common() */
+static struct inode_operations fuse_common_inode_operations = {
+ .setattr = fuse_setattr,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+/* Inode operations for symlinks; installed by fuse_init_symlink().
+ Note that no .permission handler is set here */
+static struct inode_operations fuse_symlink_inode_operations = {
+ .setattr = fuse_setattr,
+ .follow_link = fuse_follow_link,
+ .put_link = fuse_put_link,
+ .readlink = generic_readlink,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+/* Install the common inode operations on 'inode' */
+void fuse_init_common(struct inode *inode)
+{
+ inode->i_op = &fuse_common_inode_operations;
+}
+
+/* Install the directory inode and file operations on 'inode' */
+void fuse_init_dir(struct inode *inode)
+{
+ inode->i_op = &fuse_dir_inode_operations;
+ inode->i_fop = &fuse_dir_operations;
+}
+
+/* Install the symlink inode operations on 'inode' */
+void fuse_init_symlink(struct inode *inode)
+{
+ inode->i_op = &fuse_symlink_inode_operations;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 00000000000..6454022b053
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,555 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static struct file_operations fuse_direct_io_file_operations;
+
+/*
+ * Send OPEN (or OPENDIR when isdir is non-zero) for the inode and, on
+ * success, attach a newly allocated struct fuse_file to
+ * file->private_data.
+ *
+ * The request later used for RELEASE is allocated here (ff->release_req)
+ * so that fuse_release_common() does not need to allocate anything.
+ *
+ * Returns 0 on success or a negative errno: the result of
+ * generic_file_open() or fuse_do_getattr(), -EINTR if no request could be
+ * obtained, -ENOMEM on allocation failure, or the error reported in the
+ * reply header.
+ */
+int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_open_in inarg;
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
+	int err;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	/* If opening the root node, no lookup has been performed on
+	   it, so the attributes must be refreshed */
+	if (get_node_id(inode) == FUSE_ROOT_ID) {
+		/* reuse the function-level err instead of shadowing it */
+		err = fuse_do_getattr(inode);
+		if (err)
+			return err;
+	}
+
+	req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	err = -ENOMEM;
+	ff = kmalloc(sizeof(*ff), GFP_KERNEL);
+	if (!ff)
+		goto out_put_request;
+
+	ff->release_req = fuse_request_alloc();
+	if (!ff->release_req) {
+		kfree(ff);
+		goto out_put_request;
+	}
+
+	memset(&inarg, 0, sizeof(inarg));
+	/* These open flags are masked out before being sent to userspace */
+	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		fuse_request_free(ff->release_req);
+		kfree(ff);
+	} else {
+		/* Regular files may be switched to the direct I/O methods */
+		if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
+			file->f_op = &fuse_direct_io_file_operations;
+		/* Drop cached pages unless the reply asks to keep them */
+		if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
+			invalidate_inode_pages(inode->i_mapping);
+		ff->fh = outarg.fh;
+		file->private_data = ff;
+	}
+
+ out_put_request:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+/*
+ * Send RELEASE (or RELEASEDIR when isdir is non-zero) for an open file.
+ *
+ * The request stored in ff->release_req by fuse_open_common() is used, so
+ * nothing is allocated here.  The request is handed to
+ * request_send_background() and the fuse_file is freed immediately
+ * afterwards.  Always returns 0.
+ */
+int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req = ff->release_req;
+ /* The argument is stored inside the request itself (req->misc) */
+ struct fuse_release_in *inarg = &req->misc.release_in;
+
+ /* ff->fh is copied into the request before ff is freed below */
+ inarg->fh = ff->fh;
+ inarg->flags = file->f_flags & ~O_EXCL;
+ req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_release_in);
+ req->in.args[0].value = inarg;
+ request_send_background(fc, req);
+ kfree(ff);
+
+ /* Return value is ignored by VFS */
+ return 0;
+}
+
+/*
+ * open() for regular files: fuse_open_common() with isdir == 0.
+ */
+static int fuse_open(struct inode *inode, struct file *file)
+{
+	const int isdir = 0;
+
+	return fuse_open_common(inode, file, isdir);
+}
+
+/*
+ * release() for regular files: fuse_release_common() with isdir == 0.
+ */
+static int fuse_release(struct inode *inode, struct file *file)
+{
+	const int isdir = 0;
+
+	return fuse_release_common(inode, file, isdir);
+}
+
+/*
+ * flush() file operation: send a synchronous FUSE_FLUSH carrying the
+ * file handle.  When the reply is -ENOSYS this is remembered in
+ * fc->no_flush, after which the function returns 0 without sending
+ * anything.  Returns 0, -EINTR if no request could be obtained, or the
+ * error from the reply header.
+ */
+static int fuse_flush(struct file *file)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_flush_in flush_in;
+	struct fuse_req *req;
+	int err;
+
+	if (fc->no_flush)
+		return 0;
+
+	req = fuse_get_request(fc);
+	if (req == NULL)
+		return -EINTR;
+
+	memset(&flush_in, 0, sizeof(flush_in));
+	flush_in.fh = ff->fh;
+
+	req->inode = inode;
+	req->file = file;
+	req->in.h.opcode = FUSE_FLUSH;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].value = &flush_in;
+	req->in.args[0].size = sizeof(flush_in);
+
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+
+	if (err != -ENOSYS)
+		return err;
+
+	/* FLUSH is not implemented: remember that and report success */
+	fc->no_flush = 1;
+	return 0;
+}
+
+/*
+ * Send FSYNC (or FSYNCDIR when isdir is non-zero) for an open file.
+ *
+ * fsync_flags is 1 when datasync is non-zero, otherwise 0.  A reply of
+ * -ENOSYS is recorded in fc->no_fsync / fc->no_fsyncdir and turned into
+ * success; once recorded, no further requests of that kind are sent.
+ * Returns 0, -EINTR if no request could be obtained, or the error from
+ * the reply header.
+ */
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+		      int isdir)
+{
+	struct inode *inode = de->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_fsync_in sync_in;
+	struct fuse_req *req;
+	int err;
+
+	if (isdir) {
+		if (fc->no_fsyncdir)
+			return 0;
+	} else {
+		if (fc->no_fsync)
+			return 0;
+	}
+
+	req = fuse_get_request(fc);
+	if (req == NULL)
+		return -EINTR;
+
+	memset(&sync_in, 0, sizeof(sync_in));
+	sync_in.fh = ff->fh;
+	sync_in.fsync_flags = datasync ? 1 : 0;
+
+	req->inode = inode;
+	req->file = file;
+	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].value = &sync_in;
+	req->in.args[0].size = sizeof(sync_in);
+
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+
+	if (err != -ENOSYS)
+		return err;
+
+	/* Not implemented by the filesystem: remember and report success */
+	if (isdir)
+		fc->no_fsyncdir = 1;
+	else
+		fc->no_fsync = 1;
+	return 0;
+}
+
+/*
+ * fsync() for regular files: fuse_fsync_common() with isdir == 0.
+ */
+static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+{
+	const int isdir = 0;
+
+	return fuse_fsync_common(file, de, datasync, isdir);
+}
+
+/*
+ * Fill in and synchronously send a READ (or READDIR when isdir is
+ * non-zero) request for count bytes at offset pos.  The reply data goes
+ * to the pages the caller has already placed in req->pages
+ * (out.argpages), and may be shorter than requested (out.argvar).
+ *
+ * Returns the size of the output argument after the request was sent;
+ * callers must look at req->out.h.error for failure.
+ */
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+			     struct inode *inode, loff_t pos, size_t count,
+			     int isdir)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in read_in;
+
+	memset(&read_in, 0, sizeof(read_in));
+	read_in.fh = ff->fh;
+	read_in.offset = pos;
+	read_in.size = count;
+
+	req->inode = inode;
+	req->file = file;
+
+	/* input: the fuse_read_in header */
+	req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].value = &read_in;
+	req->in.args[0].size = sizeof(read_in);
+
+	/* output: up to count bytes, delivered into req->pages */
+	req->out.numargs = 1;
+	req->out.args[0].size = count;
+	req->out.argvar = 1;
+	req->out.argpages = 1;
+
+	request_send(fc, req);
+	return req->out.args[0].size;
+}
+
+/*
+ * READ for regular files: fuse_send_read_common() with isdir == 0.
+ */
+static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
+				    struct inode *inode, loff_t pos,
+				    size_t count)
+{
+	const int isdir = 0;
+
+	return fuse_send_read_common(req, file, inode, pos, count, isdir);
+}
+
+/*
+ * readpage address-space operation: read one page-cache page with a
+ * single synchronous READ request of PAGE_CACHE_SIZE bytes.
+ *
+ * The page is unlocked on every path, including the early exit taken
+ * when no request could be obtained.  Returns 0 (page marked up to
+ * date), -EINTR, or the error from the reply header.
+ */
+static int fuse_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ /* byte offset of the page within the file */
+ loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
+ struct fuse_req *req = fuse_get_request(fc);
+ int err = -EINTR;
+ if (!req)
+ goto out;
+
+ /* see struct fuse_out: zero partially or not copied pages */
+ req->out.page_zeroing = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
+ /* the error is read before the request is put back */
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ SetPageUptodate(page);
+ fuse_invalidate_attr(inode); /* atime changed */
+ out:
+ unlock_page(page);
+ return err;
+}
+
+/*
+ * Send one READ covering all pages currently collected in req->pages
+ * (assumed contiguous, starting at pages[0]).  Afterwards every page is
+ * unlocked, and marked up to date only if the request succeeded.
+ * Returns the error from the reply header (0 on success).
+ */
+static int fuse_send_readpages(struct fuse_req *req, struct file *file,
+			       struct inode *inode)
+{
+	loff_t start = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
+	size_t len = req->num_pages << PAGE_CACHE_SHIFT;
+	unsigned idx = 0;
+
+	req->out.page_zeroing = 1;
+	fuse_send_read(req, file, inode, start, len);
+
+	while (idx < req->num_pages) {
+		struct page *pg = req->pages[idx++];
+
+		if (req->out.h.error == 0)
+			SetPageUptodate(pg);
+		unlock_page(pg);
+	}
+	return req->out.h.error;
+}
+
+/*
+ * Context handed through read_cache_pages() to fuse_readpages_fill().
+ */
+struct fuse_readpages_data {
+ /* request in which pages are being collected */
+ struct fuse_req *req;
+ /* file being read */
+ struct file *file;
+ /* inode owning the mapping being read */
+ struct inode *inode;
+};
+
+/*
+ * Filler callback for read_cache_pages(): append page to the request
+ * being built in data->req.
+ *
+ * If the request already holds pages and cannot take this one (it is
+ * full, adding the page would exceed fc->max_read, or the page does not
+ * directly follow the last collected page) the collected pages are sent
+ * first and the request is reset.  On a send error the new page is
+ * unlocked and the error returned; otherwise returns 0.
+ */
+static int fuse_readpages_fill(void *_data, struct page *page)
+{
+ struct fuse_readpages_data *data = _data;
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /* Flush the current batch if this page cannot be added to it */
+ if (req->num_pages &&
+ (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+ req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+ int err = fuse_send_readpages(req, data->file, inode);
+ if (err) {
+ /* page was never added to the request: unlock it here */
+ unlock_page(page);
+ return err;
+ }
+ fuse_reset_request(req);
+ }
+ req->pages[req->num_pages] = page;
+ req->num_pages ++;
+ return 0;
+}
+
+/*
+ * readpages address-space operation: read a list of pages, batching
+ * them into as few READ requests as fuse_readpages_fill() allows.
+ *
+ * Returns -EINTR if no request could be obtained, otherwise the result
+ * of read_cache_pages() or of sending the final partial batch.
+ */
+static int fuse_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_readpages_data data;
+ int err;
+ data.file = file;
+ data.inode = inode;
+ data.req = fuse_get_request(fc);
+ if (!data.req)
+ return -EINTR;
+
+ err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
+ /* Send whatever is still collected in the request */
+ if (!err && data.req->num_pages)
+ err = fuse_send_readpages(data.req, file, inode);
+ fuse_put_request(fc, data.req);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return err;
+}
+
+/*
+ * Fill in and synchronously send a WRITE request for count bytes at
+ * offset pos.  The data is taken from the pages the caller has already
+ * placed in req->pages (in.argpages), starting at req->page_offset.
+ *
+ * Returns the size field of the fuse_write_out reply.  The reply buffer
+ * is zeroed beforehand so the return value is well defined (0) even if
+ * the reply is never filled in; callers must still check
+ * req->out.h.error before trusting it.
+ */
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+			      struct inode *inode, loff_t pos, size_t count)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_write_in inarg;
+	struct fuse_write_out outarg;
+
+	memset(&inarg, 0, sizeof(struct fuse_write_in));
+	/* never return an uninitialised stack value */
+	memset(&outarg, 0, sizeof(struct fuse_write_out));
+	inarg.fh = ff->fh;
+	inarg.offset = pos;
+	inarg.size = count;
+	req->in.h.opcode = FUSE_WRITE;
+	req->in.h.nodeid = get_node_id(inode);
+	req->inode = inode;
+	req->file = file;
+	/* args[0] is the header, args[1] is the payload held in req->pages */
+	req->in.argpages = 1;
+	req->in.numargs = 2;
+	req->in.args[0].size = sizeof(struct fuse_write_in);
+	req->in.args[0].value = &inarg;
+	req->in.args[1].size = count;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(struct fuse_write_out);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	return outarg.size;
+}
+
+/*
+ * prepare_write address-space operation: nothing to prepare, the data
+ * is sent from fuse_commit_write().
+ */
+static int fuse_prepare_write(struct file *file, struct page *page,
+			      unsigned offset, unsigned to)
+{
+	/* No op */
+	return 0;
+}
+
+/*
+ * commit_write address-space operation: synchronously WRITE the bytes
+ * [offset, to) of the page.
+ *
+ * A reply that reports a different size than requested is turned into
+ * -EIO.  On success the inode size is extended if the write went past
+ * it, and a fully written page is marked clean and up to date.
+ * Returns 0, -EINTR if no request could be obtained, or a negative
+ * errno from the reply.
+ */
+static int fuse_commit_write(struct file *file, struct page *page,
+ unsigned offset, unsigned to)
+{
+ int err;
+ size_t nres;
+ unsigned count = to - offset;
+ struct inode *inode = page->mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ /* file offset of the first byte to write */
+ loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->num_pages = 1;
+ req->pages[0] = page;
+ req->page_offset = offset;
+ nres = fuse_send_write(req, file, inode, pos, count);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ /* nres is only examined when the request itself succeeded */
+ if (!err && nres != count)
+ err = -EIO;
+ if (!err) {
+ pos += count;
+ if (pos > i_size_read(inode))
+ i_size_write(inode, pos);
+
+ /* only a write covering the whole page makes it up to date */
+ if (offset == 0 && to == PAGE_CACHE_SIZE) {
+ clear_page_dirty(page);
+ SetPageUptodate(page);
+ }
+ }
+ fuse_invalidate_attr(inode);
+ return err;
+}
+
+/*
+ * Drop the references on all pages held in req->pages.  When write is
+ * non-zero each page is marked dirty first.
+ */
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+	unsigned idx = 0;
+
+	while (idx < req->num_pages) {
+		struct page *pg = req->pages[idx++];
+
+		if (write)
+			set_page_dirty_lock(pg);
+		put_page(pg);
+	}
+}
+
+/*
+ * Pin the user pages backing [buf, buf + nbytes) into req->pages.
+ *
+ * At most FUSE_MAX_PAGES_PER_REQ pages are requested.  On success
+ * req->num_pages holds the number of pages get_user_pages() returned and
+ * req->page_offset the offset of buf within the first page.
+ * Returns 0, -EPERM when the current task has no mm, or the negative
+ * value returned by get_user_pages().
+ */
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+ unsigned nbytes, int write)
+{
+ unsigned long user_addr = (unsigned long) buf;
+ /* offset of the buffer start within its page */
+ unsigned offset = user_addr & ~PAGE_MASK;
+ int npages;
+
+ /* This doesn't work with nfsd */
+ if (!current->mm)
+ return -EPERM;
+
+ nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+ /* number of pages touched by nbytes bytes starting at offset */
+ npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
+ down_read(&current->mm->mmap_sem);
+ npages = get_user_pages(current, current->mm, user_addr, npages, write,
+ 0, req->pages, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (npages < 0)
+ return npages;
+
+ req->num_pages = npages;
+ req->page_offset = offset;
+ return 0;
+}
+
+/*
+ * Transfer count bytes between the user buffer buf and the filesystem
+ * at *ppos without going through the page cache.  write selects the
+ * direction (non-zero: WRITE requests, zero: READ requests).
+ *
+ * The transfer is split into chunks of at most fc->max_write /
+ * fc->max_read bytes and at most what fuse_get_user_pages() pinned.  The
+ * loop stops on the first error or short transfer.
+ *
+ * Returns the number of bytes transferred, or a negative errno when
+ * nothing was transferred (-EINTR if no request could be obtained).  If
+ * an error occurs after some bytes were already transferred, the byte
+ * count is returned, both for request errors and for a failure to pin
+ * the user pages.  A reply claiming more bytes than requested yields
+ * -EIO.  When the result is positive, *ppos is advanced and, for
+ * writes, the inode size extended if necessary.
+ */
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos, int write)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	size_t nmax = write ? fc->max_write : fc->max_read;
+	loff_t pos = *ppos;
+	ssize_t res = 0;
+	struct fuse_req *req = fuse_get_request(fc);
+	if (!req)
+		return -EINTR;
+
+	while (count) {
+		size_t tmp;
+		size_t nres;
+		size_t nbytes = min(count, nmax);
+		/* a read from the file writes into the user pages */
+		int err = fuse_get_user_pages(req, buf, nbytes, !write);
+		if (err) {
+			/* do not lose the count of bytes already transferred */
+			if (!res)
+				res = err;
+			break;
+		}
+		/* bytes actually covered by the pinned pages */
+		tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+		nbytes = min(nbytes, tmp);
+		if (write)
+			nres = fuse_send_write(req, file, inode, pos, nbytes);
+		else
+			nres = fuse_send_read(req, file, inode, pos, nbytes);
+		fuse_release_user_pages(req, !write);
+		if (req->out.h.error) {
+			if (!res)
+				res = req->out.h.error;
+			break;
+		} else if (nres > nbytes) {
+			res = -EIO;
+			break;
+		}
+		count -= nres;
+		res += nres;
+		pos += nres;
+		buf += nres;
+		/* short transfer: stop here */
+		if (nres != nbytes)
+			break;
+		if (count)
+			fuse_reset_request(req);
+	}
+	fuse_put_request(fc, req);
+	if (res > 0) {
+		if (write && pos > i_size_read(inode))
+			i_size_write(inode, pos);
+		*ppos = pos;
+	}
+	fuse_invalidate_attr(inode);
+
+	return res;
+}
+
+/*
+ * read() for direct-I/O files: fuse_direct_io() with write == 0.
+ */
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	const int write = 0;
+
+	return fuse_direct_io(file, buf, count, ppos, write);
+}
+
+/*
+ * write() for direct-I/O files: fuse_direct_io() with write == 1, run
+ * with the inode semaphore held.
+ */
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	ssize_t written;
+
+	/* Don't allow parallel writes to the same file */
+	down(&inode->i_sem);
+	written = fuse_direct_io(file, buf, count, ppos, 1);
+	up(&inode->i_sem);
+
+	return written;
+}
+
+/*
+ * mmap() file operation: a mapping that is both shared and writable is
+ * refused with -ENODEV; a shared read-only mapping has VM_MAYWRITE
+ * cleared.  Everything else is handled by generic_file_mmap().
+ */
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+		return -ENODEV;
+
+	if (vma->vm_flags & VM_SHARED)
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	return generic_file_mmap(file, vma);
+}
+
+/*
+ * set_page_dirty address-space operation.  This is not expected to be
+ * called: it logs a warning with a stack trace and returns 0 without
+ * touching the page.
+ */
+static int fuse_set_page_dirty(struct page *page)
+{
+	/* give the message an explicit log level */
+	printk(KERN_WARNING "fuse_set_page_dirty: should not happen\n");
+	dump_stack();
+	return 0;
+}
+
+/*
+ * File operations for regular files using the page cache.
+ */
+static struct file_operations fuse_file_operations = {
+	/* open / close */
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	/* data access through the generic page-cache paths */
+	.llseek		= generic_file_llseek,
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+	.sendfile	= generic_file_sendfile,
+	.mmap		= fuse_file_mmap,
+	/* synchronisation */
+	.fsync		= fuse_fsync,
+};
+
+/*
+ * File operations installed by fuse_open_common() when the open reply
+ * has FOPEN_DIRECT_IO set.
+ */
+static struct file_operations fuse_direct_io_file_operations = {
+	/* open / close */
+	.open		= fuse_open,
+	.flush		= fuse_flush,
+	.release	= fuse_release,
+	/* data access bypassing the page cache */
+	.llseek		= generic_file_llseek,
+	.read		= fuse_direct_read,
+	.write		= fuse_direct_write,
+	/* synchronisation */
+	.fsync		= fuse_fsync,
+	/* no mmap and sendfile */
+};
+
+/*
+ * Address-space operations for regular files.
+ */
+static struct address_space_operations fuse_file_aops = {
+	/* reading */
+	.readpage	= fuse_readpage,
+	.readpages	= fuse_readpages,
+	/* writing */
+	.prepare_write	= fuse_prepare_write,
+	.commit_write	= fuse_commit_write,
+	.set_page_dirty	= fuse_set_page_dirty,
+};
+
+/*
+ * Install the regular-file address-space and file operations.
+ */
+void fuse_init_file_inode(struct inode *inode)
+{
+	inode->i_data.a_ops = &fuse_file_aops;
+	inode->i_fop = &fuse_file_operations;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 00000000000..24d761518d8
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,451 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include <linux/fuse.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
+#include <asm/semaphore.h>
+
+/** Max number of pages that can be attached to a single request.
+ This is the size of the pages[] vector in struct fuse_req, so it
+ bounds page-based reads as well as direct reads and writes */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** If more requests are outstanding, then the operation will block */
+#define FUSE_MAX_OUTSTANDING 10
+
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+ module will check permissions based on the file mode. Otherwise no
+ permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+ doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER (1 << 1)
+
+
+/** FUSE inode: the VFS inode plus FUSE specific data.  The containing
+ structure is recovered from a struct inode with get_fuse_inode() */
+struct fuse_inode {
+ /** Inode data */
+ struct inode inode;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** Number of lookups on this inode */
+ u64 nlookup;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_req *forget_req;
+
+ /** Time in jiffies until the file attributes are valid */
+ unsigned long i_time;
+};
+
+/** FUSE specific file data, stored in file->private_data */
+struct fuse_file {
+ /** Request reserved for release: allocated by fuse_open_common()
+ and sent by fuse_release_common().
+ NOTE(review): this used to say "flush and release", but
+ fuse_flush() in file.c obtains its own request */
+ struct fuse_req *release_req;
+
+ /** File handle used by userspace */
+ u64 fh;
+};
+
+/** One input argument of a request */
+struct fuse_in_arg {
+ /** Size of the argument in bytes */
+ unsigned size;
+ /** Argument data (not used for an argument carried in req->pages,
+ see fuse_in.argpages) */
+ const void *value;
+};
+
+/** The request input: header plus up to three arguments */
+struct fuse_in {
+ /** The request header */
+ struct fuse_in_header h;
+
+ /** True if the data for the last argument is in req->pages */
+ unsigned argpages:1;
+
+ /** Number of arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+ /** Size of the argument in bytes */
+ unsigned size;
+ /** Buffer receiving the argument (not used for an argument
+ delivered into req->pages, see fuse_out.argpages) */
+ void *value;
+};
+
+/** The request output: reply header plus up to three arguments */
+struct fuse_out {
+ /** Header returned from userspace */
+ struct fuse_out_header h;
+
+ /** Last argument is variable length (can be shorter than
+ arg->size) */
+ unsigned argvar:1;
+
+ /** Last argument is a list of pages to copy data to */
+ unsigned argpages:1;
+
+ /** Zero partially or not copied pages */
+ unsigned page_zeroing:1;
+
+ /** Number of arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_arg args[3];
+};
+
+struct fuse_req;
+struct fuse_conn;
+
+/**
+ * A request to the client
+ *
+ * Callers fill in the 'in' and 'out' descriptions (and, for page based
+ * transfers, pages/num_pages/page_offset) and then pass the request to
+ * one of the request_send*() functions.
+ */
+struct fuse_req {
+ /** This can be on either unused_list, pending or processing
+ lists in fuse_conn */
+ struct list_head list;
+
+ /** Entry on the background list */
+ struct list_head bg_entry;
+
+ /** refcount */
+ atomic_t count;
+
+ /** True if the request has reply */
+ unsigned isreply:1;
+
+ /** The request is preallocated */
+ unsigned preallocated:1;
+
+ /** The request was interrupted */
+ unsigned interrupted:1;
+
+ /** Request is sent in the background */
+ unsigned background:1;
+
+ /** Data is being copied to/from the request */
+ unsigned locked:1;
+
+ /** Request has been sent to userspace */
+ unsigned sent:1;
+
+ /** The request is finished */
+ unsigned finished:1;
+
+ /** The request input */
+ struct fuse_in in;
+
+ /** The request output */
+ struct fuse_out out;
+
+ /** Used to wake up the task waiting for completion of request*/
+ wait_queue_head_t waitq;
+
+ /** Data for asynchronous requests: argument storage that lives in
+ the request itself (used by fuse_send_forget() and
+ fuse_release_common()) */
+ union {
+ struct fuse_forget_in forget_in;
+ struct fuse_release_in release_in;
+ struct fuse_init_in_out init_in_out;
+ } misc;
+
+ /** page vector */
+ struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+ /** number of pages in vector */
+ unsigned num_pages;
+
+ /** offset of data on first page */
+ unsigned page_offset;
+
+ /** Inode used in the request */
+ struct inode *inode;
+
+ /** Second inode used in the request (or NULL) */
+ struct inode *inode2;
+
+ /** File used in the request (or NULL) */
+ struct file *file;
+};
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ *
+ * The no_* bits cache an -ENOSYS reply for the corresponding operation
+ * so that it is not sent to userspace again.
+ */
+struct fuse_conn {
+ /** Reference count */
+ int count;
+
+ /** The user id for this mount */
+ uid_t user_id;
+
+ /** The group id for this mount */
+ gid_t group_id;
+
+ /** The fuse mount flags for this mount */
+ unsigned flags;
+
+ /** Maximum read size (fuse_show_options() treats ~0 as "not
+ set") */
+ unsigned max_read;
+
+ /** Maximum write size */
+ unsigned max_write;
+
+ /** Readers of the connection are waiting on this */
+ wait_queue_head_t waitq;
+
+ /** The list of pending requests */
+ struct list_head pending;
+
+ /** The list of requests being processed */
+ struct list_head processing;
+
+ /** Requests put in the background (RELEASE or any other
+ interrupted request) */
+ struct list_head background;
+
+ /** Controls the maximum number of outstanding requests */
+ struct semaphore outstanding_sem;
+
+ /** This counts the number of outstanding requests if
+ outstanding_sem would go negative */
+ unsigned outstanding_debt;
+
+ /** RW semaphore for exclusion with fuse_put_super() */
+ struct rw_semaphore sbput_sem;
+
+ /** The list of unused requests */
+ struct list_head unused_list;
+
+ /** The next unique request id */
+ u64 reqctr;
+
+ /** Mount is active */
+ unsigned mounted : 1;
+
+ /** Connection established */
+ unsigned connected : 1;
+
+ /** Connection failed (version mismatch) */
+ unsigned conn_error : 1;
+
+ /** Is fsync not implemented by fs? */
+ unsigned no_fsync : 1;
+
+ /** Is fsyncdir not implemented by fs? */
+ unsigned no_fsyncdir : 1;
+
+ /** Is flush not implemented by fs? */
+ unsigned no_flush : 1;
+
+ /** Is setxattr not implemented by fs? */
+ unsigned no_setxattr : 1;
+
+ /** Is getxattr not implemented by fs? */
+ unsigned no_getxattr : 1;
+
+ /** Is listxattr not implemented by fs? */
+ unsigned no_listxattr : 1;
+
+ /** Is removexattr not implemented by fs? */
+ unsigned no_removexattr : 1;
+
+ /** Backing dev info */
+ struct backing_dev_info bdi;
+};
+
+/** Address of the super block's s_fs_info slot, viewed as a pointer to
+ the connection pointer */
+static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
+{
+	struct fuse_conn **slot = (struct fuse_conn **) &sb->s_fs_info;
+
+	return slot;
+}
+
+/** The connection stored in the super block's s_fs_info */
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+	struct fuse_conn **slot = get_fuse_conn_super_p(sb);
+
+	return *slot;
+}
+
+/** The connection of the super block the inode belongs to */
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	return get_fuse_conn_super(sb);
+}
+
+/** The fuse_inode that embeds the given VFS inode */
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+	struct fuse_inode *fi = container_of(inode, struct fuse_inode, inode);
+
+	return fi;
+}
+
+/** The FUSE node ID of the inode */
+static inline u64 get_node_id(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	return fi->nodeid;
+}
+
+/** Device operations */
+extern struct file_operations fuse_dev_operations;
+
+/**
+ * This is the single global spinlock which protects FUSE's structures
+ *
+ * The following data is protected by this lock:
+ *
+ * - the private_data field of the device file
+ * - the s_fs_info field of the super block
+ * - unused_list, pending, processing lists in fuse_conn
+ * - background list in fuse_conn
+ * - the unique request ID counter reqctr in fuse_conn
+ * - the sb (super_block) field in fuse_conn
+ * - the file (device file) field in fuse_conn
+ */
+extern spinlock_t fuse_lock;
+
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+ int generation, struct fuse_attr *attr);
+
+/**
+ * Send FORGET command
+ */
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+ unsigned long nodeid, u64 nlookup);
+
+/**
+ * Send READ or READDIR request
+ */
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos, size_t count,
+ int isdir);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+ int isdir);
+
+/**
+ * Initialise file operations on a regular file
+ */
+void fuse_init_file_inode(struct inode *inode);
+
+/**
+ * Initialise inode operations on regular files and special files
+ */
+void fuse_init_common(struct inode *inode);
+
+/**
+ * Initialise inode and file operations on a directory
+ */
+void fuse_init_dir(struct inode *inode);
+
+/**
+ * Initialise inode operations on a symlink
+ */
+void fuse_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+
+/**
+ * Check if the connection can be released, and if yes, then free the
+ * connection structure
+ */
+void fuse_release_conn(struct fuse_conn *fc);
+
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Reinitialize a request, the preallocated flag is left unmodified
+ */
+void fuse_reset_request(struct fuse_req *req);
+
+/**
+ * Reserve a preallocated request
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+
+/**
+ * Decrement reference count of a request. If count goes to zero put
+ * on unused list (preallocated) or free request (not preallocated).
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous)
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request with no reply
+ */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Release inodes and file associated with background request
+ */
+void fuse_release_background(struct fuse_req *req);
+
+/**
+ * Get the attributes of a file
+ */
+int fuse_do_getattr(struct inode *inode);
+
+/**
+ * Invalidate inode attributes
+ */
+void fuse_invalidate_attr(struct inode *inode);
+
+/**
+ * Send the INIT message
+ */
+void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 00000000000..e69a546844d
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,591 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/statfs.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+/* The global FUSE spinlock declared in fuse_i.h */
+spinlock_t fuse_lock;
+/* Slab cache from which fuse_alloc_inode() allocates inodes */
+static kmem_cache_t *fuse_inode_cachep;
+
+/* Reported as f_type by statfs; the bytes 46 55 73 65, least
+ significant first, are the ASCII characters "FUse" */
+#define FUSE_SUPER_MAGIC 0x65735546
+
+/*
+ * Mount options as parsed by parse_fuse_opt().  The *_present bits
+ * record which of the mandatory options were given.
+ */
+struct fuse_mount_data {
+ /* value of the fd= option */
+ int fd;
+ /* value of the rootmode= option (parsed as octal) */
+ unsigned rootmode;
+ /* value of the user_id= option */
+ unsigned user_id;
+ /* value of the group_id= option */
+ unsigned group_id;
+ unsigned fd_present : 1;
+ unsigned rootmode_present : 1;
+ unsigned user_id_present : 1;
+ unsigned group_id_present : 1;
+ /* FUSE_DEFAULT_PERMISSIONS and/or FUSE_ALLOW_OTHER */
+ unsigned flags;
+ /* value of the max_read= option, ~0 if not given */
+ unsigned max_read;
+};
+
+/*
+ * Allocate an inode from the fuse inode cache and initialise the FUSE
+ * specific fields, including the request later used for FORGET.
+ * Returns NULL if either allocation fails.
+ */
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+	struct inode *inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	struct fuse_inode *fi;
+
+	if (inode == NULL)
+		return NULL;
+
+	fi = get_fuse_inode(inode);
+	fi->i_time = jiffies - 1;
+	fi->nodeid = 0;
+	fi->nlookup = 0;
+	fi->forget_req = fuse_request_alloc();
+	if (fi->forget_req != NULL)
+		return inode;
+
+	/* no FORGET request available: give the inode back */
+	kmem_cache_free(fuse_inode_cachep, inode);
+	return NULL;
+}
+
+/*
+ * Free an inode, together with its FORGET request if that request is
+ * still attached (fuse_clear_inode() detaches it when it sends FORGET).
+ */
+static void fuse_destroy_inode(struct inode *inode)
+{
+	struct fuse_req *forget = get_fuse_inode(inode)->forget_req;
+
+	if (forget != NULL)
+		fuse_request_free(forget);
+	kmem_cache_free(fuse_inode_cachep, inode);
+}
+
+/*
+ * Intentionally empty; inodes are filled in by fuse_iget().
+ */
+static void fuse_read_inode(struct inode *inode)
+{
+	/* No op */
+}
+
+/*
+ * Fill in req as a FORGET for nodeid carrying the given lookup count
+ * and send it without expecting a reply.  The argument is stored inside
+ * the request itself (req->misc.forget_in).
+ */
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+		      unsigned long nodeid, u64 nlookup)
+{
+	struct fuse_forget_in *forget = &req->misc.forget_in;
+
+	forget->nlookup = nlookup;
+
+	req->in.h.nodeid = nodeid;
+	req->in.h.opcode = FUSE_FORGET;
+	req->in.args[0].value = forget;
+	req->in.args[0].size = sizeof(struct fuse_forget_in);
+	req->in.numargs = 1;
+
+	request_send_noreply(fc, req);
+}
+
+/*
+ * clear_inode super operation: while the super block is still active,
+ * send FORGET for the inode using its preallocated request, and detach
+ * that request from the inode.
+ */
+static void fuse_clear_inode(struct inode *inode)
+{
+	struct fuse_conn *fc;
+	struct fuse_inode *fi;
+
+	if (!(inode->i_sb->s_flags & MS_ACTIVE))
+		return;
+
+	fc = get_fuse_conn(inode);
+	fi = get_fuse_inode(inode);
+	fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
+	/* the request now belongs to the sender */
+	fi->forget_req = NULL;
+}
+
+/*
+ * Copy the attributes received from userspace into the inode.
+ *
+ * For regular files whose size differs from attr->size the cached
+ * pages are invalidated first.  The file type bits of i_mode are kept;
+ * only the low 07777 mode bits are taken from attr.
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+{
+ if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
+ invalidate_inode_pages(inode->i_mapping);
+
+ inode->i_ino = attr->ino;
+ /* existing type bits plus the new mode bits */
+ inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
+ inode->i_nlink = attr->nlink;
+ inode->i_uid = attr->uid;
+ inode->i_gid = attr->gid;
+ i_size_write(inode, attr->size);
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = attr->blocks;
+ inode->i_atime.tv_sec = attr->atime;
+ inode->i_atime.tv_nsec = attr->atimensec;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+}
+
+/*
+ * Set the type of a new inode from attr->mode and install the matching
+ * operations.  An unrecognised file type is turned into a regular file.
+ */
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+	inode->i_mode = attr->mode & S_IFMT;
+	i_size_write(inode, attr->size);
+
+	switch (inode->i_mode) {
+	case S_IFDIR:
+		fuse_init_dir(inode);
+		break;
+
+	case S_IFLNK:
+		fuse_init_symlink(inode);
+		break;
+
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		fuse_init_common(inode);
+		init_special_inode(inode, inode->i_mode,
+				   new_decode_dev(attr->rdev));
+		break;
+
+	default:
+		/* Don't let user create weird files */
+		inode->i_mode = S_IFREG;
+		/* fall through */
+	case S_IFREG:
+		fuse_init_common(inode);
+		fuse_init_file_inode(inode);
+		break;
+	}
+}
+
+/*
+ * iget5_locked() test callback: returns 1 if the inode has the node ID
+ * pointed to by _nodeidp, 0 otherwise.
+ */
+static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+{
+	unsigned long wanted = *(unsigned long *) _nodeidp;
+
+	return get_node_id(inode) == wanted ? 1 : 0;
+}
+
+/*
+ * iget5_locked() set callback: store the node ID pointed to by
+ * _nodeidp in the inode.  Always returns 0.
+ */
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	fi->nodeid = *(unsigned long *) _nodeidp;
+	return 0;
+}
+
+/*
+ * Find or create the inode for nodeid, bump its lookup count and
+ * update its attributes from attr.
+ *
+ * A new inode is initialised from attr.  If an existing inode has a
+ * different file type than attr reports, it is marked bad and dropped,
+ * and the lookup is retried once.  Returns NULL if iget5_locked() fails.
+ *
+ * NOTE(review): nodeid is an unsigned long here while fuse_inode.nodeid
+ * is u64; where unsigned long is narrower than 64 bits the ID would be
+ * truncated -- confirm this is intended.
+ */
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+ int generation, struct fuse_attr *attr)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ int retried = 0;
+
+ retry:
+ inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
+ if (!inode)
+ return NULL;
+
+ if ((inode->i_state & I_NEW)) {
+ inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_generation = generation;
+ inode->i_data.backing_dev_info = &fc->bdi;
+ fuse_init_inode(inode, attr);
+ unlock_new_inode(inode);
+ } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+ /* a second type mismatch in a row is treated as a bug */
+ BUG_ON(retried);
+ /* Inode has changed type, any I/O on the old should fail */
+ make_bad_inode(inode);
+ iput(inode);
+ retried = 1;
+ goto retry;
+ }
+
+ fi = get_fuse_inode(inode);
+ fi->nlookup ++;
+ fuse_change_attributes(inode, attr);
+ return inode;
+}
+
+/*
+ * put_super super operation: detach the connection from the mount.
+ *
+ * With sbput_sem held for writing, all background requests are
+ * released; then, under fuse_lock, the mount state in the connection is
+ * cleared, waiters on fc->waitq are woken and fuse_release_conn() is
+ * called.
+ */
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ down_write(&fc->sbput_sem);
+ /* presumably fuse_release_background() removes the request from
+ the list, otherwise this loop would not end -- verify in dev.c */
+ while (!list_empty(&fc->background))
+ fuse_release_background(list_entry(fc->background.next,
+ struct fuse_req, bg_entry));
+
+ spin_lock(&fuse_lock);
+ fc->mounted = 0;
+ fc->user_id = 0;
+ fc->group_id = 0;
+ fc->flags = 0;
+ /* Flush all readers on this fs */
+ wake_up_all(&fc->waitq);
+ /* sbput_sem is released while fuse_lock is still held */
+ up_write(&fc->sbput_sem);
+ fuse_release_conn(fc);
+ spin_unlock(&fuse_lock);
+}
+
+/*
+ * Translate the statfs reply from userspace into a struct kstatfs.
+ * f_type is always FUSE_SUPER_MAGIC.
+ */
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{
+	stbuf->f_type = FUSE_SUPER_MAGIC;
+
+	/* block accounting */
+	stbuf->f_bsize = attr->bsize;
+	stbuf->f_blocks = attr->blocks;
+	stbuf->f_bfree = attr->bfree;
+	stbuf->f_bavail = attr->bavail;
+
+	/* inode accounting */
+	stbuf->f_files = attr->files;
+	stbuf->f_ffree = attr->ffree;
+
+	stbuf->f_namelen = attr->namelen;
+	/* fsid is left zero */
+}
+
+/*
+ * statfs super operation: send a synchronous STATFS request and, on
+ * success, convert the reply into buf.  Returns 0, -EINTR if no request
+ * could be obtained, or the error from the reply header.
+ */
+static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	struct fuse_statfs_out reply;
+	struct fuse_req *req = fuse_get_request(fc);
+	int err;
+
+	if (req == NULL)
+		return -EINTR;
+
+	/* no input arguments, one output argument */
+	req->in.h.opcode = FUSE_STATFS;
+	req->in.numargs = 0;
+	req->out.numargs = 1;
+	req->out.args[0].value = &reply;
+	req->out.args[0].size = sizeof(reply);
+	request_send(fc, req);
+
+	err = req->out.h.error;
+	if (err == 0)
+		convert_fuse_statfs(buf, &reply.st);
+	fuse_put_request(fc, req);
+	return err;
+}
+
+/* Mount option identifiers, matched against the 'tokens' table below */
+enum {
+ OPT_FD,
+ OPT_ROOTMODE,
+ OPT_USER_ID,
+ OPT_GROUP_ID,
+ OPT_DEFAULT_PERMISSIONS,
+ OPT_ALLOW_OTHER,
+ OPT_MAX_READ,
+ /* terminates the table */
+ OPT_ERR
+};
+
+/* Patterns of the recognised mount options, for match_token() */
+static match_table_t tokens = {
+ {OPT_FD, "fd=%u"},
+ {OPT_ROOTMODE, "rootmode=%o"},
+ {OPT_USER_ID, "user_id=%u"},
+ {OPT_GROUP_ID, "group_id=%u"},
+ {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
+ {OPT_ALLOW_OTHER, "allow_other"},
+ {OPT_MAX_READ, "max_read=%u"},
+ {OPT_ERR, NULL}
+};
+
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+{
+ char *p;
+ memset(d, 0, sizeof(struct fuse_mount_data));
+ d->max_read = ~0;
+
+ while ((p = strsep(&opt, ",")) != NULL) {
+ int token;
+ int value;
+ substring_t args[MAX_OPT_ARGS];
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case OPT_FD:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->fd = value;
+ d->fd_present = 1;
+ break;
+
+ case OPT_ROOTMODE:
+ if (match_octal(&args[0], &value))
+ return 0;
+ d->rootmode = value;
+ d->rootmode_present = 1;
+ break;
+
+ case OPT_USER_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->user_id = value;
+ d->user_id_present = 1;
+ break;
+
+ case OPT_GROUP_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->group_id = value;
+ d->group_id_present = 1;
+ break;
+
+ case OPT_DEFAULT_PERMISSIONS:
+ d->flags |= FUSE_DEFAULT_PERMISSIONS;
+ break;
+
+ case OPT_ALLOW_OTHER:
+ d->flags |= FUSE_ALLOW_OTHER;
+ break;
+
+ case OPT_MAX_READ:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->max_read = value;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+
+ if (!d->fd_present || !d->rootmode_present ||
+ !d->user_id_present || !d->group_id_present)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * ->show_options: append this mount's options to @m.
+ *
+ * user_id and group_id are always printed.  default_permissions and
+ * allow_other are printed only when the corresponding flag is set, and
+ * max_read only when it differs from ~0.  Always returns 0.
+ */
+static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct super_block *sb = mnt->mnt_sb;
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	seq_printf(m, ",user_id=%u", fc->user_id);
+	seq_printf(m, ",group_id=%u", fc->group_id);
+
+	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) != 0)
+		seq_puts(m, ",default_permissions");
+
+	if ((fc->flags & FUSE_ALLOW_OTHER) != 0)
+		seq_puts(m, ",allow_other");
+
+	if (fc->max_read == ~0)
+		return 0;
+
+	seq_printf(m, ",max_read=%u", fc->max_read);
+	return 0;
+}
+
+/*
+ * Free every request still sitting on fc->unused_list, then free the
+ * connection structure itself.
+ */
+static void free_conn(struct fuse_conn *fc)
+{
+	struct fuse_req *req, *next;
+
+	/* _safe variant: each entry is unlinked and freed while walking */
+	list_for_each_entry_safe(req, next, &fc->unused_list, list) {
+		list_del(&req->list);
+		fuse_request_free(req);
+	}
+	kfree(fc);
+}
+
+/*
+ * Drop one reference on the connection and free it once the count
+ * reaches zero.  Must be called with the fuse lock held.
+ */
+void fuse_release_conn(struct fuse_conn *fc)
+{
+	if (--fc->count == 0)
+		free_conn(fc);
+}
+
+/*
+ * Allocate and initialise a connection: zeroed structure, initialised
+ * wait queue, lists and semaphores, FUSE_MAX_OUTSTANDING preallocated
+ * requests on unused_list, and the backing-dev readahead/unplug defaults.
+ *
+ * Returns NULL if the connection or any of the requests cannot be
+ * allocated.
+ */
+static struct fuse_conn *new_conn(void)
+{
+	struct fuse_conn *fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+	int nr = 0;
+
+	if (!fc)
+		return NULL;
+
+	memset(fc, 0, sizeof(*fc));
+	init_waitqueue_head(&fc->waitq);
+	INIT_LIST_HEAD(&fc->pending);
+	INIT_LIST_HEAD(&fc->processing);
+	INIT_LIST_HEAD(&fc->unused_list);
+	INIT_LIST_HEAD(&fc->background);
+	sema_init(&fc->outstanding_sem, 0);
+	init_rwsem(&fc->sbput_sem);
+
+	while (nr < FUSE_MAX_OUTSTANDING) {
+		struct fuse_req *req = fuse_request_alloc();
+
+		if (req == NULL) {
+			/* also frees the requests queued so far */
+			free_conn(fc);
+			return NULL;
+		}
+		list_add(&req->list, &fc->unused_list);
+		nr++;
+	}
+
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	fc->reqctr = 0;
+	return fc;
+}
+
+/*
+ * Create a connection and attach it to both the device file and the
+ * superblock.
+ *
+ * Returns ERR_PTR(-EINVAL) if @file was not opened with
+ * fuse_dev_operations or already has private_data set, and
+ * ERR_PTR(-ENOMEM) if the connection cannot be allocated.  On success
+ * the connection is marked mounted and connected and its count is set
+ * to 2 -- presumably one reference for the device file and one for the
+ * superblock (TODO confirm against the device release path).
+ */
+static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
+{
+	struct fuse_conn *fc;
+
+	if (file->f_op != &fuse_dev_operations)
+		return ERR_PTR(-EINVAL);
+
+	fc = new_conn();
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&fuse_lock);
+	if (!file->private_data) {
+		file->private_data = fc;
+		*get_fuse_conn_super_p(sb) = fc;
+		fc->mounted = 1;
+		fc->connected = 1;
+		fc->count = 2;
+	} else {
+		/* device file is already in use: discard the new connection */
+		free_conn(fc);
+		fc = ERR_PTR(-EINVAL);
+	}
+	spin_unlock(&fuse_lock);
+
+	return fc;
+}
+
+/*
+ * Obtain the root inode through fuse_iget(), using an otherwise zeroed
+ * attribute block that carries only @mode and the inode number
+ * FUSE_ROOT_ID.
+ *
+ * NOTE(review): the literal arguments 1 and 0 are presumably the node id
+ * and generation expected by fuse_iget() -- confirm against its
+ * prototype.
+ */
+static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
+{
+	struct fuse_attr root_attr;
+
+	memset(&root_attr, 0, sizeof(root_attr));
+	root_attr.ino = FUSE_ROOT_ID;
+	root_attr.mode = mode;
+
+	return fuse_iget(sb, 1, 0, &root_attr);
+}
+
+/* Superblock operations installed by fuse_fill_super(). */
+static struct super_operations fuse_super_operations = {
+	.alloc_inode	= fuse_alloc_inode,
+	.destroy_inode	= fuse_destroy_inode,
+	.read_inode	= fuse_read_inode,
+	.clear_inode	= fuse_clear_inode,
+	.put_super	= fuse_put_super,
+	.statfs		= fuse_statfs,
+	.show_options	= fuse_show_options,
+};
+
+/*
+ * Fill in a new superblock from the mount data string @data.
+ *
+ * Parses the options, attaches a new connection to the file named by the
+ * fd= option, copies the mount parameters into the connection, creates
+ * the root inode and dentry and finally calls fuse_send_init().
+ *
+ * Returns 0 on success; -EINVAL if the options do not parse or the fd
+ * does not refer to an open file; the error encoded by get_conn(); or
+ * -ENOMEM if the root inode or root dentry cannot be set up, in which
+ * case one connection reference is dropped again.
+ */
+static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct fuse_mount_data d;
+	struct fuse_conn *fc;
+	struct inode *root;
+	struct file *file;
+
+	if (!parse_fuse_opt((char *) data, &d))
+		return -EINVAL;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = FUSE_SUPER_MAGIC;
+	sb->s_op = &fuse_super_operations;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	file = fget(d.fd);
+	if (file == NULL)
+		return -EINVAL;
+
+	fc = get_conn(file, sb);
+	/* drop the reference taken by fget() whatever get_conn() returned */
+	fput(file);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
+
+	fc->flags = d.flags;
+	fc->user_id = d.user_id;
+	fc->group_id = d.group_id;
+	fc->max_read = d.max_read;
+	/* clamp readahead to the number of pages max_read covers */
+	if (fc->bdi.ra_pages > fc->max_read / PAGE_CACHE_SIZE)
+		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+	fc->max_write = FUSE_MAX_IN / 2;
+
+	root = get_root_inode(sb, d.rootmode);
+	if (root == NULL)
+		goto err_release_conn;
+
+	sb->s_root = d_alloc_root(root);
+	if (sb->s_root == NULL) {
+		iput(root);
+		goto err_release_conn;
+	}
+
+	fuse_send_init(fc);
+	return 0;
+
+ err_release_conn:
+	spin_lock(&fuse_lock);
+	fuse_release_conn(fc);
+	spin_unlock(&fuse_lock);
+	return -ENOMEM;
+}
+
+/*
+ * ->get_sb: hand the raw mount data to get_sb_nodev(), with
+ * fuse_fill_super() as the fill callback.  @dev_name is not used.
+ */
+static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
+				       int flags, const char *dev_name,
+				       void *raw_data)
+{
+	struct super_block *sb;
+
+	sb = get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+	return sb;
+}
+
+/* Filesystem type registered under the name "fuse". */
+static struct file_system_type fuse_fs_type = {
+	.name		= "fuse",
+	.owner		= THIS_MODULE,
+	.get_sb		= fuse_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+/*
+ * Slab constructor for the fuse inode cache.  inode_init_once() is run
+ * only when SLAB_CTOR_CONSTRUCTOR is set and SLAB_CTOR_VERIFY is clear;
+ * any other flag combination is a no-op.
+ *
+ * NOTE(review): @foo is used directly as a struct inode, so the VFS
+ * inode is presumably the first member of struct fuse_inode -- confirm.
+ */
+static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+				 unsigned long flags)
+{
+	const unsigned long ctor_bits = SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
+	struct inode *inode = foo;
+
+	if ((flags & ctor_bits) != SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	inode_init_once(inode);
+}
+
+/*
+ * Register the filesystem type and create the inode slab cache.
+ *
+ * Returns 0 on success, the error from register_filesystem(), or
+ * -ENOMEM if the cache cannot be created (the filesystem type is
+ * unregistered again in that case).
+ */
+static int __init fuse_fs_init(void)
+{
+	int err = register_filesystem(&fuse_fs_type);
+
+	if (err) {
+		printk("fuse: failed to register filesystem\n");
+		return err;
+	}
+
+	fuse_inode_cachep = kmem_cache_create("fuse_inode",
+					      sizeof(struct fuse_inode),
+					      0, SLAB_HWCACHE_ALIGN,
+					      fuse_inode_init_once, NULL);
+	if (fuse_inode_cachep)
+		return 0;
+
+	/* roll back the registration done above */
+	unregister_filesystem(&fuse_fs_type);
+	return -ENOMEM;
+}
+
+/* Undo fuse_fs_init(): unregister the filesystem, destroy the cache. */
+static void fuse_fs_cleanup(void)
+{
+	unregister_filesystem(&fuse_fs_type);
+	kmem_cache_destroy(fuse_inode_cachep);
+}
+
+/*
+ * Module entry point: print the API version, initialise fuse_lock, then
+ * bring up the filesystem side followed by the device side.  If the
+ * device side fails, the filesystem side is torn down again.
+ *
+ * Returns 0 on success or the error from whichever step failed.
+ */
+static int __init fuse_init(void)
+{
+	int res;
+
+	printk("fuse init (API version %i.%i)\n",
+	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+	spin_lock_init(&fuse_lock);
+
+	res = fuse_fs_init();
+	if (res)
+		return res;
+
+	res = fuse_dev_init();
+	if (res)
+		fuse_fs_cleanup();
+
+	return res;
+}
+
+/* Module exit point: tear down the filesystem side, then the device side. */
+static void __exit fuse_exit(void)
+{
+	printk(KERN_DEBUG "fuse exit\n");
+
+	fuse_fs_cleanup();
+	fuse_dev_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a00..59c5062cd63 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
static void hostfs_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
if(HOSTFS_I(inode)->fd != -1) {
close_file(&HOSTFS_I(inode)->fd);
HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa53..e3d17e9ea6c 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
void hpfs_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
lock_kernel();
hpfs_remove_fnode(inode->i_sb, inode->i_ino);
unlock_kernel();
diff --git a/fs/inode.c b/fs/inode.c
index 71df1b1e8f7..f80a79ff156 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
- if (inode->i_data.nrpages)
- truncate_inode_pages(&inode->i_data, 0);
-
security_inode_delete(inode);
if (op->delete_inode) {
void (*delete)(struct inode *) = op->delete_inode;
if (!is_bad_inode(inode))
DQUOT_INIT(inode);
- /* s_op->delete_inode internally recalls clear_inode() */
+ /* Filesystems implementing their own
+ * s_op->delete_inode are required to call
+ * truncate_inode_pages and clear_inode()
+ * internally */
delete(inode);
- } else
+ } else {
+ truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
+ }
spin_lock(&inode_lock);
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c6ec66fd876..49bbc2be3d7 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1340,8 +1340,7 @@ int journal_stop(handle_t *handle)
if (handle->h_sync) {
do {
old_handle_count = transaction->t_handle_count;
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
} while (old_handle_count != transaction->t_handle_count);
}
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 777b90057b8..3dcc6d2162c 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1744,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
inode->i_ino));
+ truncate_inode_pages(&inode->i_data, 0);
lock_kernel();
inode->i_size = 0;
inode->i_blocks = 0;
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 456d7e6e29c..27f199e94cf 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino)
{
struct jffs_file *f;
int i = ino % c->hash_len;
- struct list_head *tmp;
D3(printk("jffs_find_file(): ino: %u\n", ino));
- for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) {
- f = list_entry(tmp, struct jffs_file, hash);
+ list_for_each_entry(f, &c->hash[i], hash) {
if (ino != f->ino)
continue;
D3(printk("jffs_find_file(): Found file with ino "
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
int result = 0;
for (pos = 0; pos < c->hash_len; pos++) {
- struct list_head *p, *next;
- for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) {
- /* We need a reference to the next file in the
- list because `func' might remove the current
- file `f'. */
- next = p->next;
- r = func(list_entry(p, struct jffs_file, hash));
+ struct jffs_file *f, *next;
+
+ /* We must do _safe, because 'func' might remove the
+ current file 'f' from the list. */
+ list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
+ r = func(f);
if (r < 0)
return r;
result += r;
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c)
printk("JFFS: Dumping the file system's hash table...\n");
for (i = 0; i < c->hash_len; i++) {
- struct list_head *p;
- for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) {
- struct jffs_file *f=list_entry(p,struct jffs_file,hash);
+ struct jffs_file *f;
+ list_for_each_entry(f, &c->hash[i], hash) {
printk("*** c->hash[%u]: \"%s\" "
"(ino: %u, pino: %u)\n",
i, (f->name ? f->name : ""),
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e892dab40c2..461e4934ca7 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -23,6 +23,7 @@
#include <linux/quotaops.h>
#include <linux/posix_acl_xattr.h>
#include "jfs_incore.h"
+#include "jfs_txnmgr.h"
#include "jfs_xattr.h"
#include "jfs_acl.h"
@@ -75,7 +76,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
return acl;
}
-static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
+ struct posix_acl *acl)
{
char *ea_name;
struct jfs_inode_info *ji = JFS_IP(inode);
@@ -110,7 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (rc < 0)
goto out;
}
- rc = __jfs_setxattr(inode, ea_name, value, size, 0);
+ rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0);
out:
kfree(value);
@@ -143,7 +145,7 @@ int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
return generic_permission(inode, mask, jfs_check_acl);
}
-int jfs_init_acl(struct inode *inode, struct inode *dir)
+int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
struct posix_acl *clone;
@@ -159,7 +161,7 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
if (acl) {
if (S_ISDIR(inode->i_mode)) {
- rc = jfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
if (rc)
goto cleanup;
}
@@ -173,7 +175,8 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
if (rc >= 0) {
inode->i_mode = mode;
if (rc > 0)
- rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS,
+ clone);
}
posix_acl_release(clone);
cleanup:
@@ -202,8 +205,15 @@ static int jfs_acl_chmod(struct inode *inode)
return -ENOMEM;
rc = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!rc)
- rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ if (!rc) {
+ tid_t tid = txBegin(inode->i_sb, 0);
+ down(&JFS_IP(inode)->commit_sem);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&JFS_IP(inode)->commit_sem);
+ }
posix_acl_release(clone);
return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 767c7ecb429..0ec62d5310d 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,21 +128,23 @@ void jfs_delete_inode(struct inode *inode)
{
jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
- if (is_bad_inode(inode) ||
- (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
- return;
+ if (!is_bad_inode(inode) &&
+ (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
- if (test_cflag(COMMIT_Freewmap, inode))
- jfs_free_zero_link(inode);
+ truncate_inode_pages(&inode->i_data, 0);
- diFree(inode);
+ if (test_cflag(COMMIT_Freewmap, inode))
+ jfs_free_zero_link(inode);
- /*
- * Free the inode from the quota allocation.
- */
- DQUOT_INIT(inode);
- DQUOT_FREE_INODE(inode);
- DQUOT_DROP(inode);
+ diFree(inode);
+
+ /*
+ * Free the inode from the quota allocation.
+ */
+ DQUOT_INIT(inode);
+ DQUOT_FREE_INODE(inode);
+ DQUOT_DROP(inode);
+ }
clear_inode(inode);
}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index a3acd3eec05..a76293767c7 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -21,8 +21,16 @@
#ifdef CONFIG_JFS_POSIX_ACL
int jfs_permission(struct inode *, int, struct nameidata *);
-int jfs_init_acl(struct inode *, struct inode *);
+int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_setattr(struct dentry *, struct iattr *);
-#endif /* CONFIG_JFS_POSIX_ACL */
+#else
+
+static inline int jfs_init_acl(tid_t tid, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+
+#endif
#endif /* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index a1052f3f0be..25e9990bccd 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -52,8 +52,8 @@ struct jfs_ea_list {
#define END_EALIST(ealist) \
((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
-extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t,
- int);
+extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *,
+ size_t, int);
extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t,
int);
extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
@@ -61,4 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
extern int jfs_removexattr(struct dentry *, const char *);
+#ifdef CONFIG_JFS_SECURITY
+extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+#else
+static inline int jfs_init_security(tid_t tid, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+#endif
+
#endif /* H_JFS_XATTR */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 49ccde3937f..1abe7343f92 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -39,6 +39,24 @@ struct dentry_operations jfs_ci_dentry_operations;
static s64 commitZeroLink(tid_t, struct inode *);
/*
+ * NAME: free_ea_wmap(inode)
+ *
+ * FUNCTION: free uncommitted extended attributes from working map
+ *
+ */
+static inline void free_ea_wmap(struct inode *inode)
+{
+ dxd_t *ea = &JFS_IP(inode)->ea;
+
+ if (ea->flag & DXD_EXTENT) {
+ /* free EA pages from cache */
+ invalidate_dxd_metapages(inode, *ea);
+ dbFree(inode, addressDXD(ea), lengthDXD(ea));
+ }
+ ea->flag = 0;
+}
+
+/*
* NAME: jfs_create(dip, dentry, mode)
*
* FUNCTION: create a regular file in the parent directory <dip>
@@ -89,8 +107,19 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_acl(tid, ip, dip);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc) {
+ txAbort(tid, 0);
+ goto out3;
+ }
+
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jfs_err("jfs_create: dtSearch returned %d", rc);
+ txAbort(tid, 0);
goto out3;
}
@@ -139,6 +168,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -147,11 +177,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
-
out1:
jfs_info("jfs_create: rc:%d", rc);
@@ -216,8 +241,19 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_acl(tid, ip, dip);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc) {
+ txAbort(tid, 0);
+ goto out3;
+ }
+
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jfs_err("jfs_mkdir: dtSearch returned %d", rc);
+ txAbort(tid, 0);
goto out3;
}
@@ -267,6 +303,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -275,10 +312,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
out1:
@@ -885,6 +918,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc)
+ goto out3;
+
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
tblk->ino = ip->i_ino;
@@ -1000,6 +1037,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -1008,11 +1046,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
-
out1:
jfs_info("jfs_symlink: rc:%d", rc);
return rc;
@@ -1328,8 +1361,20 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
down(&JFS_IP(dir)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
- if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+ rc = jfs_init_acl(tid, ip, dir);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dir);
+ if (rc) {
+ txAbort(tid, 0);
goto out3;
+ }
+
+ if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
+ txAbort(tid, 0);
+ goto out3;
+ }
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
@@ -1337,8 +1382,10 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
tblk->u.ixpxd = JFS_IP(ip)->ixpxd;
ino = ip->i_ino;
- if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+ if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
+ txAbort(tid, 0);
goto out3;
+ }
ip->i_op = &jfs_file_inode_operations;
jfs_ip->dev = new_encode_dev(rdev);
@@ -1360,6 +1407,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
up(&JFS_IP(ip)->commit_sem);
up(&JFS_IP(dir)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -1368,11 +1416,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
out1:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dir);
-#endif
-
out:
jfs_info("jfs_mknod: returning %d", rc);
return rc;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 554ec739e49..23aa5066b5a 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -21,6 +21,7 @@
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/quotaops.h>
+#include <linux/security.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
@@ -633,12 +634,12 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf)
}
}
-static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
+static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
+ int new_size)
{
struct jfs_inode_info *ji = JFS_IP(inode);
unsigned long old_blocks, new_blocks;
int rc = 0;
- tid_t tid;
if (new_size == 0) {
ea_release(inode, ea_buf);
@@ -664,9 +665,6 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
if (rc)
return rc;
- tid = txBegin(inode->i_sb, 0);
- down(&ji->commit_sem);
-
old_blocks = new_blocks = 0;
if (ji->ea.flag & DXD_EXTENT) {
@@ -695,11 +693,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
DQUOT_FREE_BLOCK(inode, old_blocks);
inode->i_ctime = CURRENT_TIME;
- rc = txCommit(tid, 1, &inode, 0);
- txEnd(tid);
- up(&ji->commit_sem);
- return rc;
+ return 0;
}
/*
@@ -810,8 +805,8 @@ static int can_set_xattr(struct inode *inode, const char *name,
return permission(inode, MAY_WRITE, NULL);
}
-int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
- size_t value_len, int flags)
+int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
+ const void *value, size_t value_len, int flags)
{
struct jfs_ea_list *ealist;
struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL;
@@ -825,9 +820,6 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
int rc;
int length;
- if ((rc = can_set_xattr(inode, name, value, value_len)))
- return rc;
-
if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
GFP_KERNEL);
@@ -939,7 +931,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
ealist->size = cpu_to_le32(new_size);
- rc = ea_put(inode, &ea_buf, new_size);
+ rc = ea_put(tid, inode, &ea_buf, new_size);
goto out;
release:
@@ -955,12 +947,29 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t value_len, int flags)
{
+ struct inode *inode = dentry->d_inode;
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ int rc;
+ tid_t tid;
+
+ if ((rc = can_set_xattr(inode, name, value, value_len)))
+ return rc;
+
if (value == NULL) { /* empty EA, do not remove */
value = "";
value_len = 0;
}
- return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
+ tid = txBegin(inode->i_sb, 0);
+ down(&ji->commit_sem);
+ rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+ flags);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&ji->commit_sem);
+
+ return rc;
}
static int can_get_xattr(struct inode *inode, const char *name)
@@ -1122,5 +1131,56 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
int jfs_removexattr(struct dentry *dentry, const char *name)
{
- return __jfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+ struct inode *inode = dentry->d_inode;
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ int rc;
+ tid_t tid;
+
+ if ((rc = can_set_xattr(inode, name, NULL, 0)))
+ return rc;
+
+ tid = txBegin(inode->i_sb, 0);
+ down(&ji->commit_sem);
+ rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&ji->commit_sem);
+
+ return rc;
+}
+
+#ifdef CONFIG_JFS_SECURITY
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+{
+ int rc;
+ size_t len;
+ void *value;
+ char *suffix;
+ char *name;
+
+ rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ if (rc) {
+ if (rc == -EOPNOTSUPP)
+ return 0;
+ return rc;
+ }
+ name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix),
+ GFP_NOFS);
+ if (!name) {
+ rc = -ENOMEM;
+ goto kmalloc_failed;
+ }
+ strcpy(name, XATTR_SECURITY_PREFIX);
+ strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
+
+ rc = __jfs_setxattr(tid, inode, name, value, len, 0);
+
+ kfree(name);
+kmalloc_failed:
+ kfree(suffix);
+ kfree(value);
+
+ return rc;
}
+#endif
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 14b3ce87fa2..87332f30141 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void)
return call;
}
printk("nlmclnt_alloc_call: failed, waiting for memory\n");
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(5*HZ);
+ schedule_timeout_interruptible(5*HZ);
}
return NULL;
}
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179f..c2c09b4798d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
{
struct files_struct *files = current->files;
int i, j;
+ struct fdtable *fdt;
if (from == files)
return;
lock_kernel();
j = 0;
+ fdt = files_fdtable(files);
for (;;) {
unsigned long set;
i = j * __NFDBITS;
- if (i >= files->max_fdset || i >= files->max_fds)
+ if (i >= fdt->max_fdset || i >= fdt->max_fds)
break;
- set = files->open_fds->fds_bits[j++];
+ set = fdt->open_fds->fds_bits[j++];
while (set) {
if (set & 1) {
- struct file *file = files->fd[i];
+ struct file *file = fdt->fd[i];
if (file)
__steal_locks(file, from);
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d..790cc0d0e97 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
static void minix_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
minix_truncate(inode);
minix_free_inode(inode);
diff --git a/fs/namei.c b/fs/namei.c
index 145e852c4bd..21d85f1ac83 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
DQUOT_INIT(dir);
error = dir->i_op->create(dir, dentry, mode, nd);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_create(dir, dentry, mode);
- }
return error;
}
@@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
DQUOT_INIT(dir);
error = dir->i_op->mknod(dir, dentry, mode, dev);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_mknod(dir, dentry, mode, dev);
- }
return error;
}
@@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
DQUOT_INIT(dir);
error = dir->i_op->mkdir(dir, dentry, mode);
- if (!error) {
+ if (!error)
fsnotify_mkdir(dir, dentry->d_name.name);
- security_inode_post_mkdir(dir,dentry, mode);
- }
return error;
}
@@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
DQUOT_INIT(dir);
error = dir->i_op->symlink(dir, dentry, oldname);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_symlink(dir, dentry, oldname);
- }
return error;
}
@@ -2020,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
DQUOT_INIT(dir);
error = dir->i_op->link(old_dentry, dir, new_dentry);
up(&old_dentry->d_inode->i_sem);
- if (!error) {
+ if (!error)
fsnotify_create(dir, new_dentry->d_name.name);
- security_inode_post_link(old_dentry, dir, new_dentry);
- }
return error;
}
@@ -2142,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
d_rehash(new_dentry);
dput(new_dentry);
}
- if (!error) {
+ if (!error)
d_move(old_dentry,new_dentry);
- security_inode_post_rename(old_dir, old_dentry,
- new_dir, new_dentry);
- }
return error;
}
@@ -2172,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
/* The following d_move() should become unconditional */
if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
d_move(old_dentry, new_dentry);
- security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
}
if (target)
up(&target->i_sem);
diff --git a/fs/namespace.c b/fs/namespace.c
index 34156260c9b..2fa9fdf7d6f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
{
struct vfsmount *res, *p, *q, *r, *s;
- struct list_head *h;
struct nameidata nd;
res = q = clone_mnt(mnt, dentry);
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
- for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
- r = list_entry(h, struct vfsmount, mnt_child);
+ list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
continue;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b3..8c8839203cd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
static void
ncp_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (S_ISDIR(inode->i_mode)) {
DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 541b418327c..6922469d6fc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
{
dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ truncate_inode_pages(&inode->i_data, 0);
+
nfs_wb_all(inode);
/*
* The following should never happen...
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2681485cf2d..edc95514046 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+ schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!signalled());
rpc_clnt_sigunmask(clnt, &oldset);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0c5a308e496..9701ca8c942 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MAX;
rpc_clnt_sigmask(clnt, &oldset);
if (clnt->cl_intr) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(*timeout);
+ schedule_timeout_interruptible(*timeout);
if (signalled())
res = -ERESTARTSYS;
- } else {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(*timeout);
- }
+ } else
+ schedule_timeout_uninterruptible(*timeout);
rpc_clnt_sigunmask(clnt, &oldset);
*timeout <<= 1;
return res;
@@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(timeout);
+ schedule_timeout_interruptible(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 545236414d5..b6cc8cf2462 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
#include "aops.h"
#include "attrib.h"
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c2..2fac58c5191 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
#include <linux/personality.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
#include <asm/unistd.h>
@@ -842,14 +843,16 @@ int get_unused_fd(void)
{
struct files_struct * files = current->files;
int fd, error;
+ struct fdtable *fdt;
error = -EMFILE;
spin_lock(&files->file_lock);
repeat:
- fd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset,
- files->next_fd);
+ fdt = files_fdtable(files);
+ fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset,
+ fdt->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +875,14 @@ repeat:
goto repeat;
}
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
- files->next_fd = fd + 1;
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
+ fdt->next_fd = fd + 1;
#if 1
/* Sanity check */
- if (files->fd[fd] != NULL) {
+ if (fdt->fd[fd] != NULL) {
printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
- files->fd[fd] = NULL;
+ fdt->fd[fd] = NULL;
}
#endif
error = fd;
@@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd);
static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
- __FD_CLR(fd, files->open_fds);
- if (fd < files->next_fd)
- files->next_fd = fd;
+ struct fdtable *fdt = files_fdtable(files);
+ __FD_CLR(fd, fdt->open_fds);
+ if (fd < fdt->next_fd)
+ fdt->next_fd = fd;
}
void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +928,11 @@ EXPORT_SYMBOL(put_unused_fd);
void fastcall fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (unlikely(files->fd[fd] != NULL))
- BUG();
- files->fd[fd] = file;
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
}
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
{
struct file * filp;
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (fd >= files->max_fds)
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
goto out_unlock;
- filp = files->fd[fd];
+ filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
- files->fd[fd] = NULL;
- FD_CLR(fd, files->close_on_exec);
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
return filp_close(filp, files);
diff --git a/fs/pipe.c b/fs/pipe.c
index 2c7a23dde2d..66aa0b938d6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode)
{
DEFINE_WAIT(wait);
- prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
+ /*
+ * Pipes are system-local resources, so sleeping on them
+ * is considered a noninteractive wait:
+ */
+ prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
up(PIPE_SEM(*inode));
schedule();
finish_wait(PIPE_WAIT(*inode), &wait);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998a..d88d518d30f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
{
struct group_info *group_info;
int g;
+ struct fdtable *fdt = NULL;
read_lock(&tasklist_lock);
buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
task_lock(p);
+ if (p->files)
+ fdt = files_fdtable(p->files);
buffer += sprintf(buffer,
"FDSize:\t%d\n"
"Groups:\t",
- p->files ? p->files->max_fds : 0);
+ fdt ? fdt->max_fds : 0);
group_info = p->group_info;
get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d..23db452ab42 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -62,6 +62,7 @@
#include <linux/namespace.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/mount.h>
#include <linux/security.h>
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
*mnt = mntget(file->f_vfsmnt);
*dentry = dget(file->f_dentry);
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
return 0;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
return -ENOENT;
@@ -1039,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
int retval;
char buf[NUMBUF];
struct files_struct * files;
+ struct fdtable *fdt;
retval = -ENOENT;
if (!pid_alive(p))
@@ -1061,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
files = get_files_struct(p);
if (!files)
goto out;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
+ fdt = files_fdtable(files);
for (fd = filp->f_pos-2;
- fd < files->max_fds;
+ fd < fdt->max_fds;
fd++, filp->f_pos++) {
unsigned int i,j;
if (!fcheck_files(files, fd))
continue;
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
j = NUMBUF;
i = fd;
@@ -1081,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
break;
}
- spin_lock(&files->file_lock);
+ rcu_read_lock();
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
out:
@@ -1261,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
if (fcheck_files(files, fd)) {
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
if (task_dumpable(task)) {
inode->i_uid = task->euid;
@@ -1275,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
security_task_to_inode(task, inode);
return 1;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
d_drop(dentry);
@@ -1367,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
if (!files)
goto out_unlock;
inode->i_mode = S_IFLNK;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (!file)
goto out_unlock2;
@@ -1375,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
inode->i_mode |= S_IRUSR | S_IXUSR;
if (file->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR;
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
@@ -1385,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
return NULL;
out_unlock2:
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
out_unlock:
iput(inode);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c2868510..effa6c0c467 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
struct proc_dir_entry *de;
struct task_struct *tsk;
+ truncate_inode_pages(&inode->i_data, 0);
+
/* Let go of any associated process */
tsk = PROC_I(inode)->task;
if (tsk)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a3547..80f32911c0c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
static void qnx4_delete_inode(struct inode *inode)
{
QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
qnx4_truncate(inode);
lock_kernel();
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff291c973a5..1a8a1bf2154 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+ truncate_inode_pages(&inode->i_data, 0);
+
reiserfs_write_lock(inode->i_sb);
/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a8e29e9bbbd..4b15761434b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount;
while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) &&
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6951c35755b..44b02fc02eb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (SB_AP_BITMAP(s))
brelse(SB_AP_BITMAP(s)[j].bh);
}
- if (SB_AP_BITMAP(s))
- vfree(SB_AP_BITMAP(s));
+ vfree(SB_AP_BITMAP(s));
}
if (SB_BUFFER_WITH_SB(s))
brelse(SB_BUFFER_WITH_SB(s));
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0..f10a10317d5 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
unsigned long *open_fds;
unsigned long set;
int max;
+ struct fdtable *fdt;
/* handle last in-complete long-word first */
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
- open_fds = current->files->open_fds->fds_bits+n;
+ fdt = files_fdtable(current->files);
+ open_fds = fdt->open_fds->fds_bits+n;
max = 0;
if (set) {
set &= BITS(fds, n);
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
int retval, i;
long __timeout = *timeout;
- spin_lock(&current->files->file_lock);
+ rcu_read_lock();
retval = max_select_fd(n, fds);
- spin_unlock(&current->files->file_lock);
+ rcu_read_unlock();
if (retval < 0)
return retval;
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
char *bits;
long timeout;
int ret, size, max_fdset;
+ struct fdtable *fdt;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
- max_fdset = current->files->max_fdset;
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
unsigned int i;
struct poll_list *head;
struct poll_list *walk;
+ struct fdtable *fdt;
+ int max_fdset;
/* Do a sanity check on nfds ... */
- if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
+ if (nfds > max_fdset && nfds > OPEN_MAX)
return -EINVAL;
if (timeout) {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd..10b994428fe 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
smb_delete_inode(struct inode *ino)
{
DEBUG1("ino=%ld\n", ino->i_ino);
+ truncate_inode_pages(&ino->i_data, 0);
lock_kernel();
if (smb_close(ino))
PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 220babe91ef..38ab558835c 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
/* a damn Win95 bug - sometimes it clags if you
ask it too fast */
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/5);
+ schedule_timeout_interruptible(msecs_to_jiffies(200));
continue;
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd..fa33eceb001 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
static void sysv_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
sysv_truncate(inode);
lock_kernel();
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad..b83890beaaa 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
*/
void udf_delete_inode(struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5..55f4aa16e3f 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
void ufs_delete_inode (struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
lock_kernel();
mark_inode_dirty(inode);
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
index 6c6fd0faa8e..b0d2873ab27 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/linux-2.6/time.h
@@ -39,8 +39,7 @@ typedef struct timespec timespec_t;
static inline void delay(long ticks)
{
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ticks);
+ schedule_timeout_uninterruptible(ticks);
}
static inline void nanotime(struct timespec *tvp)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 655bf4a78af..e82cf72ac59 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1780,10 +1780,10 @@ xfsbufd(
xfsbufd_force_sleep = 0;
}
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+ schedule_timeout_interruptible
+ (xfs_buf_timer_centisecs * msecs_to_jiffies(10));
- age = (xfs_buf_age_centisecs * HZ) / 100;
+ age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0da87bfc999..2302454d8d4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -467,7 +467,7 @@ xfs_flush_inode(
igrab(inode);
xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
- delay(HZ/2);
+ delay(msecs_to_jiffies(500));
}
/*
@@ -492,7 +492,7 @@ xfs_flush_device(
igrab(inode);
xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
- delay(HZ/2);
+ delay(msecs_to_jiffies(500));
xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
@@ -520,10 +520,9 @@ xfssyncd(
struct vfs_sync_work *work, *n;
LIST_HEAD (tmp);
- timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- timeleft = schedule_timeout(timeleft);
+ timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
if (kthread_should_stop())
@@ -537,7 +536,8 @@ xfssyncd(
*/
if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
if (!timeleft)
- timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ timeleft = xfs_syncd_centisecs *
+ msecs_to_jiffies(10);
INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
list_add_tail(&vfsp->vfs_sync_work.w_list,
&vfsp->vfs_sync_list);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55..fa8394f9437 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -170,7 +170,7 @@ ktrace_enter(
void *val14,
void *val15)
{
- static lock_t wrap_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(wrap_lock);
unsigned long flags;
int index;
ktrace_entry_t *ktep;