author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 15:20:36 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 15:20:36 -0700
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/linux-2.6
tags       Linux-2.6.12-rc2, v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/xfs/linux-2.6')
38 files changed, 12989 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c new file mode 100644 index 00000000000..364ea8c386b --- /dev/null +++ b/fs/xfs/linux-2.6/kmem.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <linux/swap.h> +#include <linux/blkdev.h> + +#include "time.h" +#include "kmem.h" + +#define MAX_VMALLOCS 6 +#define MAX_SLAB_SIZE 0x20000 + + +void * +kmem_alloc(size_t size, int flags) +{ + int retries = 0; + int lflags = kmem_flags_convert(flags); + void *ptr; + + do { + if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) + ptr = kmalloc(size, lflags); + else + ptr = __vmalloc(size, lflags, PAGE_KERNEL); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + printk(KERN_ERR "XFS: possible memory allocation " + "deadlock in %s (mode:0x%x)\n", + __FUNCTION__, lflags); + blk_congestion_wait(WRITE, HZ/50); + } while (1); +} + +void * +kmem_zalloc(size_t size, int flags) +{ + void *ptr; + + ptr = kmem_alloc(size, flags); + if (ptr) + memset((char *)ptr, 0, (int)size); + return ptr; +} + +void +kmem_free(void *ptr, size_t size) +{ + if (((unsigned long)ptr < VMALLOC_START) || + ((unsigned long)ptr >= VMALLOC_END)) { + kfree(ptr); + } else { + vfree(ptr); + } +} + +void * +kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +{ + void *new; + + new = kmem_alloc(newsize, flags); + if (ptr) { + if (new) + memcpy(new, ptr, + ((oldsize < newsize) ? 
oldsize : newsize)); + kmem_free(ptr, oldsize); + } + return new; +} + +void * +kmem_zone_alloc(kmem_zone_t *zone, int flags) +{ + int retries = 0; + int lflags = kmem_flags_convert(flags); + void *ptr; + + do { + ptr = kmem_cache_alloc(zone, lflags); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + printk(KERN_ERR "XFS: possible memory allocation " + "deadlock in %s (mode:0x%x)\n", + __FUNCTION__, lflags); + blk_congestion_wait(WRITE, HZ/50); + } while (1); +} + +void * +kmem_zone_zalloc(kmem_zone_t *zone, int flags) +{ + void *ptr; + + ptr = kmem_zone_alloc(zone, flags); + if (ptr) + memset((char *)ptr, 0, kmem_cache_size(zone)); + return ptr; +} diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h new file mode 100644 index 00000000000..1397b669b05 --- /dev/null +++ b/fs/xfs/linux-2.6/kmem.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
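The kmem_alloc() and kmem_zone_alloc() loops above encode the XFS low-memory policy: requests below MAX_SLAB_SIZE (128 KiB) go to the slab allocator, larger ones fall back to __vmalloc() until MAX_VMALLOCS retries have failed, and unless the caller passed KM_MAYFAIL or KM_NOSLEEP the loop never returns NULL; it warns every 100 attempts and throttles on block-device congestion instead. A minimal user-space sketch of the same control flow, with plain malloc() standing in for both kernel allocators and usleep() for blk_congestion_wait():

```c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MAX_VMALLOCS  6
#define MAX_SLAB_SIZE 0x20000     /* 128 KiB, as in the patch */
#define KM_NOSLEEP    0x0002
#define KM_MAYFAIL    0x0008

/* Sketch of kmem_alloc()'s retry policy; the allocators are stand-ins,
 * since user space has no kmalloc()/__vmalloc() distinction. */
static void *alloc_like_kmem(size_t size, int flags)
{
	int retries = 0;
	void *ptr;

	do {
		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
			ptr = malloc(size);   /* kernel: kmalloc(), physically contiguous */
		else
			ptr = malloc(size);   /* kernel: __vmalloc(), virtually contiguous */
		if (ptr || (flags & (KM_MAYFAIL | KM_NOSLEEP)))
			return ptr;           /* these callers accept failure */
		if (!(++retries % 100))
			fprintf(stderr, "possible allocation deadlock (size=%zu)\n", size);
		usleep(20000);            /* kernel: blk_congestion_wait(WRITE, HZ/50) */
	} while (1);
}
```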
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mm.h> + +/* + * memory management routines + */ +#define KM_SLEEP 0x0001 +#define KM_NOSLEEP 0x0002 +#define KM_NOFS 0x0004 +#define KM_MAYFAIL 0x0008 + +#define kmem_zone kmem_cache_s +#define kmem_zone_t kmem_cache_t + +typedef unsigned long xfs_pflags_t; + +#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO) +#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) + +#define PFLAGS_SET_NOIO() do { \ + current->flags |= PF_NOIO; \ +} while (0) + +#define PFLAGS_CLEAR_NOIO() do { \ + current->flags &= ~PF_NOIO; \ +} while (0) + +/* these could be nested, so we save state */ +#define PFLAGS_SET_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags |= PF_FSTRANS; \ +} while (0) + +#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags &= ~PF_FSTRANS; \ +} while (0) + +/* Restore the PF_FSTRANS state to what was saved in STATEP */ +#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \ + current->flags = ((current->flags & ~PF_FSTRANS) | \ + (*(STATEP) & PF_FSTRANS)); \ +} while (0) + +#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ + *(NSTATEP) = *(OSTATEP); \ +} while (0) + +static __inline unsigned int kmem_flags_convert(int flags) +{ + int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + +#ifdef DEBUG + if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { + printk(KERN_WARNING + "XFS: memory allocation with wrong flags (%x)\n", flags); + BUG(); + } +#endif + + if (flags & KM_NOSLEEP) { + lflags |= GFP_ATOMIC; + } else { + lflags |= GFP_KERNEL; + + /* avoid recusive callbacks to filesystem during transactions */ + if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + } + + return lflags; +} + +static __inline kmem_zone_t * +kmem_zone_init(int size, char *zone_name) +{ + return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); +} + +static __inline void +kmem_zone_free(kmem_zone_t *zone, void *ptr) +{ + kmem_cache_free(zone, ptr); +} + +static __inline void +kmem_zone_destroy(kmem_zone_t *zone) +{ + if (zone && kmem_cache_destroy(zone)) + BUG(); +} + +extern void *kmem_zone_zalloc(kmem_zone_t *, int); +extern void *kmem_zone_alloc(kmem_zone_t *, int); + +extern void *kmem_alloc(size_t, int); +extern void *kmem_realloc(void *, size_t, size_t, int); +extern void *kmem_zalloc(size_t, int); +extern void kmem_free(void *, size_t); + +typedef struct shrinker *kmem_shaker_t; +typedef int (*kmem_shake_func_t)(int, unsigned int); + +static __inline kmem_shaker_t +kmem_shake_register(kmem_shake_func_t sfunc) +{ + return set_shrinker(DEFAULT_SEEKS, sfunc); +} + +static __inline void +kmem_shake_deregister(kmem_shaker_t shrinker) +{ + remove_shrinker(shrinker); +} + +static __inline int +kmem_shake_allow(unsigned int gfp_mask) +{ + return (gfp_mask & __GFP_WAIT); +} + +#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h new file mode 100644 index 00000000000..d2c11a098ff --- /dev/null +++ b/fs/xfs/linux-2.6/mrlock.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 
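kmem_flags_convert() above is the single translation point from IRIX-style KM_* allocation flags to Linux GFP masks: KM_NOSLEEP becomes an atomic allocation, anything else a sleeping GFP_KERNEL one, and __GFP_FS is stripped whenever the task is inside a filesystem transaction (PF_FSTRANS) or the caller passed KM_NOFS, so the allocator cannot recurse back into XFS to reclaim memory. A self-contained illustration; the GFP_* values here are stand-in bitmasks, not the kernel's real ones:

```c
#include <stdio.h>

#define KM_SLEEP    0x0001
#define KM_NOSLEEP  0x0002
#define KM_NOFS     0x0004

#define __GFP_FS    0x01          /* stand-in: may recurse into the fs */
#define __GFP_WAIT  0x02          /* stand-in: may sleep */
#define GFP_ATOMIC  0x00
#define GFP_KERNEL  (__GFP_WAIT | __GFP_FS)

static unsigned int flags_convert(int flags, int in_fs_transaction)
{
	unsigned int lflags;

	if (flags & KM_NOSLEEP) {
		lflags = GFP_ATOMIC;
	} else {
		lflags = GFP_KERNEL;
		/* avoid recursive callbacks into the filesystem */
		if (in_fs_transaction || (flags & KM_NOFS))
			lflags &= ~__GFP_FS;
	}
	return lflags;
}

int main(void)
{
	printf("KM_SLEEP         -> 0x%x\n", flags_convert(KM_SLEEP, 0));           /* 0x3 */
	printf("KM_SLEEP|KM_NOFS -> 0x%x\n", flags_convert(KM_SLEEP | KM_NOFS, 0)); /* 0x2 */
	printf("KM_NOSLEEP       -> 0x%x\n", flags_convert(KM_NOSLEEP, 0));         /* 0x0 */
	return 0;
}
```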
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include <linux/rwsem.h> + +enum { MR_NONE, MR_ACCESS, MR_UPDATE }; + +typedef struct { + struct rw_semaphore mr_lock; + int mr_writer; +} mrlock_t; + +#define mrinit(mrp, name) \ + ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) ) +#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) +#define mrfree(mrp) do { } while (0) +#define mraccess(mrp) mraccessf(mrp, 0) +#define mrupdate(mrp) mrupdatef(mrp, 0) + +static inline void mraccessf(mrlock_t *mrp, int flags) +{ + down_read(&mrp->mr_lock); +} + +static inline void mrupdatef(mrlock_t *mrp, int flags) +{ + down_write(&mrp->mr_lock); + mrp->mr_writer = 1; +} + +static inline int mrtryaccess(mrlock_t *mrp) +{ + return down_read_trylock(&mrp->mr_lock); +} + +static inline int mrtryupdate(mrlock_t *mrp) +{ + if (!down_write_trylock(&mrp->mr_lock)) + return 0; + mrp->mr_writer = 1; + return 1; +} + +static inline void mrunlock(mrlock_t *mrp) +{ + if (mrp->mr_writer) { + mrp->mr_writer = 0; + up_write(&mrp->mr_lock); + } else { + up_read(&mrp->mr_lock); + } +} + +static inline void mrdemote(mrlock_t *mrp) +{ + mrp->mr_writer = 0; + downgrade_write(&mrp->mr_lock); +} + +#ifdef DEBUG +/* + * Debug-only routine, without some platform-specific asm code, we can + * now only answer requests regarding whether we hold the lock for write + * (reader state is outside our visibility, we only track writer state). + * Note: means !ismrlocked would give false positivies, so don't do that. + */ +static inline int ismrlocked(mrlock_t *mrp, int type) +{ + if (mrp && type == MR_UPDATE) + return mrp->mr_writer; + return 1; +} +#endif + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h new file mode 100644 index 00000000000..0b296bb944c --- /dev/null +++ b/fs/xfs/linux-2.6/mutex.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
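The mrlock_t above wraps a Linux rw_semaphore and tracks only writer state (mr_writer); that is what lets the single mrunlock() choose between up_write() and up_read(), and why the DEBUG-only ismrlocked() can answer MR_UPDATE queries but must assume the best for readers. A hypothetical caller (the function and lock names are invented for illustration), including the demote path:

```c
/* Sketch of the intended usage, not code from the patch. */
static mrlock_t ilock;

static void mrlock_example(void)
{
	mrinit(&ilock, "ilock");

	mraccess(&ilock);   /* shared access: down_read() */
	/* ... read fields ... */
	mrunlock(&ilock);   /* mr_writer is 0, so up_read() */

	mrupdate(&ilock);   /* exclusive: down_write(), then mr_writer = 1 */
	/* ... modify fields ... */
	mrdemote(&ilock);   /* clear mr_writer, downgrade_write(): readers may enter */
	/* ... keep reading consistently ... */
	mrunlock(&ilock);   /* mr_writer now 0 again, so up_read() */
}
```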
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MUTEX_H__ +#define __XFS_SUPPORT_MUTEX_H__ + +#include <linux/spinlock.h> +#include <asm/semaphore.h> + +/* + * Map the mutex'es from IRIX to Linux semaphores. + * + * Destroy just simply initializes to -99 which should block all other + * callers. + */ +#define MUTEX_DEFAULT 0x0 +typedef struct semaphore mutex_t; + +#define mutex_init(lock, type, name) sema_init(lock, 1) +#define mutex_destroy(lock) sema_init(lock, -99) +#define mutex_lock(lock, num) down(lock) +#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) +#define mutex_unlock(lock) up(lock) + +#endif /* __XFS_SUPPORT_MUTEX_H__ */ diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h new file mode 100644 index 00000000000..30b67b4e1cb --- /dev/null +++ b/fs/xfs/linux-2.6/sema.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SEMA_H__ +#define __XFS_SUPPORT_SEMA_H__ + +#include <linux/time.h> +#include <linux/wait.h> +#include <asm/atomic.h> +#include <asm/semaphore.h> + +/* + * sema_t structure just maps to struct semaphore in Linux kernel. 
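The mutex.h shim above keeps the IRIX call signatures (a three-argument init, a mutex_lock() that takes an ignored priority argument) while expanding everything onto Linux semaphores; mutex_destroy() re-initializes the count to -99, so a locker that touches a destroyed mutex blocks loudly instead of corrupting memory. A hypothetical caller:

```c
/* Usage sketch; the function and names are invented for illustration. */
static mutex_t m;

static void mutex_example(void)
{
	mutex_init(&m, MUTEX_DEFAULT, "xfs_mutex"); /* sema_init(&m, 1) */

	mutex_lock(&m, 0);                          /* down(&m); second arg discarded */
	/* ... critical section ... */
	mutex_unlock(&m);                           /* up(&m) */

	if (mutex_trylock(&m)) {                    /* 1 = acquired, 0 = busy */
		mutex_unlock(&m);
	}

	mutex_destroy(&m);                          /* sema_init(&m, -99) */
}
```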
+ */ + +typedef struct semaphore sema_t; + +#define init_sema(sp, val, c, d) sema_init(sp, val) +#define initsema(sp, val) sema_init(sp, val) +#define initnsema(sp, val, name) sema_init(sp, val) +#define psema(sp, b) down(sp) +#define vsema(sp) up(sp) +#define valusema(sp) (atomic_read(&(sp)->count)) +#define freesema(sema) + +/* + * Map cpsema (try to get the sema) to down_trylock. We need to switch + * the return values since cpsema returns 1 (acquired) 0 (failed) and + * down_trylock returns the reverse 0 (acquired) 1 (failed). + */ + +#define cpsema(sp) (down_trylock(sp) ? 0 : 1) + +/* + * Didn't do cvsema(sp). Not sure how to map this to up/down/... + * It does a vsema if the values is < 0 other wise nothing. + */ + +#endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h new file mode 100644 index 00000000000..bcf60a0b8df --- /dev/null +++ b/fs/xfs/linux-2.6/spin.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SPIN_H__ +#define __XFS_SUPPORT_SPIN_H__ + +#include <linux/sched.h> /* preempt needs this */ +#include <linux/spinlock.h> + +/* + * Map lock_t from IRIX to Linux spinlocks. + * + * We do not make use of lock_t from interrupt context, so we do not + * have to worry about disabling interrupts at all (unlike IRIX). + */ + +typedef spinlock_t lock_t; + +#define SPLDECL(s) unsigned long s + +#define spinlock_init(lock, name) spin_lock_init(lock) +#define spinlock_destroy(lock) +#define mutex_spinlock(lock) ({ spin_lock(lock); 0; }) +#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0) +#define nested_spinlock(lock) spin_lock(lock) +#define nested_spinunlock(lock) spin_unlock(lock) + +#endif /* __XFS_SUPPORT_SPIN_H__ */ diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h new file mode 100644 index 00000000000..821d3167e05 --- /dev/null +++ b/fs/xfs/linux-2.6/sv.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
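As the comment above notes, cpsema() must invert down_trylock()'s return value: the IRIX convention is 1 for acquired / 0 for failed, while down_trylock() returns 0 on success. A hypothetical caller showing the flip:

```c
/* Usage sketch; names are invented for illustration. */
static sema_t s;

static int sema_example(void)
{
	initnsema(&s, 1, "xfs_sema");   /* sema_init(&s, 1) */

	if (cpsema(&s)) {               /* expands to (down_trylock(&s) ? 0 : 1) */
		/* acquired: valusema(&s) now reads 0 */
		vsema(&s);                  /* up(&s) */
		return 1;
	}
	return 0;                       /* busy; psema(&s, 0) would have blocked */
}
```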
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SV_H__ +#define __XFS_SUPPORT_SV_H__ + +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +/* + * Synchronisation variables. + * + * (Parameters "pri", "svf" and "rts" are not implemented) + */ + +typedef struct sv_s { + wait_queue_head_t waiters; +} sv_t; + +#define SV_FIFO 0x0 /* sv_t is FIFO type */ +#define SV_LIFO 0x2 /* sv_t is LIFO type */ +#define SV_PRIO 0x4 /* sv_t is PRIO type */ +#define SV_KEYED 0x6 /* sv_t is KEYED type */ +#define SV_DEFAULT SV_FIFO + + +static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, + unsigned long timeout) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&sv->waiters, &wait); + __set_current_state(state); + spin_unlock(lock); + + schedule_timeout(timeout); + + remove_wait_queue(&sv->waiters, &wait); +} + +#define init_sv(sv,type,name,flag) \ + init_waitqueue_head(&(sv)->waiters) +#define sv_init(sv,flag,name) \ + init_waitqueue_head(&(sv)->waiters) +#define sv_destroy(sv) \ + /*NOTHING*/ +#define sv_wait(sv, pri, lock, s) \ + _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_wait_sig(sv, pri, lock, s) \ + _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ + _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ + _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_signal(sv) \ + wake_up(&(sv)->waiters) +#define sv_broadcast(sv) \ + wake_up_all(&(sv)->waiters) + +#endif /* __XFS_SUPPORT_SV_H__ */ diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h new file mode 100644 index 00000000000..6c6fd0faa8e --- /dev/null +++ b/fs/xfs/linux-2.6/time.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
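The sv_t above is a stripped-down condition variable: _sv_wait() queues the caller exclusively, drops the caller's spinlock, and sleeps, but it does not retake the lock on wakeup, so a waiter must re-acquire it before rechecking its predicate. A hypothetical waiter/waker pair (names invented for illustration):

```c
/* Usage sketch, not code from the patch.
 * Setup elsewhere: sv_init(&cond, SV_DEFAULT, "done"); spin_lock_init(&lock); */
static sv_t cond;
static spinlock_t lock;
static int done;

static void waiter(void)
{
	spin_lock(&lock);
	while (!done) {
		sv_wait(&cond, 0, &lock, 0); /* drops lock, sleeps uninterruptibly */
		spin_lock(&lock);            /* sv_wait does NOT retake it: re-lock to recheck */
	}
	spin_unlock(&lock);
}

static void waker(void)
{
	spin_lock(&lock);
	done = 1;
	spin_unlock(&lock);
	sv_signal(&cond);                /* wake one exclusive waiter */
}
```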
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#include <linux/sched.h> +#include <linux/time.h> + +typedef struct timespec timespec_t; + +static inline void delay(long ticks) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(ticks); +} + +static inline void nanotime(struct timespec *tvp) +{ + *tvp = CURRENT_TIME; +} + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c new file mode 100644 index 00000000000..76a84758073 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -0,0 +1,1275 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
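time.h above supplies the two IRIX-flavoured time primitives XFS relies on: delay(ticks) is an uninterruptible sleep measured in jiffies, and nanotime() samples CURRENT_TIME, which on these kernels is jiffy-resolution despite the name. A hypothetical usage sketch:

```c
/* Usage sketch; the function is invented for illustration. */
static void time_example(void)
{
	timespec_t now;

	nanotime(&now);   /* now = CURRENT_TIME (coarse, per-tick clock) */
	delay(HZ / 10);   /* sleep ~100 ms; not interruptible by signals */
}
```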
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_trans.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_iomap.h" +#include <linux/mpage.h> +#include <linux/writeback.h> + +STATIC void xfs_count_page_state(struct page *, int *, int *, int *); +STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *, + struct writeback_control *wbc, void *, int, int); + +#if defined(XFS_RW_TRACE) +void +xfs_page_trace( + int tag, + struct inode *inode, + struct page *page, + int mask) +{ + xfs_inode_t *ip; + bhv_desc_t *bdp; + vnode_t *vp = LINVFS_GET_VP(inode); + loff_t isize = i_size_read(inode); + loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + int delalloc = -1, unmapped = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + + bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); + ip = XFS_BHVTOI(bdp); + if (!ip->i_rwtrace) + return; + + ktrace_enter(ip->i_rwtrace, + (void *)((unsigned long)tag), + (void *)ip, + (void *)inode, + (void *)page, + (void *)((unsigned long)mask), + (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), + (void *)((unsigned long)((isize >> 32) & 0xffffffff)), + (void *)((unsigned long)(isize & 0xffffffff)), + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)delalloc), + (void *)((unsigned long)unmapped), + (void *)((unsigned long)unwritten), + (void *)NULL, + (void *)NULL); +} +#else +#define xfs_page_trace(tag, inode, page, mask) +#endif + +void +linvfs_unwritten_done( + struct buffer_head *bh, + int uptodate) +{ + xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; + + ASSERT(buffer_unwritten(bh)); + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + if (!uptodate) + pagebuf_ioerror(pb, EIO); + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pagebuf_iodone(pb, 1, 1); + } + end_buffer_async_write(bh, uptodate); +} + +/* + * Issue transactions to convert a buffer range from unwritten + * to written extents (buffered IO). + */ +STATIC void +linvfs_unwritten_convert( + xfs_buf_t *bp) +{ + vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); + int error; + + BUG_ON(atomic_read(&bp->pb_hold) < 1); + VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), + BMAPI_UNWRITTEN, NULL, NULL, error); + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_UNDATAIO(bp); + iput(LINVFS_GET_IP(vp)); + pagebuf_iodone(bp, 0, 0); +} + +/* + * Issue transactions to convert a buffer range from unwritten + * to written extents (direct IO). 
+ */ +STATIC void +linvfs_unwritten_convert_direct( + struct inode *inode, + loff_t offset, + ssize_t size, + void *private) +{ + ASSERT(!private || inode == (struct inode *)private); + + /* private indicates an unwritten extent lay beneath this IO */ + if (private && size > 0) { + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + } +} + +STATIC int +xfs_map_blocks( + struct inode *inode, + loff_t offset, + ssize_t count, + xfs_iomap_t *mapp, + int flags) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error, nmaps = 1; + + VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); + if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) + VMODIFY(vp); + return -error; +} + +/* + * Finds the corresponding mapping in block @map array of the + * given @offset within a @page. + */ +STATIC xfs_iomap_t * +xfs_offset_to_map( + struct page *page, + xfs_iomap_t *iomapp, + unsigned long offset) +{ + loff_t full_offset; /* offset from start of file */ + + ASSERT(offset < PAGE_CACHE_SIZE); + + full_offset = page->index; /* NB: using 64bit number */ + full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ + full_offset += offset; /* offset from page start */ + + if (full_offset < iomapp->iomap_offset) + return NULL; + if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset) + return iomapp; + return NULL; +} + +STATIC void +xfs_map_at_offset( + struct page *page, + struct buffer_head *bh, + unsigned long offset, + int block_bits, + xfs_iomap_t *iomapp) +{ + xfs_daddr_t bn; + loff_t delta; + int sector_shift; + + ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); + ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); + ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); + + delta = page->index; + delta <<= PAGE_CACHE_SHIFT; + delta += offset; + delta -= iomapp->iomap_offset; + delta >>= block_bits; + + sector_shift = block_bits - BBSHIFT; + bn = iomapp->iomap_bn >> sector_shift; + bn += delta; + BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); + ASSERT((bn << sector_shift) >= iomapp->iomap_bn); + + lock_buffer(bh); + bh->b_blocknr = bn; + bh->b_bdev = iomapp->iomap_target->pbr_bdev; + set_buffer_mapped(bh); + clear_buffer_delay(bh); +} + +/* + * Look for a page at index which is unlocked and contains our + * unwritten extent flagged buffers at its head. Returns page + * locked and with an extra reference count, and length of the + * unwritten extent component on this page that we can write, + * in units of filesystem blocks. 
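xfs_offset_to_map() above widens page->index to 64 bits before shifting, then checks the resulting file offset against the byte range covered by the iomap. A self-contained worked example of that arithmetic, assuming 4 KiB pages (PAGE_CACHE_SHIFT == 12); all values are illustrative:

```c
#include <stdio.h>

int main(void)
{
	unsigned long page_index = 3;      /* fourth page of the file */
	unsigned long offset     = 512;    /* byte offset within that page */
	long long iomap_offset   = 8192;   /* mapping covers bytes [8192, 24576) */
	long long iomap_bsize    = 16384;

	/* full_offset = (page->index << PAGE_CACHE_SHIFT) + offset */
	long long full_offset = ((long long)page_index << 12) + offset; /* 12800 */

	/* same containment test as xfs_offset_to_map() */
	int inside = full_offset >= iomap_offset &&
	             iomap_offset + (iomap_bsize - 1) >= full_offset;

	printf("full_offset=%lld inside=%d\n", full_offset, inside); /* 12800 1 */
	return 0;
}
```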
+ */ +STATIC struct page * +xfs_probe_unwritten_page( + struct address_space *mapping, + pgoff_t index, + xfs_iomap_t *iomapp, + xfs_buf_t *pb, + unsigned long max_offset, + unsigned long *fsbs, + unsigned int bbits) +{ + struct page *page; + + page = find_trylock_page(mapping, index); + if (!page) + return NULL; + if (PageWriteback(page)) + goto out; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + unsigned long p_offset = 0; + + *fsbs = 0; + bh = head = page_buffers(page); + do { + if (!buffer_unwritten(bh) || !buffer_uptodate(bh)) + break; + if (!xfs_offset_to_map(page, iomapp, p_offset)) + break; + if (p_offset >= max_offset) + break; + xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); + set_buffer_unwritten_io(bh); + bh->b_private = pb; + p_offset += bh->b_size; + (*fsbs)++; + } while ((bh = bh->b_this_page) != head); + + if (p_offset) + return page; + } + +out: + unlock_page(page); + return NULL; +} + +/* + * Look for a page at index which is unlocked and not mapped + * yet - clustering for mmap write case. + */ +STATIC unsigned int +xfs_probe_unmapped_page( + struct address_space *mapping, + pgoff_t index, + unsigned int pg_offset) +{ + struct page *page; + int ret = 0; + + page = find_trylock_page(mapping, index); + if (!page) + return 0; + if (PageWriteback(page)) + goto out; + + if (page->mapping && PageDirty(page)) { + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (buffer_mapped(bh) || !buffer_uptodate(bh)) + break; + ret += bh->b_size; + if (ret >= pg_offset) + break; + } while ((bh = bh->b_this_page) != head); + } else + ret = PAGE_CACHE_SIZE; + } + +out: + unlock_page(page); + return ret; +} + +STATIC unsigned int +xfs_probe_unmapped_cluster( + struct inode *inode, + struct page *startpage, + struct buffer_head *bh, + struct buffer_head *head) +{ + pgoff_t tindex, tlast, tloff; + unsigned int pg_offset, len, total = 0; + struct address_space *mapping = inode->i_mapping; + + /* First sum forwards in this page */ + do { + if (buffer_mapped(bh)) + break; + total += bh->b_size; + } while ((bh = bh->b_this_page) != head); + + /* If we reached the end of the page, sum forwards in + * following pages. + */ + if (bh == head) { + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + /* Prune this back to avoid pathological behavior */ + tloff = min(tlast, startpage->index + 64); + for (tindex = startpage->index + 1; tindex < tloff; tindex++) { + len = xfs_probe_unmapped_page(mapping, tindex, + PAGE_CACHE_SIZE); + if (!len) + return total; + total += len; + } + if (tindex == tlast && + (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + total += xfs_probe_unmapped_page(mapping, + tindex, pg_offset); + } + } + return total; +} + +/* + * Probe for a given page (index) in the inode and test if it is delayed + * and without unwritten buffers. Returns page locked and with an extra + * reference count. 
+ */ +STATIC struct page * +xfs_probe_delalloc_page( + struct inode *inode, + pgoff_t index) +{ + struct page *page; + + page = find_trylock_page(inode->i_mapping, index); + if (!page) + return NULL; + if (PageWriteback(page)) + goto out; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + int acceptable = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) { + acceptable = 0; + break; + } else if (buffer_delay(bh)) { + acceptable = 1; + } + } while ((bh = bh->b_this_page) != head); + + if (acceptable) + return page; + } + +out: + unlock_page(page); + return NULL; +} + +STATIC int +xfs_map_unwritten( + struct inode *inode, + struct page *start_page, + struct buffer_head *head, + struct buffer_head *curr, + unsigned long p_offset, + int block_bits, + xfs_iomap_t *iomapp, + struct writeback_control *wbc, + int startio, + int all_bh) +{ + struct buffer_head *bh = curr; + xfs_iomap_t *tmp; + xfs_buf_t *pb; + loff_t offset, size; + unsigned long nblocks = 0; + + offset = start_page->index; + offset <<= PAGE_CACHE_SHIFT; + offset += p_offset; + + /* get an "empty" pagebuf to manage IO completion + * Proper values will be set before returning */ + pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); + if (!pb) + return -EAGAIN; + + /* Take a reference to the inode to prevent it from + * being reclaimed while we have outstanding unwritten + * extent IO on it. + */ + if ((igrab(inode)) != inode) { + pagebuf_free(pb); + return -EAGAIN; + } + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling pagebuf_iodone too early. + */ + atomic_set(&pb->pb_io_remaining, 1); + + /* First map forwards in the page consecutive buffers + * covering this unwritten extent + */ + do { + if (!buffer_unwritten(bh)) + break; + tmp = xfs_offset_to_map(start_page, iomapp, p_offset); + if (!tmp) + break; + xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); + set_buffer_unwritten_io(bh); + bh->b_private = pb; + p_offset += bh->b_size; + nblocks++; + } while ((bh = bh->b_this_page) != head); + + atomic_add(nblocks, &pb->pb_io_remaining); + + /* If we reached the end of the page, map forwards in any + * following pages which are also covered by this extent. 
+ */ + if (bh == head) { + struct address_space *mapping = inode->i_mapping; + pgoff_t tindex, tloff, tlast; + unsigned long bs; + unsigned int pg_offset, bbits = inode->i_blkbits; + struct page *page; + + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; + tloff = min(tlast, tloff); + for (tindex = start_page->index + 1; tindex < tloff; tindex++) { + page = xfs_probe_unwritten_page(mapping, + tindex, iomapp, pb, + PAGE_CACHE_SIZE, &bs, bbits); + if (!page) + break; + nblocks += bs; + atomic_add(bs, &pb->pb_io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, pb, + startio, all_bh); + /* stop if converting the next page might add + * enough blocks that the corresponding byte + * count won't fit in our ulong page buf length */ + if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) + goto enough; + } + + if (tindex == tlast && + (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { + page = xfs_probe_unwritten_page(mapping, + tindex, iomapp, pb, + pg_offset, &bs, bbits); + if (page) { + nblocks += bs; + atomic_add(bs, &pb->pb_io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, pb, + startio, all_bh); + if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) + goto enough; + } + } + } + +enough: + size = nblocks; /* NB: using 64bit number here */ + size <<= block_bits; /* convert fsb's to byte range */ + + XFS_BUF_DATAIO(pb); + XFS_BUF_ASYNC(pb); + XFS_BUF_SET_SIZE(pb, size); + XFS_BUF_SET_COUNT(pb, size); + XFS_BUF_SET_OFFSET(pb, offset); + XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); + XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); + + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pagebuf_iodone(pb, 1, 1); + } + + return 0; +} + +STATIC void +xfs_submit_page( + struct page *page, + struct writeback_control *wbc, + struct buffer_head *bh_arr[], + int bh_count, + int probed_page, + int clear_dirty) +{ + struct buffer_head *bh; + int i; + + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + if (clear_dirty) + clear_page_dirty(page); + unlock_page(page); + + if (bh_count) { + for (i = 0; i < bh_count; i++) { + bh = bh_arr[i]; + mark_buffer_async_write(bh); + if (buffer_unwritten(bh)) + set_buffer_unwritten_io(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + } + + for (i = 0; i < bh_count; i++) + submit_bh(WRITE, bh_arr[i]); + + if (probed_page && clear_dirty) + wbc->nr_to_write--; /* Wrote an "extra" page */ + } else { + end_page_writeback(page); + wbc->pages_skipped++; /* We didn't write this page */ + } +} + +/* + * Allocate & map buffers for page given the extent map. Write it out. + * except for the original page of a writepage, this is called on + * delalloc/unwritten pages only, for the original page it is possible + * that the page has no mapping at all. 
+ */ +STATIC void +xfs_convert_page( + struct inode *inode, + struct page *page, + xfs_iomap_t *iomapp, + struct writeback_control *wbc, + void *private, + int startio, + int all_bh) +{ + struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; + xfs_iomap_t *mp = iomapp, *tmp; + unsigned long end, offset; + pgoff_t end_index; + int i = 0, index = 0; + int bbits = inode->i_blkbits; + + end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + if (page->index < end_index) { + end = PAGE_CACHE_SIZE; + } else { + end = i_size_read(inode) & (PAGE_CACHE_SIZE-1); + } + bh = head = page_buffers(page); + do { + offset = i << bbits; + if (offset >= end) + break; + if (!(PageUptodate(page) || buffer_uptodate(bh))) + continue; + if (buffer_mapped(bh) && all_bh && + !(buffer_unwritten(bh) || buffer_delay(bh))) { + if (startio) { + lock_buffer(bh); + bh_arr[index++] = bh; + } + continue; + } + tmp = xfs_offset_to_map(page, mp, offset); + if (!tmp) + continue; + ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); + ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); + + /* If this is a new unwritten extent buffer (i.e. one + * that we haven't passed in private data for, we must + * now map this buffer too. + */ + if (buffer_unwritten(bh) && !bh->b_end_io) { + ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); + xfs_map_unwritten(inode, page, head, bh, offset, + bbits, tmp, wbc, startio, all_bh); + } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { + xfs_map_at_offset(page, bh, offset, bbits, tmp); + if (buffer_unwritten(bh)) { + set_buffer_unwritten_io(bh); + bh->b_private = private; + ASSERT(private); + } + } + if (startio) { + bh_arr[index++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + } while (i++, (bh = bh->b_this_page) != head); + + if (startio) { + xfs_submit_page(page, wbc, bh_arr, index, 1, index == i); + } else { + unlock_page(page); + } +} + +/* + * Convert & write out a cluster of pages in the same extent as defined + * by mp and following the start page. + */ +STATIC void +xfs_cluster_write( + struct inode *inode, + pgoff_t tindex, + xfs_iomap_t *iomapp, + struct writeback_control *wbc, + int startio, + int all_bh, + pgoff_t tlast) +{ + struct page *page; + + for (; tindex <= tlast; tindex++) { + page = xfs_probe_delalloc_page(inode, tindex); + if (!page) + break; + xfs_convert_page(inode, page, iomapp, wbc, NULL, + startio, all_bh); + } +} + +/* + * Calling this without startio set means we are being asked to make a dirty + * page ready for freeing it's buffers. When called with startio set then + * we are coming from writepage. + * + * When called with startio set it is important that we write the WHOLE + * page if possible. + * The bh->b_state's cannot know if any of the blocks or which block for + * that matter are dirty due to mmap writes, and therefore bh uptodate is + * only vaild if the page itself isn't completely uptodate. Some layers + * may clear the page dirty flag prior to calling write page, under the + * assumption the entire page will be written out; by not writing out the + * whole page the page can be reused before all valid dirty data is + * written out. Note: in the case of a page that has been dirty'd by + * mapwrite and but partially setup by block_prepare_write the + * bh->b_states's will not agree and only ones setup by BPW/BCW will have + * valid state, thus the whole page must be written out thing. 
+ */ + +STATIC int +xfs_page_state_convert( + struct inode *inode, + struct page *page, + struct writeback_control *wbc, + int startio, + int unmapped) /* also implies page uptodate */ +{ + struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; + xfs_iomap_t *iomp, iomap; + loff_t offset; + unsigned long p_offset = 0; + __uint64_t end_offset; + pgoff_t end_index, last_index, tlast; + int len, err, i, cnt = 0, uptodate = 1; + int flags = startio ? 0 : BMAPI_TRYLOCK; + int page_dirty, delalloc = 0; + + /* Is this page beyond the end of the file? */ + offset = i_size_read(inode); + end_index = offset >> PAGE_CACHE_SHIFT; + last_index = (offset - 1) >> PAGE_CACHE_SHIFT; + if (page->index >= end_index) { + if ((page->index >= end_index + 1) || + !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + err = -EIO; + goto error; + } + } + + offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + end_offset = min_t(unsigned long long, + offset + PAGE_CACHE_SIZE, i_size_read(inode)); + + bh = head = page_buffers(page); + iomp = NULL; + + /* + * page_dirty is initially a count of buffers on the page and + * is decrememted as we move each into a cleanable state. + */ + len = bh->b_size; + page_dirty = PAGE_CACHE_SIZE / len; + + do { + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) + continue; + + if (iomp) { + iomp = xfs_offset_to_map(page, &iomap, p_offset); + } + + /* + * First case, map an unwritten extent and prepare for + * extent state conversion transaction on completion. + */ + if (buffer_unwritten(bh)) { + if (!startio) + continue; + if (!iomp) { + err = xfs_map_blocks(inode, offset, len, &iomap, + BMAPI_READ|BMAPI_IGNSTATE); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp) { + if (!bh->b_end_io) { + err = xfs_map_unwritten(inode, page, + head, bh, p_offset, + inode->i_blkbits, iomp, + wbc, startio, unmapped); + if (err) { + goto error; + } + } else { + set_bit(BH_Lock, &bh->b_state); + } + BUG_ON(!buffer_locked(bh)); + bh_arr[cnt++] = bh; + page_dirty--; + } + /* + * Second case, allocate space for a delalloc buffer. + * We can return EAGAIN here in the release page case. + */ + } else if (buffer_delay(bh)) { + if (!iomp) { + delalloc = 1; + err = xfs_map_blocks(inode, offset, len, &iomap, + BMAPI_ALLOCATE | flags); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp) { + xfs_map_at_offset(page, bh, p_offset, + inode->i_blkbits, iomp); + if (startio) { + bh_arr[cnt++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + page_dirty--; + } + } else if ((buffer_uptodate(bh) || PageUptodate(page)) && + (unmapped || startio)) { + + if (!buffer_mapped(bh)) { + int size; + + /* + * Getting here implies an unmapped buffer + * was found, and we are in a path where we + * need to write the whole page out. 
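The page_dirty counter introduced above starts as the number of buffer_heads on the page (PAGE_CACHE_SIZE divided by the buffer size) and is decremented each time a buffer reaches a cleanable state, so the function returns the count of buffers still dirty; linvfs_release_page() later treats a zero return as permission to free the buffers. A small worked example of the bookkeeping, assuming 4 KiB pages and 512-byte filesystem blocks:

```c
#include <stdio.h>

int main(void)
{
	int page_size  = 4096;
	int bh_size    = 512;                  /* 1 << inode->i_blkbits */
	int page_dirty = page_size / bh_size;  /* 8 buffers start out "dirty" */

	for (int cleaned = 0; cleaned < 8; cleaned++)
		page_dirty--;                      /* one per buffer mapped/locked for I/O */

	printf("still-dirty buffers: %d\n", page_dirty); /* 0: page is releasable */
	return 0;
}
```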
+ */ + if (!iomp) { + size = xfs_probe_unmapped_cluster( + inode, page, bh, head); + err = xfs_map_blocks(inode, offset, + size, &iomap, + BMAPI_WRITE|BMAPI_MMAP); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp) { + xfs_map_at_offset(page, + bh, p_offset, + inode->i_blkbits, iomp); + if (startio) { + bh_arr[cnt++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + page_dirty--; + } + } else if (startio) { + if (buffer_uptodate(bh) && + !test_and_set_bit(BH_Lock, &bh->b_state)) { + bh_arr[cnt++] = bh; + page_dirty--; + } + } + } + } while (offset += len, p_offset += len, + ((bh = bh->b_this_page) != head)); + + if (uptodate && bh == head) + SetPageUptodate(page); + + if (startio) + xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1); + + if (iomp) { + tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> + PAGE_CACHE_SHIFT; + if (delalloc && (tlast > last_index)) + tlast = last_index; + xfs_cluster_write(inode, page->index + 1, iomp, wbc, + startio, unmapped, tlast); + } + + return page_dirty; + +error: + for (i = 0; i < cnt; i++) { + unlock_buffer(bh_arr[i]); + } + + /* + * If it's delalloc and we have nowhere to put it, + * throw it away, unless the lower layers told + * us to try again. + */ + if (err != -EAGAIN) { + if (!unmapped) { + block_invalidatepage(page, 0); + } + ClearPageUptodate(page); + } + return err; +} + +STATIC int +__linvfs_get_block( + struct inode *inode, + sector_t iblock, + unsigned long blocks, + struct buffer_head *bh_result, + int create, + int direct, + bmapi_flags_t flags) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_iomap_t iomap; + int retpbbm = 1; + int error; + ssize_t size; + loff_t offset = (loff_t)iblock << inode->i_blkbits; + + if (blocks) + size = blocks << inode->i_blkbits; + else + size = 1 << inode->i_blkbits; + + VOP_BMAP(vp, offset, size, + create ? flags : BMAPI_READ, &iomap, &retpbbm, error); + if (error) + return -error; + + if (retpbbm == 0) + return 0; + + if (iomap.iomap_bn != IOMAP_DADDR_NULL) { + xfs_daddr_t bn; + loff_t delta; + + /* For unwritten extents do not report a disk address on + * the read case (treat as if we're reading into a hole). + */ + if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { + delta = offset - iomap.iomap_offset; + delta >>= inode->i_blkbits; + + bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT); + bn += delta; + BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME)); + bh_result->b_blocknr = bn; + set_buffer_mapped(bh_result); + } + if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { + if (direct) + bh_result->b_private = inode; + set_buffer_unwritten(bh_result); + set_buffer_delay(bh_result); + } + } + + /* If this is a realtime file, data might be on a new device */ + bh_result->b_bdev = iomap.iomap_target->pbr_bdev; + + /* If we previously allocated a block out beyond eof and + * we are now coming back to use it then we will need to + * flag it as new even if it has a disk address. 
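__linvfs_get_block() above converts between the two block units in play: iomap_bn counts 512-byte basic blocks (BBSHIFT == 9), while bh_result->b_blocknr wants filesystem blocks of 1 << i_blkbits bytes. A self-contained worked example with illustrative values, assuming 4 KiB filesystem blocks:

```c
#include <stdio.h>

#define BBSHIFT 9   /* XFS basic blocks are 512 bytes */

int main(void)
{
	long long iomap_bn     = 2048;   /* extent starts at 512-byte sector 2048 */
	long long iomap_offset = 0;      /* ... which maps file offset 0 */
	int       blkbits      = 12;     /* 4 KiB filesystem blocks */
	long long offset       = 20480;  /* byte being mapped (fs block 5) */

	long long delta = (offset - iomap_offset) >> blkbits;     /* 5 */
	long long bn = (iomap_bn >> (blkbits - BBSHIFT)) + delta; /* 256 + 5 */

	printf("b_blocknr = %lld\n", bn);    /* 261 */
	return 0;
}
```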
+ */ + if (create && + ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || + (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) { + set_buffer_new(bh_result); + } + + if (iomap.iomap_flags & IOMAP_DELAY) { + BUG_ON(direct); + if (create) { + set_buffer_uptodate(bh_result); + set_buffer_mapped(bh_result); + set_buffer_delay(bh_result); + } + } + + if (blocks) { + bh_result->b_size = (ssize_t)min( + (loff_t)(iomap.iomap_bsize - iomap.iomap_delta), + (loff_t)(blocks << inode->i_blkbits)); + } + + return 0; +} + +int +linvfs_get_block( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __linvfs_get_block(inode, iblock, 0, bh_result, + create, 0, BMAPI_WRITE); +} + +STATIC int +linvfs_get_blocks_direct( + struct inode *inode, + sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, + int create) +{ + return __linvfs_get_block(inode, iblock, max_blocks, bh_result, + create, 1, BMAPI_WRITE|BMAPI_DIRECT); +} + +STATIC ssize_t +linvfs_direct_IO( + int rw, + struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_iomap_t iomap; + int maps = 1; + int error; + + VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); + if (error) + return -error; + + return blockdev_direct_IO_own_locking(rw, iocb, inode, + iomap.iomap_target->pbr_bdev, + iov, offset, nr_segs, + linvfs_get_blocks_direct, + linvfs_unwritten_convert_direct); +} + + +STATIC sector_t +linvfs_bmap( + struct address_space *mapping, + sector_t block) +{ + struct inode *inode = (struct inode *)mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address); + + VOP_RWLOCK(vp, VRWLOCK_READ); + VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); + VOP_RWUNLOCK(vp, VRWLOCK_READ); + return generic_block_bmap(mapping, block, linvfs_get_block); +} + +STATIC int +linvfs_readpage( + struct file *unused, + struct page *page) +{ + return mpage_readpage(page, linvfs_get_block); +} + +STATIC int +linvfs_readpages( + struct file *unused, + struct address_space *mapping, + struct list_head *pages, + unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); +} + +STATIC void +xfs_count_page_state( + struct page *page, + int *delalloc, + int *unmapped, + int *unwritten) +{ + struct buffer_head *bh, *head; + + *delalloc = *unmapped = *unwritten = 0; + + bh = head = page_buffers(page); + do { + if (buffer_uptodate(bh) && !buffer_mapped(bh)) + (*unmapped) = 1; + else if (buffer_unwritten(bh) && !buffer_delay(bh)) + clear_buffer_unwritten(bh); + else if (buffer_unwritten(bh)) + (*unwritten) = 1; + else if (buffer_delay(bh)) + (*delalloc) = 1; + } while ((bh = bh->b_this_page) != head); +} + + +/* + * writepage: Called from one of two places: + * + * 1. we are flushing a delalloc buffer head. + * + * 2. we are writing out a dirty page. Typically the page dirty + * state is cleared before we get here. In this case is it + * conceivable we have no buffer heads. + * + * For delalloc space on the page we need to allocate space and + * flush it. For unmapped buffer heads on the page we should + * allocate space if the page is uptodate. For any other dirty + * buffer heads on the page we should flush them. 
+ * + * If we detect that a transaction would be required to flush + * the page, we have to check the process flags first, if we + * are already in a transaction or disk I/O during allocations + * is off, we need to fail the writepage and redirty the page. + */ + +STATIC int +linvfs_writepage( + struct page *page, + struct writeback_control *wbc) +{ + int error; + int need_trans; + int delalloc, unmapped, unwritten; + struct inode *inode = page->mapping->host; + + xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); + + /* + * We need a transaction if: + * 1. There are delalloc buffers on the page + * 2. The page is uptodate and we have unmapped buffers + * 3. The page is uptodate and we have no buffers + * 4. There are unwritten buffers on the page + */ + + if (!page_has_buffers(page)) { + unmapped = 1; + need_trans = 1; + } else { + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!PageUptodate(page)) + unmapped = 0; + need_trans = delalloc + unmapped + unwritten; + } + + /* + * If we need a transaction and the process flags say + * we are already in a transaction, or no IO is allowed + * then mark the page dirty again and leave the page + * as is. + */ + if (PFLAGS_TEST_FSTRANS() && need_trans) + goto out_fail; + + /* + * Delay hooking up buffer heads until we have + * made our go/no-go decision. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + + /* + * Convert delayed allocate, unwritten or unmapped space + * to real space and flush out to disk. + */ + error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); + if (error == -EAGAIN) + goto out_fail; + if (unlikely(error < 0)) + goto out_unlock; + + return 0; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; +out_unlock: + unlock_page(page); + return error; +} + +/* + * Called to move a page into cleanable state - and from there + * to be released. Possibly the page is already clean. We always + * have buffer heads in this call. + * + * Returns 0 if the page is ok to release, 1 otherwise. + * + * Possible scenarios are: + * + * 1. We are being called to release a page which has been written + * to via regular I/O. buffer heads will be dirty and possibly + * delalloc. If no delalloc buffer heads in this case then we + * can just return zero. + * + * 2. We are called to release a page which has been written via + * mmap, all we need to do is ensure there is no delalloc + * state in the buffer heads, if not we can let the caller + * free them and we should come back later via writepage. + */ +STATIC int +linvfs_release_page( + struct page *page, + int gfp_mask) +{ + struct inode *inode = page->mapping->host; + int dirty, delalloc, unmapped, unwritten; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + }; + + xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); + + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!delalloc && !unwritten) + goto free_buffers; + + if (!(gfp_mask & __GFP_FS)) + return 0; + + /* If we are already inside a transaction or the thread cannot + * do I/O, we cannot release this page. + */ + if (PFLAGS_TEST_FSTRANS()) + return 0; + + /* + * Convert delalloc space to real space, do not flush the + * data out to disk, that will be done by the caller. + * Never need to allocate space here - we will always + * come back to writepage in that case. 
+ */ + dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); + if (dirty == 0 && !unwritten) + goto free_buffers; + return 0; + +free_buffers: + return try_to_free_buffers(page); +} + +STATIC int +linvfs_prepare_write( + struct file *file, + struct page *page, + unsigned int from, + unsigned int to) +{ + return block_prepare_write(page, from, to, linvfs_get_block); +} + +struct address_space_operations linvfs_aops = { + .readpage = linvfs_readpage, + .readpages = linvfs_readpages, + .writepage = linvfs_writepage, + .sync_page = block_sync_page, + .releasepage = linvfs_release_page, + .prepare_write = linvfs_prepare_write, + .commit_write = generic_commit_write, + .bmap = linvfs_bmap, + .direct_IO = linvfs_direct_IO, +}; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c new file mode 100644 index 00000000000..23e0eb67fc2 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -0,0 +1,1980 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * The xfs_buf.c code provides an abstract buffer cache model on top + * of the Linux page cache. Cached metadata blocks for a file system + * are hashed to the inode for the block device. xfs_buf.c assembles + * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. + * + * Written by Steve Lord, Jim Mostek, Russell Cattelan + * and Rajagopal Ananthanarayanan ("ananth") at SGI. 
+ * + */ + +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/bio.h> +#include <linux/sysctl.h> +#include <linux/proc_fs.h> +#include <linux/workqueue.h> +#include <linux/percpu.h> +#include <linux/blkdev.h> +#include <linux/hash.h> + +#include "xfs_linux.h" + +/* + * File wide globals + */ + +STATIC kmem_cache_t *pagebuf_cache; +STATIC kmem_shaker_t pagebuf_shake; +STATIC int pagebuf_daemon_wakeup(int, unsigned int); +STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); +STATIC struct workqueue_struct *pagebuf_logio_workqueue; +STATIC struct workqueue_struct *pagebuf_dataio_workqueue; + +/* + * Pagebuf debugging + */ + +#ifdef PAGEBUF_TRACE +void +pagebuf_trace( + xfs_buf_t *pb, + char *id, + void *data, + void *ra) +{ + ktrace_enter(pagebuf_trace_buf, + pb, id, + (void *)(unsigned long)pb->pb_flags, + (void *)(unsigned long)pb->pb_hold.counter, + (void *)(unsigned long)pb->pb_sema.count.counter, + (void *)current, + data, ra, + (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), + (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), + (void *)(unsigned long)pb->pb_buffer_length, + NULL, NULL, NULL, NULL, NULL); +} +ktrace_t *pagebuf_trace_buf; +#define PAGEBUF_TRACE_SIZE 4096 +#define PB_TRACE(pb, id, data) \ + pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) +#else +#define PB_TRACE(pb, id, data) do { } while (0) +#endif + +#ifdef PAGEBUF_LOCK_TRACKING +# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) +# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) +# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) +#else +# define PB_SET_OWNER(pb) do { } while (0) +# define PB_CLEAR_OWNER(pb) do { } while (0) +# define PB_GET_OWNER(pb) do { } while (0) +#endif + +/* + * Pagebuf allocation / freeing. + */ + +#define pb_to_gfp(flags) \ + ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ + ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) + +#define pb_to_km(flags) \ + (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) + + +#define pagebuf_allocate(flags) \ + kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) +#define pagebuf_deallocate(pb) \ + kmem_zone_free(pagebuf_cache, (pb)); + +/* + * Page Region interfaces. + * + * For pages in filesystems where the blocksize is smaller than the + * pagesize, we use the page->private field (long) to hold a bitmap + * of uptodate regions within the page. + * + * Each such region is "bytes per page / bits per long" bytes long. 
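+ *
+ * For example, with 4096-byte pages and 64-bit longs each page has
+ * 64 regions of 64 bytes; with 32-bit longs it has 32 regions of
+ * 128 bytes each.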
+ * + * NBPPR == number-of-bytes-per-page-region + * BTOPR == bytes-to-page-region (rounded up) + * BTOPRT == bytes-to-page-region-truncated (rounded down) + */ +#if (BITS_PER_LONG == 32) +#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ +#elif (BITS_PER_LONG == 64) +#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ +#else +#error BITS_PER_LONG must be 32 or 64 +#endif +#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) +#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) +#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) + +STATIC unsigned long +page_region_mask( + size_t offset, + size_t length) +{ + unsigned long mask; + int first, final; + + first = BTOPR(offset); + final = BTOPRT(offset + length - 1); + first = min(first, final); + + mask = ~0UL; + mask <<= BITS_PER_LONG - (final - first); + mask >>= BITS_PER_LONG - (final); + + ASSERT(offset + length <= PAGE_CACHE_SIZE); + ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); + + return mask; +} + +STATIC inline void +set_page_region( + struct page *page, + size_t offset, + size_t length) +{ + page->private |= page_region_mask(offset, length); + if (page->private == ~0UL) + SetPageUptodate(page); +} + +STATIC inline int +test_page_region( + struct page *page, + size_t offset, + size_t length) +{ + unsigned long mask = page_region_mask(offset, length); + + return (mask && (page->private & mask) == mask); +} + +/* + * Mapping of multi-page buffers into contiguous virtual space + */ + +typedef struct a_list { + void *vm_addr; + struct a_list *next; +} a_list_t; + +STATIC a_list_t *as_free_head; +STATIC int as_list_len; +STATIC DEFINE_SPINLOCK(as_lock); + +/* + * Try to batch vunmaps because they are costly. + */ +STATIC void +free_address( + void *addr) +{ + a_list_t *aentry; + + aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); + if (likely(aentry)) { + spin_lock(&as_lock); + aentry->next = as_free_head; + aentry->vm_addr = addr; + as_free_head = aentry; + as_list_len++; + spin_unlock(&as_lock); + } else { + vunmap(addr); + } +} + +STATIC void +purge_addresses(void) +{ + a_list_t *aentry, *old; + + if (as_free_head == NULL) + return; + + spin_lock(&as_lock); + aentry = as_free_head; + as_free_head = NULL; + as_list_len = 0; + spin_unlock(&as_lock); + + while ((old = aentry) != NULL) { + vunmap(aentry->vm_addr); + aentry = aentry->next; + kfree(old); + } +} + +/* + * Internal pagebuf object manipulation + */ + +STATIC void +_pagebuf_initialize( + xfs_buf_t *pb, + xfs_buftarg_t *target, + loff_t range_base, + size_t range_length, + page_buf_flags_t flags) +{ + /* + * We don't want certain flags to appear in pb->pb_flags. + */ + flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); + + memset(pb, 0, sizeof(xfs_buf_t)); + atomic_set(&pb->pb_hold, 1); + init_MUTEX_LOCKED(&pb->pb_iodonesema); + INIT_LIST_HEAD(&pb->pb_list); + INIT_LIST_HEAD(&pb->pb_hash_list); + init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ + PB_SET_OWNER(pb); + pb->pb_target = target; + pb->pb_file_offset = range_base; + /* + * Set buffer_length and count_desired to the same value initially. + * I/O routines should use count_desired, which will be the same in + * most cases but may be reset (e.g. XFS recovery). 
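+ * (Log recovery, for instance, may shrink count_desired so that only
+ * part of the range covered by the buffer is actually transferred.)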
+ */
+ pb->pb_buffer_length = pb->pb_count_desired = range_length;
+ pb->pb_flags = flags | PBF_NONE;
+ pb->pb_bn = XFS_BUF_DADDR_NULL;
+ atomic_set(&pb->pb_pin_count, 0);
+ init_waitqueue_head(&pb->pb_waiters);
+
+ XFS_STATS_INC(pb_create);
+ PB_TRACE(pb, "initialize", target);
+}
+
+/*
+ * Allocate a page array capable of holding a specified number
+ * of pages, and point the page buf at it.
+ */
+STATIC int
+_pagebuf_get_pages(
+ xfs_buf_t *pb,
+ int page_count,
+ page_buf_flags_t flags)
+{
+ /* Make sure that we have a page list */
+ if (pb->pb_pages == NULL) {
+ pb->pb_offset = page_buf_poff(pb->pb_file_offset);
+ pb->pb_page_count = page_count;
+ if (page_count <= PB_PAGES) {
+ pb->pb_pages = pb->pb_page_array;
+ } else {
+ pb->pb_pages = kmem_alloc(sizeof(struct page *) *
+ page_count, pb_to_km(flags));
+ if (pb->pb_pages == NULL)
+ return -ENOMEM;
+ }
+ memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+ }
+ return 0;
+}
+
+/*
+ * Frees pb_pages if it was malloced.
+ */
+STATIC void
+_pagebuf_free_pages(
+ xfs_buf_t *bp)
+{
+ if (bp->pb_pages != bp->pb_page_array) {
+ kmem_free(bp->pb_pages,
+ bp->pb_page_count * sizeof(struct page *));
+ }
+}
+
+/*
+ * Releases the specified buffer.
+ *
+ * The modification state of any associated pages is left unchanged.
+ * The buffer must not be on any hash - use pagebuf_rele instead for
+ * hashed and refcounted buffers.
+ */
+void
+pagebuf_free(
+ xfs_buf_t *bp)
+{
+ PB_TRACE(bp, "free", 0);
+
+ ASSERT(list_empty(&bp->pb_hash_list));
+
+ if (bp->pb_flags & _PBF_PAGE_CACHE) {
+ uint i;
+
+ if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
+ free_address(bp->pb_addr - bp->pb_offset);
+
+ for (i = 0; i < bp->pb_page_count; i++)
+ page_cache_release(bp->pb_pages[i]);
+ _pagebuf_free_pages(bp);
+ } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+ /*
+ * XXX(hch): bp->pb_count_desired might be incorrect (see
+ * pagebuf_associate_memory for details), but fortunately
+ * the Linux version of kmem_free ignores the len argument..
+ */
+ kmem_free(bp->pb_addr, bp->pb_count_desired);
+ _pagebuf_free_pages(bp);
+ }
+
+ pagebuf_deallocate(bp);
+}
+
+/*
+ * Finds all pages for the buffer in question and builds its page list.
+ */
+STATIC int
+_pagebuf_lookup_pages(
+ xfs_buf_t *bp,
+ uint flags)
+{
+ struct address_space *mapping = bp->pb_target->pbr_mapping;
+ size_t blocksize = bp->pb_target->pbr_bsize;
+ size_t size = bp->pb_count_desired;
+ size_t nbytes, offset;
+ int gfp_mask = pb_to_gfp(flags);
+ unsigned short page_count, i;
+ pgoff_t first;
+ loff_t end;
+ int error;
+
+ end = bp->pb_file_offset + bp->pb_buffer_length;
+ page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+
+ error = _pagebuf_get_pages(bp, page_count, flags);
+ if (unlikely(error))
+ return error;
+ bp->pb_flags |= _PBF_PAGE_CACHE;
+
+ offset = bp->pb_offset;
+ first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < bp->pb_page_count; i++) {
+ struct page *page;
+ uint retries = 0;
+
+ retry:
+ page = find_or_create_page(mapping, first + i, gfp_mask);
+ if (unlikely(page == NULL)) {
+ if (flags & PBF_READ_AHEAD) {
+ bp->pb_page_count = i;
+ for (i = 0; i < bp->pb_page_count; i++)
+ unlock_page(bp->pb_pages[i]);
+ return -ENOMEM;
+ }
+
+ /*
+ * This could deadlock.
+ *
+ * But until all the XFS low-level code is revamped to
+ * handle buffer allocation failures we can't do much.
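+ *
+ * The loop below therefore warns every 100 attempts and waits
+ * for the block layer to make progress before retrying the
+ * allocation.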
+ */
+ if (!(++retries % 100))
+ printk(KERN_ERR
+ "XFS: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
+ __FUNCTION__, gfp_mask);
+
+ XFS_STATS_INC(pb_page_retries);
+ pagebuf_daemon_wakeup(0, gfp_mask);
+ blk_congestion_wait(WRITE, HZ/50);
+ goto retry;
+ }
+
+ XFS_STATS_INC(pb_page_found);
+
+ nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+ size -= nbytes;
+
+ if (!PageUptodate(page)) {
+ page_count--;
+ if (blocksize >= PAGE_CACHE_SIZE) {
+ if (flags & PBF_READ)
+ bp->pb_locked = 1;
+ } else if (!PagePrivate(page)) {
+ if (test_page_region(page, offset, nbytes))
+ page_count++;
+ }
+ }
+
+ bp->pb_pages[i] = page;
+ offset = 0;
+ }
+
+ if (!bp->pb_locked) {
+ for (i = 0; i < bp->pb_page_count; i++)
+ unlock_page(bp->pb_pages[i]);
+ }
+
+ if (page_count) {
+ /* if we have any uptodate pages, mark that in the buffer */
+ bp->pb_flags &= ~PBF_NONE;
+
+ /* if some pages aren't uptodate, mark that in the buffer */
+ if (page_count != bp->pb_page_count)
+ bp->pb_flags |= PBF_PARTIAL;
+ }
+
+ PB_TRACE(bp, "lookup_pages", (long)page_count);
+ return error;
+}
+
+/*
+ * Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_pagebuf_map_pages(
+ xfs_buf_t *bp,
+ uint flags)
+{
+ /* A single page buffer is always mappable */
+ if (bp->pb_page_count == 1) {
+ bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
+ bp->pb_flags |= PBF_MAPPED;
+ } else if (flags & PBF_MAPPED) {
+ if (as_list_len > 64)
+ purge_addresses();
+ bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
+ VM_MAP, PAGE_KERNEL);
+ if (unlikely(bp->pb_addr == NULL))
+ return -ENOMEM;
+ bp->pb_addr += bp->pb_offset;
+ bp->pb_flags |= PBF_MAPPED;
+ }
+
+ return 0;
+}
+
+/*
+ * Finding and Reading Buffers
+ */
+
+/*
+ * _pagebuf_find
+ *
+ * Looks up, and creates if absent, a lockable buffer for
+ * a given range of an inode. The buffer is returned
+ * locked. If other overlapping buffers exist, they are
+ * released before the new buffer is created and locked,
+ * which may imply that this call will block until those buffers
+ * are unlocked. No I/O is implied by this call.
+ */
+xfs_buf_t *
+_pagebuf_find(
+ xfs_buftarg_t *btp, /* block device target */
+ loff_t ioff, /* starting offset of range */
+ size_t isize, /* length of range */
+ page_buf_flags_t flags, /* PBF_TRYLOCK */
+ xfs_buf_t *new_pb)/* newly allocated buffer */
+{
+ loff_t range_base;
+ size_t range_length;
+ xfs_bufhash_t *hash;
+ xfs_buf_t *pb, *n;
+
+ range_base = (ioff << BBSHIFT);
+ range_length = (isize << BBSHIFT);
+
+ /* Check for IOs smaller than the sector size / not sector aligned */
+ ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+ ASSERT(!(range_base & (loff_t)btp->pbr_smask));
+
+ hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
+
+ spin_lock(&hash->bh_lock);
+
+ list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+ ASSERT(btp == pb->pb_target);
+ if (pb->pb_file_offset == range_base &&
+ pb->pb_buffer_length == range_length) {
+ /*
+ * If we look at something, bring it to the
+ * front of the list for next time.
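+ * (A move-to-front scheme: buffers that are hit repeatedly
+ * stay near the head of the hash chain, keeping lookups for
+ * hot buffers short.)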
+ */ + atomic_inc(&pb->pb_hold); + list_move(&pb->pb_hash_list, &hash->bh_list); + goto found; + } + } + + /* No match found */ + if (new_pb) { + _pagebuf_initialize(new_pb, btp, range_base, + range_length, flags); + new_pb->pb_hash = hash; + list_add(&new_pb->pb_hash_list, &hash->bh_list); + } else { + XFS_STATS_INC(pb_miss_locked); + } + + spin_unlock(&hash->bh_lock); + return new_pb; + +found: + spin_unlock(&hash->bh_lock); + + /* Attempt to get the semaphore without sleeping, + * if this does not work then we need to drop the + * spinlock and do a hard attempt on the semaphore. + */ + if (down_trylock(&pb->pb_sema)) { + if (!(flags & PBF_TRYLOCK)) { + /* wait for buffer ownership */ + PB_TRACE(pb, "get_lock", 0); + pagebuf_lock(pb); + XFS_STATS_INC(pb_get_locked_waited); + } else { + /* We asked for a trylock and failed, no need + * to look at file offset and length here, we + * know that this pagebuf at least overlaps our + * pagebuf and is locked, therefore our buffer + * either does not exist, or is this buffer + */ + + pagebuf_rele(pb); + XFS_STATS_INC(pb_busy_locked); + return (NULL); + } + } else { + /* trylock worked */ + PB_SET_OWNER(pb); + } + + if (pb->pb_flags & PBF_STALE) + pb->pb_flags &= PBF_MAPPED; + PB_TRACE(pb, "got_lock", 0); + XFS_STATS_INC(pb_get_locked); + return (pb); +} + +/* + * xfs_buf_get_flags assembles a buffer covering the specified range. + * + * Storage in memory for all portions of the buffer will be allocated, + * although backing storage may not be. + */ +xfs_buf_t * +xfs_buf_get_flags( /* allocate a buffer */ + xfs_buftarg_t *target,/* target for buffer */ + loff_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + page_buf_flags_t flags) /* PBF_TRYLOCK */ +{ + xfs_buf_t *pb, *new_pb; + int error = 0, i; + + new_pb = pagebuf_allocate(flags); + if (unlikely(!new_pb)) + return NULL; + + pb = _pagebuf_find(target, ioff, isize, flags, new_pb); + if (pb == new_pb) { + error = _pagebuf_lookup_pages(pb, flags); + if (error) + goto no_buffer; + } else { + pagebuf_deallocate(new_pb); + if (unlikely(pb == NULL)) + return NULL; + } + + for (i = 0; i < pb->pb_page_count; i++) + mark_page_accessed(pb->pb_pages[i]); + + if (!(pb->pb_flags & PBF_MAPPED)) { + error = _pagebuf_map_pages(pb, flags); + if (unlikely(error)) { + printk(KERN_WARNING "%s: failed to map pages\n", + __FUNCTION__); + goto no_buffer; + } + } + + XFS_STATS_INC(pb_get); + + /* + * Always fill in the block number now, the mapped cases can do + * their own overlay of this later. 
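+ * (pb_bn, like ioff, is expressed in 512-byte basic blocks;
+ * BBSHIFT converts such block numbers to byte offsets.)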
+ */ + pb->pb_bn = ioff; + pb->pb_count_desired = pb->pb_buffer_length; + + PB_TRACE(pb, "get", (unsigned long)flags); + return pb; + + no_buffer: + if (flags & (PBF_LOCK | PBF_TRYLOCK)) + pagebuf_unlock(pb); + pagebuf_rele(pb); + return NULL; +} + +xfs_buf_t * +xfs_buf_read_flags( + xfs_buftarg_t *target, + loff_t ioff, + size_t isize, + page_buf_flags_t flags) +{ + xfs_buf_t *pb; + + flags |= PBF_READ; + + pb = xfs_buf_get_flags(target, ioff, isize, flags); + if (pb) { + if (PBF_NOT_DONE(pb)) { + PB_TRACE(pb, "read", (unsigned long)flags); + XFS_STATS_INC(pb_get_read); + pagebuf_iostart(pb, flags); + } else if (flags & PBF_ASYNC) { + PB_TRACE(pb, "read_async", (unsigned long)flags); + /* + * Read ahead call which is already satisfied, + * drop the buffer + */ + goto no_buffer; + } else { + PB_TRACE(pb, "read_done", (unsigned long)flags); + /* We do not want read in the flags */ + pb->pb_flags &= ~PBF_READ; + } + } + + return pb; + + no_buffer: + if (flags & (PBF_LOCK | PBF_TRYLOCK)) + pagebuf_unlock(pb); + pagebuf_rele(pb); + return NULL; +} + +/* + * Create a skeletal pagebuf (no pages associated with it). + */ +xfs_buf_t * +pagebuf_lookup( + xfs_buftarg_t *target, + loff_t ioff, + size_t isize, + page_buf_flags_t flags) +{ + xfs_buf_t *pb; + + pb = pagebuf_allocate(flags); + if (pb) { + _pagebuf_initialize(pb, target, ioff, isize, flags); + } + return pb; +} + +/* + * If we are not low on memory then do the readahead in a deadlock + * safe manner. + */ +void +pagebuf_readahead( + xfs_buftarg_t *target, + loff_t ioff, + size_t isize, + page_buf_flags_t flags) +{ + struct backing_dev_info *bdi; + + bdi = target->pbr_mapping->backing_dev_info; + if (bdi_read_congested(bdi)) + return; + + flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); + xfs_buf_read_flags(target, ioff, isize, flags); +} + +xfs_buf_t * +pagebuf_get_empty( + size_t len, + xfs_buftarg_t *target) +{ + xfs_buf_t *pb; + + pb = pagebuf_allocate(0); + if (pb) + _pagebuf_initialize(pb, target, 0, len, 0); + return pb; +} + +static inline struct page * +mem_to_page( + void *addr) +{ + if (((unsigned long)addr < VMALLOC_START) || + ((unsigned long)addr >= VMALLOC_END)) { + return virt_to_page(addr); + } else { + return vmalloc_to_page(addr); + } +} + +int +pagebuf_associate_memory( + xfs_buf_t *pb, + void *mem, + size_t len) +{ + int rval; + int i = 0; + size_t ptr; + size_t end, end_cur; + off_t offset; + int page_count; + + page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK); + if (offset && (len > PAGE_CACHE_SIZE)) + page_count++; + + /* Free any previous set of page pointers */ + if (pb->pb_pages) + _pagebuf_free_pages(pb); + + pb->pb_pages = NULL; + pb->pb_addr = mem; + + rval = _pagebuf_get_pages(pb, page_count, 0); + if (rval) + return rval; + + pb->pb_offset = offset; + ptr = (size_t) mem & PAGE_CACHE_MASK; + end = PAGE_CACHE_ALIGN((size_t) mem + len); + end_cur = end; + /* set up first page */ + pb->pb_pages[0] = mem_to_page(mem); + + ptr += PAGE_CACHE_SIZE; + pb->pb_page_count = ++i; + while (ptr < end) { + pb->pb_pages[i] = mem_to_page((void *)ptr); + pb->pb_page_count = ++i; + ptr += PAGE_CACHE_SIZE; + } + pb->pb_locked = 0; + + pb->pb_count_desired = pb->pb_buffer_length = len; + pb->pb_flags |= PBF_MAPPED; + + return 0; +} + +xfs_buf_t * +pagebuf_get_no_daddr( + size_t len, + xfs_buftarg_t *target) +{ + size_t malloc_len = len; + xfs_buf_t *bp; + void *data; + int error; + + bp = pagebuf_allocate(0); + if (unlikely(bp == NULL)) + goto fail; + 
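/*
+ * kmem_alloc() makes no sector-alignment guarantee, so the allocation
+ * below is retried at double the size whenever the returned address
+ * is not aligned to the device sector mask.
+ */
+ 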
_pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);
+
+ try_again:
+ data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
+ if (unlikely(data == NULL))
+ goto fail_free_buf;
+
+ /* check whether alignment matches.. */
+ if ((__psunsigned_t)data !=
+ ((__psunsigned_t)data & ~target->pbr_smask)) {
+ /* .. else double the size and try again */
+ kmem_free(data, malloc_len);
+ malloc_len <<= 1;
+ goto try_again;
+ }
+
+ error = pagebuf_associate_memory(bp, data, len);
+ if (error)
+ goto fail_free_mem;
+ bp->pb_flags |= _PBF_KMEM_ALLOC;
+
+ pagebuf_unlock(bp);
+
+ PB_TRACE(bp, "no_daddr", data);
+ return bp;
+ fail_free_mem:
+ kmem_free(data, malloc_len);
+ fail_free_buf:
+ pagebuf_free(bp);
+ fail:
+ return NULL;
+}
+
+/*
+ * pagebuf_hold
+ *
+ * Increment reference count on buffer, to hold the buffer concurrently
+ * with another thread which may release (free) the buffer asynchronously.
+ *
+ * Must hold the buffer already to call this function.
+ */
+void
+pagebuf_hold(
+ xfs_buf_t *pb)
+{
+ atomic_inc(&pb->pb_hold);
+ PB_TRACE(pb, "hold", 0);
+}
+
+/*
+ * pagebuf_rele
+ *
+ * pagebuf_rele releases a hold on the specified buffer. If the
+ * hold count is 1, pagebuf_rele calls pagebuf_free.
+ */
+void
+pagebuf_rele(
+ xfs_buf_t *pb)
+{
+ xfs_bufhash_t *hash = pb->pb_hash;
+
+ PB_TRACE(pb, "rele", pb->pb_relse);
+
+ /*
+ * pagebuf_lookup buffers are not hashed, not delayed write,
+ * and don't have their own release routines. Special case.
+ */
+ if (unlikely(!hash)) {
+ ASSERT(!pb->pb_relse);
+ if (atomic_dec_and_test(&pb->pb_hold))
+ xfs_buf_free(pb);
+ return;
+ }
+
+ if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
+ int do_free = 1;
+
+ if (pb->pb_relse) {
+ atomic_inc(&pb->pb_hold);
+ spin_unlock(&hash->bh_lock);
+ (*(pb->pb_relse)) (pb);
+ spin_lock(&hash->bh_lock);
+ do_free = 0;
+ }
+
+ if (pb->pb_flags & PBF_DELWRI) {
+ pb->pb_flags |= PBF_ASYNC;
+ atomic_inc(&pb->pb_hold);
+ pagebuf_delwri_queue(pb, 0);
+ do_free = 0;
+ } else if (pb->pb_flags & PBF_FS_MANAGED) {
+ do_free = 0;
+ }
+
+ if (do_free) {
+ list_del_init(&pb->pb_hash_list);
+ spin_unlock(&hash->bh_lock);
+ pagebuf_free(pb);
+ } else {
+ spin_unlock(&hash->bh_lock);
+ }
+ }
+}
+
+
+/*
+ * Mutual exclusion on buffers. Locking model:
+ *
+ * Buffers associated with inodes for which buffer locking
+ * is not enabled are not protected by semaphores, and are
+ * assumed to be exclusively owned by the caller. There is a
+ * spinlock in the buffer, used by the caller when concurrent
+ * access is possible.
+ */
+
+/*
+ * pagebuf_cond_lock
+ *
+ * pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ * Note that this in no way
+ * locks the underlying pages, so it is only useful for synchronizing
+ * concurrent use of page buffer objects, not for synchronizing independent
+ * access to the underlying pages.
+ */
+int
+pagebuf_cond_lock( /* lock buffer, if not locked */
+ /* (returns -EBUSY if locked) */
+ xfs_buf_t *pb)
+{
+ int locked;
+
+ locked = down_trylock(&pb->pb_sema) == 0;
+ if (locked) {
+ PB_SET_OWNER(pb);
+ }
+ PB_TRACE(pb, "cond_lock", (long)locked);
+ return(locked ? 0 : -EBUSY);
+}
+
+#if defined(DEBUG) || defined(XFS_BLI_TRACE)
+/*
+ * pagebuf_lock_value
+ *
+ * Return lock value for a pagebuf
+ */
+int
+pagebuf_lock_value(
+ xfs_buf_t *pb)
+{
+ return(atomic_read(&pb->pb_sema.count));
+}
+#endif
+
+/*
+ * pagebuf_lock
+ *
+ * pagebuf_lock locks a buffer object. 
Note that this in no way
+ * locks the underlying pages, so it is only useful for synchronizing
+ * concurrent use of page buffer objects, not for synchronizing independent
+ * access to the underlying pages.
+ */
+int
+pagebuf_lock(
+ xfs_buf_t *pb)
+{
+ PB_TRACE(pb, "lock", 0);
+ if (atomic_read(&pb->pb_io_remaining))
+ blk_run_address_space(pb->pb_target->pbr_mapping);
+ down(&pb->pb_sema);
+ PB_SET_OWNER(pb);
+ PB_TRACE(pb, "locked", 0);
+ return 0;
+}
+
+/*
+ * pagebuf_unlock
+ *
+ * pagebuf_unlock releases the lock on the buffer object created by
+ * pagebuf_lock or pagebuf_cond_lock (not any
+ * pinning of underlying pages created by pagebuf_pin).
+ */
+void
+pagebuf_unlock( /* unlock buffer */
+ xfs_buf_t *pb) /* buffer to unlock */
+{
+ PB_CLEAR_OWNER(pb);
+ up(&pb->pb_sema);
+ PB_TRACE(pb, "unlock", 0);
+}
+
+
+/*
+ * Pinning Buffer Storage in Memory
+ */
+
+/*
+ * pagebuf_pin
+ *
+ * pagebuf_pin locks all of the memory represented by a buffer in
+ * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
+ * the same or different buffers affecting a given page, will
+ * properly count the number of outstanding "pin" requests. The
+ * buffer may be released after the pagebuf_pin and a different
+ * buffer used when calling pagebuf_unpin, if desired.
+ * pagebuf_pin should be used by the file system when it wants to be
+ * assured that no attempt will be made to force the affected
+ * memory to disk. It does not assure that a given logical page
+ * will not be moved to a different physical page.
+ */
+void
+pagebuf_pin(
+ xfs_buf_t *pb)
+{
+ atomic_inc(&pb->pb_pin_count);
+ PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+}
+
+/*
+ * pagebuf_unpin
+ *
+ * pagebuf_unpin reverses the locking of memory performed by
+ * pagebuf_pin. Note that both functions affect the logical
+ * pages associated with the buffer, not the buffer itself.
+ */
+void
+pagebuf_unpin(
+ xfs_buf_t *pb)
+{
+ if (atomic_dec_and_test(&pb->pb_pin_count)) {
+ wake_up_all(&pb->pb_waiters);
+ }
+ PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
+}
+
+int
+pagebuf_ispin(
+ xfs_buf_t *pb)
+{
+ return atomic_read(&pb->pb_pin_count);
+}
+
+/*
+ * _pagebuf_wait_unpin
+ *
+ * _pagebuf_wait_unpin waits until all of the memory associated
+ * with the buffer is no longer locked in memory. It returns
+ * immediately if none of the affected pages are locked.
+ */
+static inline void
+_pagebuf_wait_unpin(
+ xfs_buf_t *pb)
+{
+ DECLARE_WAITQUEUE (wait, current);
+
+ if (atomic_read(&pb->pb_pin_count) == 0)
+ return;
+
+ add_wait_queue(&pb->pb_waiters, &wait);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&pb->pb_pin_count) == 0)
+ break;
+ if (atomic_read(&pb->pb_io_remaining))
+ blk_run_address_space(pb->pb_target->pbr_mapping);
+ schedule();
+ }
+ remove_wait_queue(&pb->pb_waiters, &wait);
+ set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Buffer Utility Routines
+ */
+
+/*
+ * pagebuf_iodone
+ *
+ * pagebuf_iodone marks a buffer for which I/O is in progress
+ * done with respect to that I/O. The pb_iodone routine, if
+ * present, will be called as a side-effect.
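+ * With 'schedule' set, the completion work is pushed to the xfslogd
+ * or xfsdatad workqueue (depending on 'dataio') rather than run in
+ * the current context.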
+ */ +STATIC void +pagebuf_iodone_work( + void *v) +{ + xfs_buf_t *bp = (xfs_buf_t *)v; + + if (bp->pb_iodone) + (*(bp->pb_iodone))(bp); + else if (bp->pb_flags & PBF_ASYNC) + xfs_buf_relse(bp); +} + +void +pagebuf_iodone( + xfs_buf_t *pb, + int dataio, + int schedule) +{ + pb->pb_flags &= ~(PBF_READ | PBF_WRITE); + if (pb->pb_error == 0) { + pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); + } + + PB_TRACE(pb, "iodone", pb->pb_iodone); + + if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { + if (schedule) { + INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); + queue_work(dataio ? pagebuf_dataio_workqueue : + pagebuf_logio_workqueue, &pb->pb_iodone_work); + } else { + pagebuf_iodone_work(pb); + } + } else { + up(&pb->pb_iodonesema); + } +} + +/* + * pagebuf_ioerror + * + * pagebuf_ioerror sets the error code for a buffer. + */ +void +pagebuf_ioerror( /* mark/clear buffer error flag */ + xfs_buf_t *pb, /* buffer to mark */ + int error) /* error to store (0 if none) */ +{ + ASSERT(error >= 0 && error <= 0xffff); + pb->pb_error = (unsigned short)error; + PB_TRACE(pb, "ioerror", (unsigned long)error); +} + +/* + * pagebuf_iostart + * + * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied. + * If necessary, it will arrange for any disk space allocation required, + * and it will break up the request if the block mappings require it. + * The pb_iodone routine in the buffer supplied will only be called + * when all of the subsidiary I/O requests, if any, have been completed. + * pagebuf_iostart calls the pagebuf_ioinitiate routine or + * pagebuf_iorequest, if the former routine is not defined, to start + * the I/O on a given low-level request. + */ +int +pagebuf_iostart( /* start I/O on a buffer */ + xfs_buf_t *pb, /* buffer to start */ + page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ + /* PBF_WRITE, PBF_DELWRI, */ + /* PBF_DONT_BLOCK */ +{ + int status = 0; + + PB_TRACE(pb, "iostart", (unsigned long)flags); + + if (flags & PBF_DELWRI) { + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); + pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); + pagebuf_delwri_queue(pb, 1); + return status; + } + + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ + PBF_READ_AHEAD | _PBF_RUN_QUEUES); + pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ + PBF_READ_AHEAD | _PBF_RUN_QUEUES); + + BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); + + /* For writes allow an alternate strategy routine to precede + * the actual I/O request (which may not be issued at all in + * a shutdown situation, for example). + */ + status = (flags & PBF_WRITE) ? + pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); + + /* Wait for I/O if we are not an async request. + * Note: async I/O request completion will release the buffer, + * and that can already be done by this point. So using the + * buffer pointer from here on, after async I/O, is invalid. 
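+ *
+ * A synchronous call, e.g. pagebuf_iostart(pb, PBF_READ), only
+ * returns after pagebuf_iowait() and hands back pb_error.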
+ */ + if (!status && !(flags & PBF_ASYNC)) + status = pagebuf_iowait(pb); + + return status; +} + +/* + * Helper routine for pagebuf_iorequest + */ + +STATIC __inline__ int +_pagebuf_iolocked( + xfs_buf_t *pb) +{ + ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); + if (pb->pb_flags & PBF_READ) + return pb->pb_locked; + return 0; +} + +STATIC __inline__ void +_pagebuf_iodone( + xfs_buf_t *pb, + int schedule) +{ + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pb->pb_locked = 0; + pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); + } +} + +STATIC int +bio_end_io_pagebuf( + struct bio *bio, + unsigned int bytes_done, + int error) +{ + xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; + unsigned int i, blocksize = pb->pb_target->pbr_bsize; + struct bio_vec *bvec = bio->bi_io_vec; + + if (bio->bi_size) + return 1; + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + pb->pb_error = EIO; + + for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + struct page *page = bvec->bv_page; + + if (pb->pb_error) { + SetPageError(page); + } else if (blocksize == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + } else if (!PagePrivate(page) && + (pb->pb_flags & _PBF_PAGE_CACHE)) { + set_page_region(page, bvec->bv_offset, bvec->bv_len); + } + + if (_pagebuf_iolocked(pb)) { + unlock_page(page); + } + } + + _pagebuf_iodone(pb, 1); + bio_put(bio); + return 0; +} + +STATIC void +_pagebuf_ioapply( + xfs_buf_t *pb) +{ + int i, rw, map_i, total_nr_pages, nr_pages; + struct bio *bio; + int offset = pb->pb_offset; + int size = pb->pb_count_desired; + sector_t sector = pb->pb_bn; + unsigned int blocksize = pb->pb_target->pbr_bsize; + int locking = _pagebuf_iolocked(pb); + + total_nr_pages = pb->pb_page_count; + map_i = 0; + + if (pb->pb_flags & _PBF_RUN_QUEUES) { + pb->pb_flags &= ~_PBF_RUN_QUEUES; + rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; + } else { + rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; + } + + /* Special code path for reading a sub page size pagebuf in -- + * we populate up the whole page, and hence the other metadata + * in the same page. This optimization is only valid when the + * filesystem block size and the page size are equal. 
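+ * (So a single sector-sized read on a filesystem with 4096-byte
+ * blocks and pages pulls in the whole surrounding page of metadata.)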
+ */ + if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && + (pb->pb_flags & PBF_READ) && locking && + (blocksize == PAGE_CACHE_SIZE)) { + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector - (offset >> BBSHIFT); + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); + size = 0; + + atomic_inc(&pb->pb_io_remaining); + + goto submit_io; + } + + /* Lock down the pages which we need to for the request */ + if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { + for (i = 0; size; i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + struct page *page = pb->pb_pages[i]; + + if (nbytes > size) + nbytes = size; + + lock_page(page); + + size -= nbytes; + offset = 0; + } + offset = pb->pb_offset; + size = pb->pb_count_desired; + } + +next_chunk: + atomic_inc(&pb->pb_io_remaining); + nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); + if (nr_pages > total_nr_pages) + nr_pages = total_nr_pages; + + bio = bio_alloc(GFP_NOIO, nr_pages); + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector; + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + for (; size && nr_pages; nr_pages--, map_i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + if (bio_add_page(bio, pb->pb_pages[map_i], + nbytes, offset) < nbytes) + break; + + offset = 0; + sector += nbytes >> BBSHIFT; + size -= nbytes; + total_nr_pages--; + } + +submit_io: + if (likely(bio->bi_size)) { + submit_bio(rw, bio); + if (size) + goto next_chunk; + } else { + bio_put(bio); + pagebuf_ioerror(pb, EIO); + } +} + +/* + * pagebuf_iorequest -- the core I/O request routine. + */ +int +pagebuf_iorequest( /* start real I/O */ + xfs_buf_t *pb) /* buffer to convey to device */ +{ + PB_TRACE(pb, "iorequest", 0); + + if (pb->pb_flags & PBF_DELWRI) { + pagebuf_delwri_queue(pb, 1); + return 0; + } + + if (pb->pb_flags & PBF_WRITE) { + _pagebuf_wait_unpin(pb); + } + + pagebuf_hold(pb); + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling pagebuf_iodone too early. + */ + atomic_set(&pb->pb_io_remaining, 1); + _pagebuf_ioapply(pb); + _pagebuf_iodone(pb, 0); + + pagebuf_rele(pb); + return 0; +} + +/* + * pagebuf_iowait + * + * pagebuf_iowait waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. In any case, it returns + * the error code, if any, or 0 if there is no error. + */ +int +pagebuf_iowait( + xfs_buf_t *pb) +{ + PB_TRACE(pb, "iowait", 0); + if (atomic_read(&pb->pb_io_remaining)) + blk_run_address_space(pb->pb_target->pbr_mapping); + down(&pb->pb_iodonesema); + PB_TRACE(pb, "iowaited", (long)pb->pb_error); + return pb->pb_error; +} + +caddr_t +pagebuf_offset( + xfs_buf_t *pb, + size_t offset) +{ + struct page *page; + + offset += pb->pb_offset; + + page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; + return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); +} + +/* + * pagebuf_iomove + * + * Move data into or out of a buffer. 
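+ *
+ * PBRW_READ copies from the buffer into 'data', PBRW_WRITE copies
+ * from 'data' into the buffer, and PBRW_ZERO zeroes the range, e.g.
+ * pagebuf_iomove(pb, 0, 512, data, PBRW_READ) copies out the first
+ * 512 bytes.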
+ */ +void +pagebuf_iomove( + xfs_buf_t *pb, /* buffer to process */ + size_t boff, /* starting buffer offset */ + size_t bsize, /* length to copy */ + caddr_t data, /* data address */ + page_buf_rw_t mode) /* read/write flag */ +{ + size_t bend, cpoff, csize; + struct page *page; + + bend = boff + bsize; + while (boff < bend) { + page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; + cpoff = page_buf_poff(boff + pb->pb_offset); + csize = min_t(size_t, + PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); + + ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); + + switch (mode) { + case PBRW_ZERO: + memset(page_address(page) + cpoff, 0, csize); + break; + case PBRW_READ: + memcpy(data, page_address(page) + cpoff, csize); + break; + case PBRW_WRITE: + memcpy(page_address(page) + cpoff, data, csize); + } + + boff += csize; + data += csize; + } +} + +/* + * Handling of buftargs. + */ + +/* + * Wait for any bufs with callbacks that have been submitted but + * have not yet returned... walk the hash list for the target. + */ +void +xfs_wait_buftarg( + xfs_buftarg_t *btp) +{ + xfs_buf_t *bp, *n; + xfs_bufhash_t *hash; + uint i; + + for (i = 0; i < (1 << btp->bt_hashshift); i++) { + hash = &btp->bt_hash[i]; +again: + spin_lock(&hash->bh_lock); + list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { + ASSERT(btp == bp->pb_target); + if (!(bp->pb_flags & PBF_FS_MANAGED)) { + spin_unlock(&hash->bh_lock); + delay(100); + goto again; + } + } + spin_unlock(&hash->bh_lock); + } +} + +/* + * Allocate buffer hash table for a given target. + * For devices containing metadata (i.e. not the log/realtime devices) + * we need to allocate a much larger hash table. + */ +STATIC void +xfs_alloc_bufhash( + xfs_buftarg_t *btp, + int external) +{ + unsigned int i; + + btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ + btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; + btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t), KM_SLEEP); + for (i = 0; i < (1 << btp->bt_hashshift); i++) { + spin_lock_init(&btp->bt_hash[i].bh_lock); + INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); + } +} + +STATIC void +xfs_free_bufhash( + xfs_buftarg_t *btp) +{ + kmem_free(btp->bt_hash, + (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); + btp->bt_hash = NULL; +} + +void +xfs_free_buftarg( + xfs_buftarg_t *btp, + int external) +{ + xfs_flush_buftarg(btp, 1); + if (external) + xfs_blkdev_put(btp->pbr_bdev); + xfs_free_bufhash(btp); + iput(btp->pbr_mapping->host); + kmem_free(btp, sizeof(*btp)); +} + +void +xfs_incore_relse( + xfs_buftarg_t *btp, + int delwri_only, + int wait) +{ + invalidate_bdev(btp->pbr_bdev, 1); + truncate_inode_pages(btp->pbr_mapping, 0LL); +} + +STATIC int +xfs_setsize_buftarg_flags( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize, + int verbose) +{ + btp->pbr_bsize = blocksize; + btp->pbr_sshift = ffs(sectorsize) - 1; + btp->pbr_smask = sectorsize - 1; + + if (set_blocksize(btp->pbr_bdev, sectorsize)) { + printk(KERN_WARNING + "XFS: Cannot set_blocksize to %u on device %s\n", + sectorsize, XFS_BUFTARG_NAME(btp)); + return EINVAL; + } + + if (verbose && + (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { + printk(KERN_WARNING + "XFS: %u byte sectors in use on device %s. 
" + "This is suboptimal; %u or greater is ideal.\n", + sectorsize, XFS_BUFTARG_NAME(btp), + (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); + } + + return 0; +} + +/* +* When allocating the initial buffer target we have not yet +* read in the superblock, so don't know what sized sectors +* are being used is at this early stage. Play safe. +*/ +STATIC int +xfs_setsize_buftarg_early( + xfs_buftarg_t *btp, + struct block_device *bdev) +{ + return xfs_setsize_buftarg_flags(btp, + PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); +} + +int +xfs_setsize_buftarg( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize) +{ + return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); +} + +STATIC int +xfs_mapping_buftarg( + xfs_buftarg_t *btp, + struct block_device *bdev) +{ + struct backing_dev_info *bdi; + struct inode *inode; + struct address_space *mapping; + static struct address_space_operations mapping_aops = { + .sync_page = block_sync_page, + }; + + inode = new_inode(bdev->bd_inode->i_sb); + if (!inode) { + printk(KERN_WARNING + "XFS: Cannot allocate mapping inode for device %s\n", + XFS_BUFTARG_NAME(btp)); + return ENOMEM; + } + inode->i_mode = S_IFBLK; + inode->i_bdev = bdev; + inode->i_rdev = bdev->bd_dev; + bdi = blk_get_backing_dev_info(bdev); + if (!bdi) + bdi = &default_backing_dev_info; + mapping = &inode->i_data; + mapping->a_ops = &mapping_aops; + mapping->backing_dev_info = bdi; + mapping_set_gfp_mask(mapping, GFP_NOFS); + btp->pbr_mapping = mapping; + return 0; +} + +xfs_buftarg_t * +xfs_alloc_buftarg( + struct block_device *bdev, + int external) +{ + xfs_buftarg_t *btp; + + btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + + btp->pbr_dev = bdev->bd_dev; + btp->pbr_bdev = bdev; + if (xfs_setsize_buftarg_early(btp, bdev)) + goto error; + if (xfs_mapping_buftarg(btp, bdev)) + goto error; + xfs_alloc_bufhash(btp, external); + return btp; + +error: + kmem_free(btp, sizeof(*btp)); + return NULL; +} + + +/* + * Pagebuf delayed write buffer handling + */ + +STATIC LIST_HEAD(pbd_delwrite_queue); +STATIC DEFINE_SPINLOCK(pbd_delwrite_lock); + +STATIC void +pagebuf_delwri_queue( + xfs_buf_t *pb, + int unlock) +{ + PB_TRACE(pb, "delwri_q", (long)unlock); + ASSERT(pb->pb_flags & PBF_DELWRI); + + spin_lock(&pbd_delwrite_lock); + /* If already in the queue, dequeue and place at tail */ + if (!list_empty(&pb->pb_list)) { + if (unlock) { + atomic_dec(&pb->pb_hold); + } + list_del(&pb->pb_list); + } + + list_add_tail(&pb->pb_list, &pbd_delwrite_queue); + pb->pb_queuetime = jiffies; + spin_unlock(&pbd_delwrite_lock); + + if (unlock) + pagebuf_unlock(pb); +} + +void +pagebuf_delwri_dequeue( + xfs_buf_t *pb) +{ + int dequeued = 0; + + spin_lock(&pbd_delwrite_lock); + if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { + list_del_init(&pb->pb_list); + dequeued = 1; + } + pb->pb_flags &= ~PBF_DELWRI; + spin_unlock(&pbd_delwrite_lock); + + if (dequeued) + pagebuf_rele(pb); + + PB_TRACE(pb, "delwri_dq", (long)dequeued); +} + +STATIC void +pagebuf_runall_queues( + struct workqueue_struct *queue) +{ + flush_workqueue(queue); +} + +/* Defines for pagebuf daemon */ +STATIC DECLARE_COMPLETION(pagebuf_daemon_done); +STATIC struct task_struct *pagebuf_daemon_task; +STATIC int pagebuf_daemon_active; +STATIC int force_flush; + + +STATIC int +pagebuf_daemon_wakeup( + int priority, + unsigned int mask) +{ + force_flush = 1; + barrier(); + wake_up_process(pagebuf_daemon_task); + return 0; +} + +STATIC int +pagebuf_daemon( + void *data) +{ + struct list_head tmp; + unsigned long age; + 
xfs_buftarg_t *target; + xfs_buf_t *pb, *n; + + /* Set up the thread */ + daemonize("xfsbufd"); + current->flags |= PF_MEMALLOC; + + pagebuf_daemon_task = current; + pagebuf_daemon_active = 1; + barrier(); + + INIT_LIST_HEAD(&tmp); + do { + try_to_freeze(PF_FREEZE); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); + + age = (xfs_buf_age_centisecs * HZ) / 100; + spin_lock(&pbd_delwrite_lock); + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { + PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); + ASSERT(pb->pb_flags & PBF_DELWRI); + + if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { + if (!force_flush && + time_before(jiffies, + pb->pb_queuetime + age)) { + pagebuf_unlock(pb); + break; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + list_move(&pb->pb_list, &tmp); + } + } + spin_unlock(&pbd_delwrite_lock); + + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, xfs_buf_t, pb_list); + target = pb->pb_target; + + list_del_init(&pb->pb_list); + pagebuf_iostrategy(pb); + + blk_run_address_space(target->pbr_mapping); + } + + if (as_list_len > 0) + purge_addresses(); + + force_flush = 0; + } while (pagebuf_daemon_active); + + complete_and_exit(&pagebuf_daemon_done, 0); +} + +/* + * Go through all incore buffers, and release buffers if they belong to + * the given device. This is used in filesystem error handling to + * preserve the consistency of its metadata. + */ +int +xfs_flush_buftarg( + xfs_buftarg_t *target, + int wait) +{ + struct list_head tmp; + xfs_buf_t *pb, *n; + int pincount = 0; + + pagebuf_runall_queues(pagebuf_dataio_workqueue); + pagebuf_runall_queues(pagebuf_logio_workqueue); + + INIT_LIST_HEAD(&tmp); + spin_lock(&pbd_delwrite_lock); + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { + + if (pb->pb_target != target) + continue; + + ASSERT(pb->pb_flags & PBF_DELWRI); + PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); + if (pagebuf_ispin(pb)) { + pincount++; + continue; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + list_move(&pb->pb_list, &tmp); + } + spin_unlock(&pbd_delwrite_lock); + + /* + * Dropped the delayed write list lock, now walk the temporary list + */ + list_for_each_entry_safe(pb, n, &tmp, pb_list) { + if (wait) + pb->pb_flags &= ~PBF_ASYNC; + else + list_del_init(&pb->pb_list); + + pagebuf_lock(pb); + pagebuf_iostrategy(pb); + } + + /* + * Remaining list items must be flushed before returning + */ + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, xfs_buf_t, pb_list); + + list_del_init(&pb->pb_list); + xfs_iowait(pb); + xfs_buf_relse(pb); + } + + if (wait) + blk_run_address_space(target->pbr_mapping); + + return pincount; +} + +STATIC int +pagebuf_daemon_start(void) +{ + int rval; + + pagebuf_logio_workqueue = create_workqueue("xfslogd"); + if (!pagebuf_logio_workqueue) + return -ENOMEM; + + pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); + if (!pagebuf_dataio_workqueue) { + destroy_workqueue(pagebuf_logio_workqueue); + return -ENOMEM; + } + + rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); + if (rval < 0) { + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); + } + + return rval; +} + +/* + * pagebuf_daemon_stop + * + * Note: do not mark as __exit, it is called from pagebuf_terminate. 
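+ * (pagebuf_terminate in turn runs from __init error paths, so this
+ * must stay in regular text as well.)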
+ */ +STATIC void +pagebuf_daemon_stop(void) +{ + pagebuf_daemon_active = 0; + barrier(); + wait_for_completion(&pagebuf_daemon_done); + + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); +} + +/* + * Initialization and Termination + */ + +int __init +pagebuf_init(void) +{ + pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (pagebuf_cache == NULL) { + printk("XFS: couldn't init xfs_buf_t cache\n"); + pagebuf_terminate(); + return -ENOMEM; + } + +#ifdef PAGEBUF_TRACE + pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); +#endif + + pagebuf_daemon_start(); + + pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup); + if (pagebuf_shake == NULL) { + pagebuf_terminate(); + return -ENOMEM; + } + + return 0; +} + + +/* + * pagebuf_terminate. + * + * Note: do not mark as __exit, this is also called from the __init code. + */ +void +pagebuf_terminate(void) +{ + pagebuf_daemon_stop(); + +#ifdef PAGEBUF_TRACE + ktrace_free(pagebuf_trace_buf); +#endif + + kmem_zone_destroy(pagebuf_cache); + kmem_shake_deregister(pagebuf_shake); +} diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h new file mode 100644 index 00000000000..74deed8e6d9 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI + */ + +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include <linux/config.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <asm/system.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/uio.h> + +/* + * Base types + */ + +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) +#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum page_buf_rw_e { + PBRW_READ = 1, /* transfer into target memory */ + PBRW_WRITE = 2, /* transfer from target memory */ + PBRW_ZERO = 3 /* Zero target memory */ +} page_buf_rw_t; + + +typedef enum page_buf_flags_e { /* pb_flags values */ + PBF_READ = (1 << 0), /* buffer intended for reading from device */ + PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ + PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ + PBF_PARTIAL = (1 << 3), /* buffer partially read */ + PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ + PBF_NONE = (1 << 5), /* buffer not read at all */ + PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ + PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ + PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ + PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad */ + PBF_FORCEIO = (1 << 10), /* ignore any cache state */ + PBF_FLUSH = (1 << 11), /* flush disk write cache */ + PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ + + /* flags used only as arguments to access routines */ + PBF_LOCK = (1 << 14), /* lock requested */ + PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ + PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ + + /* flags used only internally */ + _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ + _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ +} page_buf_flags_t; + +#define PBF_UPDATE (PBF_READ | PBF_WRITE) +#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) +#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) + +typedef struct xfs_bufhash { + struct list_head bh_list; + spinlock_t bh_lock; +} xfs_bufhash_t; + +typedef struct xfs_buftarg { + dev_t pbr_dev; + struct block_device *pbr_bdev; + struct address_space *pbr_mapping; + unsigned int pbr_bsize; + unsigned int pbr_sshift; + size_t pbr_smask; + + /* per-device buffer hash table */ + uint bt_hashmask; + uint bt_hashshift; + xfs_bufhash_t *bt_hash; +} xfs_buftarg_t; + +/* + * xfs_buf_t: Buffer structure for page cache-based buffers + * + * This buffer structure is used by the page cache buffer management routines + * to refer to an assembly of pages forming a logical buffer. The actual I/O + * is performed with buffer_head structures, as required by drivers. + * + * The buffer structure is used on temporary basis only, and discarded when + * released. The real data storage is recorded in the page cache. 
Metadata is + * hashed to the block device on which the file system resides. + */ + +struct xfs_buf; + +/* call-back function on I/O completion */ +typedef void (*page_buf_iodone_t)(struct xfs_buf *); +/* call-back function on I/O completion */ +typedef void (*page_buf_relse_t)(struct xfs_buf *); +/* pre-write function */ +typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); + +#define PB_PAGES 2 + +typedef struct xfs_buf { + struct semaphore pb_sema; /* semaphore for lockables */ + unsigned long pb_queuetime; /* time buffer was queued */ + atomic_t pb_pin_count; /* pin count */ + wait_queue_head_t pb_waiters; /* unpin waiters */ + struct list_head pb_list; + page_buf_flags_t pb_flags; /* status flags */ + struct list_head pb_hash_list; /* hash table list */ + xfs_bufhash_t *pb_hash; /* hash table list start */ + xfs_buftarg_t *pb_target; /* buffer target (device) */ + atomic_t pb_hold; /* reference count */ + xfs_daddr_t pb_bn; /* block number for I/O */ + loff_t pb_file_offset; /* offset in file */ + size_t pb_buffer_length; /* size of buffer in bytes */ + size_t pb_count_desired; /* desired transfer size */ + void *pb_addr; /* virtual address of buffer */ + struct work_struct pb_iodone_work; + atomic_t pb_io_remaining;/* #outstanding I/O requests */ + page_buf_iodone_t pb_iodone; /* I/O completion function */ + page_buf_relse_t pb_relse; /* releasing function */ + page_buf_bdstrat_t pb_strat; /* pre-write function */ + struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ + void *pb_fspriv; + void *pb_fspriv2; + void *pb_fspriv3; + unsigned short pb_error; /* error code on I/O */ + unsigned short pb_locked; /* page array is locked */ + unsigned int pb_page_count; /* size of page array */ + unsigned int pb_offset; /* page offset in first page */ + struct page **pb_pages; /* array of page pointers */ + struct page *pb_page_array[PB_PAGES]; /* inline pages */ +#ifdef PAGEBUF_LOCK_TRACKING + int pb_last_holder; +#endif +} xfs_buf_t; + + +/* Finding and Reading Buffers */ + +extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */ + /* the block is in memory */ + xfs_buftarg_t *, /* inode for block */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t, /* PBF_LOCK */ + xfs_buf_t *); /* newly allocated buffer */ + +#define xfs_incore(buftarg,blkno,len,lockit) \ + _pagebuf_find(buftarg, blkno ,len, lockit, NULL) + +extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */ + xfs_buftarg_t *, /* inode for buffer */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ + /* PBF_ASYNC */ + +#define xfs_buf_get(target, blkno, len, flags) \ + xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) + +extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ + xfs_buftarg_t *, /* inode for buffer */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */ + +#define xfs_buf_read(target, blkno, len, flags) \ + xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) + +extern xfs_buf_t *pagebuf_lookup( + xfs_buftarg_t *, + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ + /* PBF_FORCEIO, */ + +extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ + /* no memory or disk address */ + size_t len, + xfs_buftarg_t *); /* mount point "fake" inode */ + +extern xfs_buf_t *pagebuf_get_no_daddr(/* 
allocate pagebuf struct */ + /* without disk address */ + size_t len, + xfs_buftarg_t *); /* mount point "fake" inode */ + +extern int pagebuf_associate_memory( + xfs_buf_t *, + void *, + size_t); + +extern void pagebuf_hold( /* increment reference count */ + xfs_buf_t *); /* buffer to hold */ + +extern void pagebuf_readahead( /* read ahead into cache */ + xfs_buftarg_t *, /* target for buffer (or NULL) */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* additional read flags */ + +/* Releasing Buffers */ + +extern void pagebuf_free( /* deallocate a buffer */ + xfs_buf_t *); /* buffer to deallocate */ + +extern void pagebuf_rele( /* release hold on a buffer */ + xfs_buf_t *); /* buffer to release */ + +/* Locking and Unlocking Buffers */ + +extern int pagebuf_cond_lock( /* lock buffer, if not locked */ + /* (returns -EBUSY if locked) */ + xfs_buf_t *); /* buffer to lock */ + +extern int pagebuf_lock_value( /* return count on lock */ + xfs_buf_t *); /* buffer to check */ + +extern int pagebuf_lock( /* lock buffer */ + xfs_buf_t *); /* buffer to lock */ + +extern void pagebuf_unlock( /* unlock buffer */ + xfs_buf_t *); /* buffer to unlock */ + +/* Buffer Read and Write Routines */ + +extern void pagebuf_iodone( /* mark buffer I/O complete */ + xfs_buf_t *, /* buffer to mark */ + int, /* use data/log helper thread. */ + int); /* run completion locally, or in + * a helper thread. */ + +extern void pagebuf_ioerror( /* mark buffer in error (or not) */ + xfs_buf_t *, /* buffer to mark */ + int); /* error to store (0 if none) */ + +extern int pagebuf_iostart( /* start I/O on a buffer */ + xfs_buf_t *, /* buffer to start */ + page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ + /* PBF_READ, PBF_WRITE, */ + /* PBF_DELWRI */ + +extern int pagebuf_iorequest( /* start real I/O */ + xfs_buf_t *); /* buffer to convey to device */ + +extern int pagebuf_iowait( /* wait for buffer I/O done */ + xfs_buf_t *); /* buffer to wait on */ + +extern void pagebuf_iomove( /* move data in/out of pagebuf */ + xfs_buf_t *, /* buffer to manipulate */ + size_t, /* starting buffer offset */ + size_t, /* length in buffer */ + caddr_t, /* data pointer */ + page_buf_rw_t); /* direction */ + +static inline int pagebuf_iostrategy(xfs_buf_t *pb) +{ + return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); +} + +static inline int pagebuf_geterror(xfs_buf_t *pb) +{ + return pb ? 
pb->pb_error : ENOMEM; +} + +/* Buffer Utility Routines */ + +extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ + xfs_buf_t *, /* buffer to offset into */ + size_t); /* offset */ + +/* Pinning Buffer Storage in Memory */ + +extern void pagebuf_pin( /* pin buffer in memory */ + xfs_buf_t *); /* buffer to pin */ + +extern void pagebuf_unpin( /* unpin buffered data */ + xfs_buf_t *); /* buffer to unpin */ + +extern int pagebuf_ispin( /* check if buffer is pinned */ + xfs_buf_t *); /* buffer to check */ + +/* Delayed Write Buffer Routines */ + +extern void pagebuf_delwri_dequeue(xfs_buf_t *); + +/* Buffer Daemon Setup Routines */ + +extern int pagebuf_init(void); +extern void pagebuf_terminate(void); + + +#ifdef PAGEBUF_TRACE +extern ktrace_t *pagebuf_trace_buf; +extern void pagebuf_trace( + xfs_buf_t *, /* buffer being traced */ + char *, /* description of operation */ + void *, /* arbitrary diagnostic value */ + void *); /* return address */ +#else +# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) +#endif + +#define pagebuf_target_name(target) \ + ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) + + + + + +/* These are just for xfs_syncsub... it sets an internal variable + * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t + */ +#define XFS_B_ASYNC PBF_ASYNC +#define XFS_B_DELWRI PBF_DELWRI +#define XFS_B_READ PBF_READ +#define XFS_B_WRITE PBF_WRITE +#define XFS_B_STALE PBF_STALE + +#define XFS_BUF_TRYLOCK PBF_TRYLOCK +#define XFS_INCORE_TRYLOCK PBF_TRYLOCK +#define XFS_BUF_LOCK PBF_LOCK +#define XFS_BUF_MAPPED PBF_MAPPED + +#define BUF_BUSY PBF_DONT_BLOCK + +#define XFS_BUF_BFLAGS(x) ((x)->pb_flags) +#define XFS_BUF_ZEROFLAGS(x) \ + ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI)) + +#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) +#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) +#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) +#define XFS_BUF_SUPER_STALE(x) do { \ + XFS_BUF_STALE(x); \ + pagebuf_delwri_dequeue(x); \ + XFS_BUF_DONE(x); \ + } while (0) + +#define XFS_BUF_MANAGE PBF_FS_MANAGED +#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) + +#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x) +#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) + +#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) +#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) +#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) + +#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE)) +#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE) +#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x))) + +#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO) +#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO) +#define XFS_BUF_ISBUSY(x) (1) + +#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) +#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) +#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) + +#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) +#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) +#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) + +#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") +#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") +#define XFS_BUF_ISSHUT(x) (0) + +#define XFS_BUF_HOLD(x) pagebuf_hold(x) +#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) +#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ) 
+#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) + +#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) +#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) +#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) + +#define XFS_BUF_ISUNINITIAL(x) (0) +#define XFS_BUF_UNUNINITIAL(x) (0) + +#define XFS_BUF_BP_ISMAPPED(bp) 1 + +#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) +#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) + +#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone +#define XFS_BUF_SET_IODONE_FUNC(buf, func) \ + (buf)->pb_iodone = (func) +#define XFS_BUF_CLR_IODONE_FUNC(buf) \ + (buf)->pb_iodone = NULL +#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ + (buf)->pb_strat = (func) +#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ + (buf)->pb_strat = NULL + +#define XFS_BUF_FSPRIVATE(buf, type) \ + ((type)(buf)->pb_fspriv) +#define XFS_BUF_SET_FSPRIVATE(buf, value) \ + (buf)->pb_fspriv = (void *)(value) +#define XFS_BUF_FSPRIVATE2(buf, type) \ + ((type)(buf)->pb_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ + (buf)->pb_fspriv2 = (void *)(value) +#define XFS_BUF_FSPRIVATE3(buf, type) \ + ((type)(buf)->pb_fspriv3) +#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ + (buf)->pb_fspriv3 = (void *)(value) +#define XFS_BUF_SET_START(buf) + +#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ + (buf)->pb_relse = (value) + +#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) + +extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) +{ + if (bp->pb_flags & PBF_MAPPED) + return XFS_BUF_PTR(bp) + offset; + return (xfs_caddr_t) pagebuf_offset(bp, offset); +} + +#define XFS_BUF_SET_PTR(bp, val, count) \ + pagebuf_associate_memory(bp, val, count) +#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) +#define XFS_BUF_SET_ADDR(bp, blk) \ + ((bp)->pb_bn = (xfs_daddr_t)(blk)) +#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) \ + ((bp)->pb_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) \ + ((bp)->pb_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) \ + ((bp)->pb_buffer_length = (cnt)) +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) + +#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) +#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); + +/* setup the buffer target from a buftarg structure */ +#define XFS_BUF_SET_TARGET(bp, target) \ + (bp)->pb_target = (target) +#define XFS_BUF_TARGET(bp) ((bp)->pb_target) +#define XFS_BUFTARG_NAME(target) \ + pagebuf_target_name(target) + +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) +{ + bp->pb_fspriv3 = mp; + bp->pb_strat = xfs_bdstrat_cb; + pagebuf_delwri_dequeue(bp); + return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); +} + +static inline void xfs_buf_relse(xfs_buf_t *bp) +{ + if (!bp->pb_relse) + pagebuf_unlock(bp); + pagebuf_rele(bp); +} + +#define xfs_bpin(bp) pagebuf_pin(bp) +#define xfs_bunpin(bp) pagebuf_unpin(bp) + +#define xfs_buftrace(id, bp) \ + pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) + +#define xfs_biodone(pb) \ + pagebuf_iodone(pb, 
(pb->pb_flags & PBF_FS_DATAIOD), 0) + +#define xfs_biomove(pb, off, len, data, rw) \ + pagebuf_iomove((pb), (off), (len), (data), \ + ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) + +#define xfs_biozero(pb, off, len) \ + pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) + + +static inline int XFS_bwrite(xfs_buf_t *pb) +{ + int iowait = (pb->pb_flags & PBF_ASYNC) == 0; + int error = 0; + + if (!iowait) + pb->pb_flags |= _PBF_RUN_QUEUES; + + pagebuf_delwri_dequeue(pb); + pagebuf_iostrategy(pb); + if (iowait) { + error = pagebuf_iowait(pb); + xfs_buf_relse(pb); + } + return error; +} + +#define XFS_bdwrite(pb) \ + pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) + +static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) +{ + bp->pb_strat = xfs_bdstrat_cb; + bp->pb_fspriv3 = mp; + + return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); +} + +#define XFS_bdstrat(bp) pagebuf_iorequest(bp) + +#define xfs_iowait(pb) pagebuf_iowait(pb) + +#define xfs_baread(target, rablkno, ralen) \ + pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) + +#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) +#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) +#define xfs_buf_free(bp) pagebuf_free(bp) + + +/* + * Handling of buftargs. + */ + +extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); +extern void xfs_free_buftarg(xfs_buftarg_t *, int); +extern void xfs_wait_buftarg(xfs_buftarg_t *); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern void xfs_incore_relse(xfs_buftarg_t *, int, int); +extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + +#define xfs_getsize_buftarg(buftarg) \ + block_size((buftarg)->pbr_bdev) +#define xfs_readonly_buftarg(buftarg) \ + bdev_read_only((buftarg)->pbr_bdev) +#define xfs_binval(buftarg) \ + xfs_flush_buftarg(buftarg, 1) +#define XFS_bflush(buftarg) \ + xfs_flush_buftarg(buftarg, 1) + +#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h new file mode 100644 index 00000000000..00c45849d41 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_CRED_H__ +#define __XFS_CRED_H__ + +/* + * Credentials + */ +typedef struct cred { + /* EMPTY */ +} cred_t; + +extern struct cred *sys_cred; + +/* This is a hack (it assumes sys_cred is the only cred_t in the system) */ +static __inline int capable_cred(cred_t *cr, int cid) +{ + return (cr == sys_cred) ? 1 : capable(cid); +} + +#endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c new file mode 100644 index 00000000000..f372a1a5e16 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_dmapi.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_mount.h" +#include "xfs_export.h" + +/* + * XFS encodes and decodes the fileid portion of NFS filehandles + * itself instead of letting the generic NFS code do it. This + * allows filesystems with 64 bit inode numbers to be exported. + * + * Note that a side effect is that xfs_vget() won't be passed a + * zero inode/generation pair under normal circumstances. However, + * as a malicious client could send us such data, the check + * remains in that code. + */ + + +STATIC struct dentry * +linvfs_decode_fh( + struct super_block *sb, + __u32 *fh, + int fh_len, + int fileid_type, + int (*acceptable)( + void *context, + struct dentry *de), + void *context) +{ + xfs_fid2_t ifid; + xfs_fid2_t pfid; + void *parent = NULL; + int is64 = 0; + __u32 *p = fh; + +#if XFS_BIG_INUMS + is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG); + fileid_type &= ~XFS_FILEID_TYPE_64FLAG; +#endif + + /* + * Note that we only accept fileids which are long enough + * rather than allow the parent generation number to default + * to zero. XFS considers zero a valid generation number, not + * an invalid/wildcard value. 
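+ * (Concretely, per xfs_fileid_length() a type-1 fileid carries 2 words, + * or 3 with 64-bit inodes, and a type-2 fileid carries 4 or 6.)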
There's little point printk'ing + * a warning here as we don't have the client information + * which would make such a warning useful. + */ + if (fileid_type > 2 || + fh_len < xfs_fileid_length((fileid_type == 2), is64)) + return NULL; + + p = xfs_fileid_decode_fid2(p, &ifid, is64); + + if (fileid_type == 2) { + p = xfs_fileid_decode_fid2(p, &pfid, is64); + parent = &pfid; + } + + fh = (__u32 *)&ifid; + return find_exported_dentry(sb, fh, parent, acceptable, context); +} + + +STATIC int +linvfs_encode_fh( + struct dentry *dentry, + __u32 *fh, + int *max_len, + int connectable) +{ + struct inode *inode = dentry->d_inode; + int type = 1; + __u32 *p = fh; + int len; + int is64 = 0; +#if XFS_BIG_INUMS + vfs_t *vfs = LINVFS_GET_VFS(inode->i_sb); + xfs_mount_t *mp = XFS_VFSTOM(vfs); + + if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) { + /* filesystem may contain 64bit inode numbers */ + is64 = XFS_FILEID_TYPE_64FLAG; + } +#endif + + /* Directories don't need their parent encoded, they have ".." */ + if (S_ISDIR(inode->i_mode)) + connectable = 0; + + /* + * Only encode if there is enough space given. In practice + * this means we can't export a filesystem with 64bit inodes + * over NFSv2 with the subtree_check export option; the other + * seven combinations work. The real answer is "don't use v2". + */ + len = xfs_fileid_length(connectable, is64); + if (*max_len < len) + return 255; + *max_len = len; + + p = xfs_fileid_encode_inode(p, inode, is64); + if (connectable) { + spin_lock(&dentry->d_lock); + p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64); + spin_unlock(&dentry->d_lock); + type = 2; + } + BUG_ON((p - fh) != len); + return type | is64; +} + +STATIC struct dentry * +linvfs_get_dentry( + struct super_block *sb, + void *data) +{ + vnode_t *vp; + struct inode *inode; + struct dentry *result; + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_VGET(vfsp, &vp, (fid_t *)data, error); + if (error || vp == NULL) + return ERR_PTR(-ESTALE) ; + + inode = LINVFS_GET_IP(vp); + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +STATIC struct dentry * +linvfs_get_parent( + struct dentry *child) +{ + int error; + vnode_t *vp, *cvp; + struct dentry *parent; + struct dentry dotdot; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_inode = NULL; + + cvp = NULL; + vp = LINVFS_GET_VP(child->d_inode); + VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); + if (unlikely(error)) + return ERR_PTR(-error); + + parent = d_alloc_anon(LINVFS_GET_IP(cvp)); + if (unlikely(!parent)) { + VN_RELE(cvp); + return ERR_PTR(-ENOMEM); + } + return parent; +} + +struct export_operations linvfs_export_ops = { + .decode_fh = linvfs_decode_fh, + .encode_fh = linvfs_encode_fh, + .get_parent = linvfs_get_parent, + .get_dentry = linvfs_get_dentry, +}; diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h new file mode 100644 index 00000000000..60b2abac1c1 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_export.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_EXPORT_H__ +#define __XFS_EXPORT_H__ + +/* + * Common defines for code related to exporting XFS filesystems over NFS. + * + * The NFS fileid goes out on the wire as an array of + * 32bit unsigned ints in host order. There are 5 possible + * formats. + * + * (1) fileid_type=0x00 + * (no fileid data; handled by the generic code) + * + * (2) fileid_type=0x01 + * inode-num + * generation + * + * (3) fileid_type=0x02 + * inode-num + * generation + * parent-inode-num + * parent-generation + * + * (4) fileid_type=0x81 + * inode-num-lo32 + * inode-num-hi32 + * generation + * + * (5) fileid_type=0x82 + * inode-num-lo32 + * inode-num-hi32 + * generation + * parent-inode-num-lo32 + * parent-inode-num-hi32 + * parent-generation + * + * Note, the NFS filehandle also includes an fsid portion which + * may have an inode number in it. That number is hardcoded to + * 32bits and there is no way for XFS to intercept it. In + * practice this means when exporting an XFS filesystem with 64bit + * inodes you should either export the mountpoint (rather than + * a subdirectory) or use the "fsid" export option. + */ + +/* This flag goes on the wire. Don't play with it. */ +#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ + +/* Calculate the length in u32 units of the fileid data */ +static inline int +xfs_fileid_length(int hasparent, int is64) +{ + return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2); +} + +/* + * Decode encoded inode information (either for the inode itself + * or the parent) into an xfs_fid2_t structure. Advances and + * returns the new data pointer. + */ +static inline __u32 * +xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64) +{ + fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len); + fid->fid_pad = 0; + fid->fid_ino = *p++; +#if XFS_BIG_INUMS + if (is64) + fid->fid_ino |= (((__u64)(*p++)) << 32); +#endif + fid->fid_gen = *p++; + return p; +} + +/* + * Encode inode information (either for the inode itself or the + * parent) into a fileid buffer. Advances and returns the new + * data pointer. + */ +static inline __u32 * +xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64) +{ + *p++ = (__u32)inode->i_ino; +#if XFS_BIG_INUMS + if (is64) + *p++ = (__u32)(inode->i_ino >> 32); +#endif + *p++ = inode->i_generation; + return p; +} + +#endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c new file mode 100644 index 00000000000..9f057a4a5b0 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -0,0 +1,573 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_trans.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_ioctl32.h" + +#include <linux/dcache.h> +#include <linux/smp_lock.h> + +static struct vm_operations_struct linvfs_file_vm_ops; + + +STATIC inline ssize_t +__linvfs_read( + struct kiocb *iocb, + char __user *buf, + int ioflags, + size_t count, + loff_t pos) +{ + struct iovec iov = {buf, count}; + struct file *file = iocb->ki_filp; + vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode); + ssize_t rval; + + BUG_ON(iocb->ki_pos != pos); + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); + return rval; +} + + +STATIC ssize_t +linvfs_aio_read( + struct kiocb *iocb, + char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_read(iocb, buf, IO_ISAIO, count, pos); +} + +STATIC ssize_t +linvfs_aio_read_invis( + struct kiocb *iocb, + char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); +} + + +STATIC inline ssize_t +__linvfs_write( + struct kiocb *iocb, + const char __user *buf, + int ioflags, + size_t count, + loff_t pos) +{ + struct iovec iov = {(void __user *)buf, count}; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + ssize_t rval; + + BUG_ON(iocb->ki_pos != pos); + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + + VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); + return rval; +} + + +STATIC ssize_t +linvfs_aio_write( + struct kiocb *iocb, + const char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_write(iocb, buf, IO_ISAIO, count, pos); +} + +STATIC ssize_t +linvfs_aio_write_invis( + struct kiocb *iocb, + const char __user 
*buf, + size_t count, + loff_t pos) +{ + return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); +} + + +STATIC inline ssize_t +__linvfs_readv( + struct file *file, + const struct iovec *iov, + int ioflags, + unsigned long nr_segs, + loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + struct kiocb kiocb; + ssize_t rval; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + + *ppos = kiocb.ki_pos; + return rval; +} + +STATIC ssize_t +linvfs_readv( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_readv(file, iov, 0, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_readv_invis( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos); +} + + +STATIC inline ssize_t +__linvfs_writev( + struct file *file, + const struct iovec *iov, + int ioflags, + unsigned long nr_segs, + loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + struct kiocb kiocb; + ssize_t rval; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + + VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + + *ppos = kiocb.ki_pos; + return rval; +} + + +STATIC ssize_t +linvfs_writev( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_writev(file, iov, 0, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_writev_invis( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_sendfile( + struct file *filp, + loff_t *ppos, + size_t count, + read_actor_t actor, + void *target) +{ + vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + ssize_t rval; + + VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval); + return rval; +} + + +STATIC int +linvfs_open( + struct inode *inode, + struct file *filp) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + return -EFBIG; + + ASSERT(vp); + VOP_OPEN(vp, NULL, error); + return -error; +} + + +STATIC int +linvfs_release( + struct inode *inode, + struct file *filp) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error = 0; + + if (vp) + VOP_RELEASE(vp, error); + return -error; +} + + +STATIC int +linvfs_fsync( + struct file *filp, + struct dentry *dentry, + int datasync) +{ + struct inode *inode = dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + int flags = FSYNC_WAIT; + + if (datasync) + flags |= FSYNC_DATA; + + ASSERT(vp); + VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); + return -error; +} + +/* + * linvfs_readdir maps to VOP_READDIR(). + * We need to build a uio, cred, ... 
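+ * In practice the loop below builds an iovec/uio pair (the credential + * argument is simply passed as NULL), lets VOP_READDIR copy dirents into + * a kernel buffer, and hands each entry to filldir().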
+ */ + +#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) + +STATIC int +linvfs_readdir( + struct file *filp, + void *dirent, + filldir_t filldir) +{ + int error = 0; + vnode_t *vp; + uio_t uio; + iovec_t iov; + int eof = 0; + caddr_t read_buf; + int namelen, size = 0; + size_t rlen = PAGE_CACHE_SIZE; + xfs_off_t start_offset, curr_offset; + xfs_dirent_t *dbp = NULL; + + vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + ASSERT(vp); + + /* Try fairly hard to get memory */ + do { + if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) + break; + rlen >>= 1; + } while (rlen >= 1024); + + if (read_buf == NULL) + return -ENOMEM; + + uio.uio_iov = &iov; + uio.uio_segflg = UIO_SYSSPACE; + curr_offset = filp->f_pos; + if (filp->f_pos != 0x7fffffff) + uio.uio_offset = filp->f_pos; + else + uio.uio_offset = 0xffffffff; + + while (!eof) { + uio.uio_resid = iov.iov_len = rlen; + iov.iov_base = read_buf; + uio.uio_iovcnt = 1; + + start_offset = uio.uio_offset; + + VOP_READDIR(vp, &uio, NULL, &eof, error); + if ((uio.uio_offset == start_offset) || error) { + size = 0; + break; + } + + size = rlen - uio.uio_resid; + dbp = (xfs_dirent_t *)read_buf; + while (size > 0) { + namelen = strlen(dbp->d_name); + + if (filldir(dirent, dbp->d_name, namelen, + (loff_t) curr_offset & 0x7fffffff, + (ino_t) dbp->d_ino, + DT_UNKNOWN)) { + goto done; + } + size -= dbp->d_reclen; + curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */; + dbp = nextdp(dbp); + } + } +done: + if (!error) { + if (size == 0) + filp->f_pos = uio.uio_offset & 0x7fffffff; + else if (dbp) + filp->f_pos = curr_offset; + } + + kfree(read_buf); + return -error; +} + + +STATIC int +linvfs_file_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + struct inode *ip = filp->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(ip); + vattr_t va = { .va_mask = XFS_AT_UPDATIME }; + int error; + + if (vp->v_vfsp->vfs_flag & VFS_DMI) { + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + + error = -XFS_SEND_MMAP(mp, vma, 0); + if (error) + return error; + } + + vma->vm_ops = &linvfs_file_vm_ops; + + VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); + if (!error) + vn_revalidate(vp); /* update Linux inode flags */ + return 0; +} + + +STATIC long +linvfs_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + int error; + struct inode *inode = filp->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + + VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error); + VMODIFY(vp); + + /* NOTE: some of the ioctl's return positive #'s as a + * byte count indicating success, such as + * readlink_by_handle. So we don't "sign flip" + * like most other routines. This means true + * errors need to be returned as a negative value. + */ + return error; +} + +STATIC long +linvfs_ioctl_invis( + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + int error; + struct inode *inode = filp->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + + ASSERT(vp); + VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); + VMODIFY(vp); + + /* NOTE: some of the ioctl's return positive #'s as a + * byte count indicating success, such as + * readlink_by_handle. So we don't "sign flip" + * like most other routines. This means true + * errors need to be returned as a negative value. 
+ */ + return error; +} + +#ifdef HAVE_VMOP_MPROTECT +STATIC int +linvfs_mprotect( + struct vm_area_struct *vma, + unsigned int newflags) +{ + vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode); + int error = 0; + + if (vp->v_vfsp->vfs_flag & VFS_DMI) { + if ((vma->vm_flags & VM_MAYSHARE) && + (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) { + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + + error = XFS_SEND_MMAP(mp, vma, VM_WRITE); + } + } + return error; +} +#endif /* HAVE_VMOP_MPROTECT */ + +#ifdef HAVE_FOP_OPEN_EXEC +/* If the user is attempting to execute a file that is offline then + * we have to trigger a DMAPI READ event before the file is marked as busy + * otherwise the invisible I/O will not be able to write to the file to bring + * it back online. + */ +STATIC int +linvfs_open_exec( + struct inode *inode) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + int error = 0; + bhv_desc_t *bdp; + xfs_inode_t *ip; + + if (vp->v_vfsp->vfs_flag & VFS_DMI) { + bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); + if (!bdp) { + error = -EINVAL; + goto open_exec_out; + } + ip = XFS_BHVTOI(bdp); + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { + error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, + 0, 0, 0, NULL); + } + } +open_exec_out: + return error; +} +#endif /* HAVE_FOP_OPEN_EXEC */ + +struct file_operations linvfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .readv = linvfs_readv, + .writev = linvfs_writev, + .aio_read = linvfs_aio_read, + .aio_write = linvfs_aio_write, + .sendfile = linvfs_sendfile, + .unlocked_ioctl = linvfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_compat_ioctl, +#endif + .mmap = linvfs_file_mmap, + .open = linvfs_open, + .release = linvfs_release, + .fsync = linvfs_fsync, +#ifdef HAVE_FOP_OPEN_EXEC + .open_exec = linvfs_open_exec, +#endif +}; + +struct file_operations linvfs_invis_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .readv = linvfs_readv_invis, + .writev = linvfs_writev_invis, + .aio_read = linvfs_aio_read_invis, + .aio_write = linvfs_aio_write_invis, + .sendfile = linvfs_sendfile, + .unlocked_ioctl = linvfs_ioctl_invis, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_compat_invis_ioctl, +#endif + .mmap = linvfs_file_mmap, + .open = linvfs_open, + .release = linvfs_release, + .fsync = linvfs_fsync, +}; + + +struct file_operations linvfs_dir_operations = { + .read = generic_read_dir, + .readdir = linvfs_readdir, + .unlocked_ioctl = linvfs_ioctl, + .fsync = linvfs_fsync, +}; + +static struct vm_operations_struct linvfs_file_vm_ops = { + .nopage = filemap_nopage, + .populate = filemap_populate, +#ifdef HAVE_VMOP_MPROTECT + .mprotect = linvfs_mprotect, +#endif +}; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c new file mode 100644 index 00000000000..05ebd30ec96 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +/* + * Stub for no-op vnode operations that return error status. + */ +int +fs_noerr(void) +{ + return 0; +} + +/* + * Operation unsupported under this file system. + */ +int +fs_nosys(void) +{ + return ENOSYS; +} + +/* + * Stub for inactive, strategy, and read/write lock/unlock. Does nothing. + */ +/* ARGSUSED */ +void +fs_noval(void) +{ +} + +/* + * vnode pcache layer for vnode_tosspages. + * 'last' parameter unused but left in for IRIX compatibility + */ +void +fs_tosspages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) + truncate_inode_pages(ip->i_mapping, first); +} + + +/* + * vnode pcache layer for vnode_flushinval_pages. + * 'last' parameter unused but left in for IRIX compatibility + */ +void +fs_flushinval_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatawrite(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + + truncate_inode_pages(ip->i_mapping, first); + } +} + +/* + * vnode pcache layer for vnode_flush_pages. + * 'last' parameter unused but left in for IRIX compatibility + */ +int +fs_flush_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + uint64_t flags, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatawrite(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + } + + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h new file mode 100644 index 00000000000..2db9ddbd456 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_fs_subr.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUBR_H__ +#define __XFS_SUBR_H__ + +/* + * Utilities shared among file system implementations. + */ + +struct cred; + +extern int fs_noerr(void); +extern int fs_nosys(void); +extern void fs_noval(void); +extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); + +#endif /* __XFS_FS_SUBR_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c new file mode 100644 index 00000000000..a6da5b4fd24 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * This file contains globals needed by XFS that were normally defined + * somewhere else in IRIX. + */ + +#include "xfs.h" +#include "xfs_cred.h" +#include "xfs_sysctl.h" + +/* + * System memory size - used to scale certain data structures in XFS. + */ +unsigned long xfs_physmem; + +/* + * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, + * other XFS code uses these values. Times are measured in centisecs (i.e. + * 100ths of a second). 
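+ * For example, the syncd_timer triple below reads { 1*100, 30*100, 7200*100 }: + * a minimum of 1 second, a default of 30 seconds and a maximum of 2 hours, + * all in centisecs.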
+ */ +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ + .restrict_chown = { 0, 1, 1 }, + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 127 }, + .error_level = { 0, 3, 11 }, + .syncd_timer = { 1*100, 30*100, 7200*100}, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, + .xfs_buf_timer = { 100/2, 1*100, 30*100 }, + .xfs_buf_age = { 1*100, 15*100, 7200*100}, + .inherit_nosym = { 0, 0, 1 }, + .rotorstep = { 1, 1, 255 }, +}; + +/* + * Global system credential structure. + */ +cred_t sys_cred_val, *sys_cred = &sys_cred_val; + diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h new file mode 100644 index 00000000000..e81e2f38a85 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_GLOBALS_H__ +#define __XFS_GLOBALS_H__ + +/* + * This file declares globals needed by XFS that were normally defined + * somewhere else in IRIX. + */ + +extern uint64_t xfs_panic_mask; /* set to cause more panics */ +extern unsigned long xfs_physmem; +extern struct cred *sys_cred; + +#endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c new file mode 100644 index 00000000000..69809eef8a5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -0,0 +1,1336 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_dfrag.h" +#include "xfs_fsops.h" + +#include <linux/dcache.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/pagemap.h> + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. + * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +STATIC int +xfs_find_handle( + unsigned int cmd, + void __user *arg) +{ + int hsize; + xfs_handle_t handle; + xfs_fsop_handlereq_t hreq; + struct inode *inode; + struct vnode *vp; + + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + + memset((char *)&handle, 0, sizeof(handle)); + + switch (cmd) { + case XFS_IOC_PATH_TO_FSHANDLE: + case XFS_IOC_PATH_TO_HANDLE: { + struct nameidata nd; + int error; + + error = user_path_walk_link((const char __user *)hreq.path, &nd); + if (error) + return error; + + ASSERT(nd.dentry); + ASSERT(nd.dentry->d_inode); + inode = igrab(nd.dentry->d_inode); + path_release(&nd); + break; + } + + case XFS_IOC_FD_TO_HANDLE: { + struct file *file; + + file = fget(hreq.fd); + if (!file) + return -EBADF; + + ASSERT(file->f_dentry); + ASSERT(file->f_dentry->d_inode); + inode = igrab(file->f_dentry->d_inode); + fput(file); + break; + } + + default: + ASSERT(0); + return -XFS_ERROR(EINVAL); + } + + if (inode->i_sb->s_magic != XFS_SB_MAGIC) { + /* we're not in XFS anymore, Toto */ + iput(inode); + return -XFS_ERROR(EINVAL); + } + + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + iput(inode); + return -XFS_ERROR(EBADF); + } + + /* now we can grab the fsid */ + memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); + hsize = sizeof(xfs_fsid_t); + + if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { + xfs_inode_t *ip; + bhv_desc_t *bhv; + int lock_mode; + + /* need to get access to the xfs_inode to read the generation */ + bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), 
&xfs_vnodeops); + ASSERT(bhv); + ip = XFS_BHVTOI(bhv); + ASSERT(ip); + lock_mode = xfs_ilock_map_shared(ip); + + /* fill in fid section of handle from inode */ + handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.xfs_fid_len); + handle.ha_fid.xfs_fid_pad = 0; + handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; + handle.ha_fid.xfs_fid_ino = ip->i_ino; + + xfs_iunlock_map_shared(ip, lock_mode); + + hsize = XFS_HSIZE(handle); + } + + /* now copy our handle into the user buffer & write out the size */ + if (copy_to_user(hreq.ohandle, &handle, hsize) || + copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + iput(inode); + return -XFS_ERROR(EFAULT); + } + + iput(inode); + return 0; +} + + +/* + * Convert userspace handle data into vnode (and inode). + * We [ab]use the fact that all the fsop_handlereq ioctl calls + * have a data structure argument whose first component is always + * a xfs_fsop_handlereq_t, so we can cast to and from this type. + * This allows us to optimise the copy_from_user calls and gives + * a handy, shared routine. + * + * If no error, caller must always VN_RELE the returned vp. + */ +STATIC int +xfs_vget_fsop_handlereq( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + xfs_fsop_handlereq_t *hreq, + vnode_t **vp, + struct inode **inode) +{ + void __user *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + struct inode *inodep; + vnode_t *vpp; + xfs_ino_t ino; + __u32 igen; + int error; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + hanp = hreq->ihandle; + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.xfs_fid_len != + (hlen - sizeof(handlep->ha_fsid) + - sizeof(handlep->ha_fid.xfs_fid_len)) + || handlep->ha_fid.xfs_fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) { + ino = xfid->xfs_fid_ino; + igen = xfid->xfs_fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a vnode to go with it. 
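+ * On success xfs_iget() returns the inode with XFS_ILOCK_SHARED held; + * a zero di_mode or a generation mismatch means the handle has gone + * stale and is reported as ENOENT.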
+ */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + vpp = XFS_ITOV(ip); + inodep = LINVFS_GET_IP(vpp); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *vp = vpp; + *inode = inodep; + return 0; +} + +STATIC int +xfs_open_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + int new_fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + vnode_t *vp; + xfs_fsop_handlereq_t hreq; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict xfs_open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + iput(inode); + return -XFS_ERROR(EINVAL); + } + +#if BITS_PER_LONG != 32 + hreq.oflags |= O_LARGEFILE; +#endif + /* Put open permission in namei format. */ + permflag = hreq.oflags; + if ((permflag+1) & O_ACCMODE) + permflag++; + if (permflag & O_TRUNC) + permflag |= 2; + + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + iput(inode); + return -XFS_ERROR(EPERM); + } + + if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + iput(inode); + return -XFS_ERROR(EACCES); + } + + /* Can't write directories. */ + if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + iput(inode); + return -XFS_ERROR(EISDIR); + } + + if ((new_fd = get_unused_fd()) < 0) { + iput(inode); + return new_fd; + } + + dentry = d_alloc_anon(inode); + if (dentry == NULL) { + iput(inode); + put_unused_fd(new_fd); + return -XFS_ERROR(ENOMEM); + } + + /* Ensure umount returns EBUSY on umounts while this file is open. */ + mntget(parfilp->f_vfsmnt); + + /* Create file pointer. */ + filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags); + if (IS_ERR(filp)) { + put_unused_fd(new_fd); + return -XFS_ERROR(-PTR_ERR(filp)); + } + if (inode->i_mode & S_IFREG) + filp->f_op = &linvfs_invis_file_operations; + + fd_install(new_fd, filp); + return new_fd; +} + +STATIC int +xfs_readlink_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct iovec aiov; + struct uio auio; + struct inode *inode; + xfs_fsop_handlereq_t hreq; + vnode_t *vp; + __u32 olen; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict this handle operation to symlinks only. 
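+ * (Note the ohandle/ohandlen fields of the request are reused below as + * the user buffer that receives the link target.)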
*/ + if (vp->v_type != VLNK) { + VN_RELE(vp); + return -XFS_ERROR(EINVAL); + } + + if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + aiov.iov_len = olen; + aiov.iov_base = hreq.ohandle; + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_resid = olen; + + VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); + + VN_RELE(vp); + return (olen - auio.uio_resid); +} + +STATIC int +xfs_fssetdm_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + bhv_desc_t *bdp; + vnode_t *vp; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + VN_RELE(vp); + return -XFS_ERROR(EPERM); + } + + if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + + bdp = bhv_base_unlocked(VN_BHV_HEAD(vp)); + error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL); + + VN_RELE(vp); + if (error) + return -error; + return 0; +} + +STATIC int +xfs_attrlist_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + vnode_t *vp; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, + &vp, &inode); + if (error) + goto out; + + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_vn_rele; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags, + cursor, NULL, error); + if (error) + goto out_kfree; + + if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_vn_rele: + VN_RELE(vp); + out: + return -error; +} + +STATIC int +xfs_attrmulti_attr_get( + struct vnode *vp, + char *name, + char __user *ubuf, + __uint32_t *len, + __uint32_t flags) +{ + char *kbuf; + int error = EFAULT; + + if (*len > XATTR_SIZE_MAX) + return EINVAL; + kbuf = kmalloc(*len, GFP_KERNEL); + if (!kbuf) + return ENOMEM; + + VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error); + if (error) + goto out_kfree; + + if (copy_to_user(ubuf, kbuf, *len)) + error = EFAULT; + + out_kfree: + kfree(kbuf); + return error; +} + +STATIC int +xfs_attrmulti_attr_set( + struct vnode *vp, + char *name, + const char __user *ubuf, + __uint32_t len, + __uint32_t flags) +{ + char *kbuf; + int error = EFAULT; + + if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) + return EPERM; + if (len > XATTR_SIZE_MAX) + return EINVAL; + + kbuf = kmalloc(len, GFP_KERNEL); + if (!kbuf) + return ENOMEM; + + if (copy_from_user(kbuf, ubuf, len)) + goto out_kfree; + + VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error); + + out_kfree: + kfree(kbuf); + return error; +} + +STATIC int +xfs_attrmulti_attr_remove( + struct vnode *vp, + char *name, + __uint32_t flags) +{ + int error; + + if 
(IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) + return EPERM; + + VOP_ATTR_REMOVE(vp, name, flags, NULL, error); + return error; +} + +STATIC int +xfs_attrmulti_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + vnode_t *vp; + unsigned int i, size; + char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode); + if (error) + goto out; + + error = E2BIG; + size = am_hreq.opcount * sizeof(attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_vn_rele; + + error = ENOMEM; + ops = kmalloc(size, GFP_KERNEL); + if (!ops) + goto out_vn_rele; + + error = EFAULT; + if (copy_from_user(ops, am_hreq.ops, size)) + goto out_kfree_ops; + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_attrname, MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get(vp, + attr_name, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = xfs_attrmulti_attr_set(vp, + attr_name, ops[i].am_attrvalue, + ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = xfs_attrmulti_attr_remove(vp, + attr_name, ops[i].am_flags); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_vn_rele: + VN_RELE(vp); + out: + return -error; +} + +/* prototypes for a few of the stack-hungry cases that have + * their own functions. 
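+ * (Splitting them out keeps the stack frame of xfs_ioctl() itself small.)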
Functions are defined after their use + * so gcc doesn't get fancy and inline them with -03 */ + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int flags, + unsigned int cmd, + void __user *arg); + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg); + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + void __user *arg); + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + void __user *arg); + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + void __user *arg); + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int flags, + unsigned int cmd, + void __user *arg); + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + void __user *arg); + +int +xfs_ioctl( + bhv_desc_t *bdp, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + void __user *arg) +{ + int error; + vnode_t *vp; + xfs_inode_t *ip; + xfs_mount_t *mp; + + vp = LINVFS_GET_VP(inode); + + vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); + + ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; + + switch (cmd) { + + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); + + case XFS_IOC_DIOINFO: { + struct dioattr da; + xfs_buftarg_t *target = + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; + /* The size dio will do in one go */ + da.d_maxiosz = 64 * PAGE_CACHE_SIZE; + + if (copy_to_user(arg, &da, sizeof(da))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSBULKSTAT_SINGLE: + case XFS_IOC_FSBULKSTAT: + case XFS_IOC_FSINUMBERS: + return xfs_ioc_bulkstat(mp, cmd, arg); + + case XFS_IOC_FSGEOMETRY_V1: + return xfs_ioc_fsgeometry_v1(mp, arg); + + case XFS_IOC_FSGEOMETRY: + return xfs_ioc_fsgeometry(mp, arg); + + case XFS_IOC_GETVERSION: + case XFS_IOC_GETXFLAGS: + case XFS_IOC_SETXFLAGS: + case XFS_IOC_FSGETXATTR: + case XFS_IOC_FSSETXATTR: + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_xattr(vp, ip, filp, cmd, arg); + + case XFS_IOC_FSSETDM: { + struct fsdmidata dmi; + + if (copy_from_user(&dmi, arg, sizeof(dmi))) + return -XFS_ERROR(EFAULT); + + error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, + NULL); + return -error; + } + + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); + + case XFS_IOC_GETBMAPX: + return xfs_ioc_getbmapx(bdp, arg); + + case XFS_IOC_FD_TO_HANDLE: + case XFS_IOC_PATH_TO_HANDLE: + case XFS_IOC_PATH_TO_FSHANDLE: + return xfs_find_handle(cmd, arg); + + case XFS_IOC_OPEN_BY_HANDLE: + return xfs_open_by_handle(mp, arg, filp, inode); + + case XFS_IOC_FSSETDM_BY_HANDLE: + return xfs_fssetdm_by_handle(mp, arg, filp, inode); + + case XFS_IOC_READLINK_BY_HANDLE: + return xfs_readlink_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRLIST_BY_HANDLE: + return xfs_attrlist_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRMULTI_BY_HANDLE: + return xfs_attrmulti_by_handle(mp, arg, filp, inode); + + case XFS_IOC_SWAPEXT: { + error = xfs_swapext((struct xfs_swapext __user *)arg); + 
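+ /* like the rest of this switch, xfs_swapext() returns a positive + * errno, which must be negated for the VFS */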
return -error; + } + + case XFS_IOC_FSCOUNTS: { + xfs_fsop_counts_t out; + + error = xfs_fs_counts(mp, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SET_RESBLKS: { + xfs_fsop_resblks_t inout; + __uint64_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&inout, arg, sizeof(inout))) + return -XFS_ERROR(EFAULT); + + /* input parameter is passed in resblks field of structure */ + in = inout.resblks; + error = xfs_reserve_blocks(mp, &in, &inout); + if (error) + return -error; + + if (copy_to_user(arg, &inout, sizeof(inout))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GET_RESBLKS: { + xfs_fsop_resblks_t out; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_reserve_blocks(mp, NULL, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + + return 0; + } + + case XFS_IOC_FSGROWFSDATA: { + xfs_growfs_data_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_data(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSLOG: { + xfs_growfs_log_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_log(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSRT: { + xfs_growfs_rt_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_rt(mp, &in); + return -error; + } + + case XFS_IOC_FREEZE: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (inode->i_sb->s_frozen == SB_UNFROZEN) + freeze_bdev(inode->i_sb->s_bdev); + return 0; + + case XFS_IOC_THAW: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (inode->i_sb->s_frozen != SB_UNFROZEN) + thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); + return 0; + + case XFS_IOC_GOINGDOWN: { + __uint32_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__uint32_t __user *)arg)) + return -XFS_ERROR(EFAULT); + + error = xfs_fs_goingdown(mp, in); + return -error; + } + + case XFS_IOC_ERROR_INJECTION: { + xfs_error_injection_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_errortag_add(in.errtag, mp); + return -error; + } + + case XFS_IOC_ERROR_CLEARALL: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_errortag_clearall(mp); + return -error; + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int ioflags, + unsigned int cmd, + void __user *arg) +{ + xfs_flock64_t bf; + int attr_flags = 0; + int error; + + if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) + return -XFS_ERROR(EPERM); + + if (!(filp->f_flags & FMODE_WRITE)) + return -XFS_ERROR(EBADF); + + if (vp->v_type != VREG) + return -XFS_ERROR(EINVAL); + + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + if (ioflags & IO_INVIS) + attr_flags |= ATTR_DMI; + + error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos, + NULL, attr_flags); + return -error; +} + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg) +{ + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of 
records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + return -XFS_ERROR(EFAULT); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer); + else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + else { /* XFS_IOC_FSBULKSTAT */ + if (count == 1 && inlast != 0) { + inlast++; + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + } else { + error = xfs_bulkstat(mp, &inlast, &count, + (bulkstat_one_pf)xfs_bulkstat_one, NULL, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + BULKSTAT_FG_QUICK, &done); + } + } + + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_v1_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); + if (error) + return -error; + + if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 4); + if (error) + return -error; + + if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* + * Linux extended inode flags interface. 
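+ *
+ * The LINUX_XFLAG_* values below mirror the generic Linux inode flag
+ * bits, and xfs_ioc_xattr() below exposes them through
+ * XFS_IOC_GETXFLAGS and XFS_IOC_SETXFLAGS. For illustration only (a
+ * sketch; assumes fd is an open file on an XFS filesystem), a
+ * userspace caller might toggle a flag like so:
+ *
+ *	unsigned int flags;
+ *	if (ioctl(fd, XFS_IOC_GETXFLAGS, &flags) == 0) {
+ *		flags |= LINUX_XFLAG_NOATIME;
+ *		ioctl(fd, XFS_IOC_SETXFLAGS, &flags);
+ *	}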
+ */ +#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */ +#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */ +#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ +#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ +#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ + +STATIC unsigned int +xfs_merge_ioc_xflags( + unsigned int flags, + unsigned int start) +{ + unsigned int xflags = start; + + if (flags & LINUX_XFLAG_IMMUTABLE) + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; + if (flags & LINUX_XFLAG_APPEND) + xflags |= XFS_XFLAG_APPEND; + else + xflags &= ~XFS_XFLAG_APPEND; + if (flags & LINUX_XFLAG_SYNC) + xflags |= XFS_XFLAG_SYNC; + else + xflags &= ~XFS_XFLAG_SYNC; + if (flags & LINUX_XFLAG_NOATIME) + xflags |= XFS_XFLAG_NOATIME; + else + xflags &= ~XFS_XFLAG_NOATIME; + if (flags & LINUX_XFLAG_NODUMP) + xflags |= XFS_XFLAG_NODUMP; + else + xflags &= ~XFS_XFLAG_NODUMP; + + return xflags; +} + +STATIC unsigned int +xfs_di2lxflags( + __uint16_t di_flags) +{ + unsigned int flags = 0; + + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= LINUX_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= LINUX_XFLAG_APPEND; + if (di_flags & XFS_DIFLAG_SYNC) + flags |= LINUX_XFLAG_SYNC; + if (di_flags & XFS_DIFLAG_NOATIME) + flags |= LINUX_XFLAG_NOATIME; + if (di_flags & XFS_DIFLAG_NODUMP) + flags |= LINUX_XFLAG_NODUMP; + return flags; +} + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + void __user *arg) +{ + struct fsxattr fa; + vattr_t va; + int error; + int attr_flags; + unsigned int flags; + + switch (cmd) { + case XFS_IOC_FSGETXATTR: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_nextents; + + if (copy_to_user(arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSSETXATTR: { + if (copy_from_user(&fa, arg, sizeof(fa))) + return -XFS_ERROR(EFAULT); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; + va.va_xflags = fa.fsx_xflags; + va.va_extsize = fa.fsx_extsize; + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if (!error) + vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_FSGETXATTRA: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_anextents; + + if (copy_to_user(arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GETXFLAGS: { + flags = xfs_di2lxflags(ip->i_d.di_flags); + if (copy_to_user(arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SETXFLAGS: { + if (copy_from_user(&flags, arg, sizeof(flags))) + return -XFS_ERROR(EFAULT); + + if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ + LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ + LINUX_XFLAG_SYNC)) + return -XFS_ERROR(EOPNOTSUPP); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS; + va.va_xflags = xfs_merge_ioc_xflags(flags, + xfs_ip2xflags(ip)); + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if (!error) + 
vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_GETVERSION: { + flags = LINVFS_GET_IP(vp)->i_generation; + if (copy_to_user(arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int ioflags, + unsigned int cmd, + void __user *arg) +{ + struct getbmap bm; + int iflags; + int error; + + if (copy_from_user(&bm, arg, sizeof(bm))) + return -XFS_ERROR(EFAULT); + + if (bm.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + if (ioflags & IO_INVIS) + iflags |= BMV_IF_NO_DMAPI_READ; + + error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags); + if (error) + return -error; + + if (copy_to_user(arg, &bm, sizeof(bm))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + void __user *arg) +{ + struct getbmapx bmx; + struct getbmap bm; + int iflags; + int error; + + if (copy_from_user(&bmx, arg, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + /* + * Map input getbmapx structure to a getbmap + * structure for xfs_getbmap. + */ + GETBMAP_CONVERT(bmx, bm); + + iflags = bmx.bmv_iflags; + + if (iflags & (~BMV_IF_VALID)) + return -XFS_ERROR(EINVAL); + + iflags |= BMV_IF_EXTENDED; + + error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags); + if (error) + return -error; + + GETBMAP_CONVERT(bm, bmx); + + if (copy_to_user(arg, &bmx, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c new file mode 100644 index 00000000000..7a12c83184f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <linux/config.h>
+#include <linux/compat.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/ioctl32.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_fs.h"
+#include "xfs_vfs.h"
+#include "xfs_vnode.h"
+#include "xfs_dfrag.h"
+
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#else
+
+typedef struct xfs_fsop_bulkreq32 {
+ compat_uptr_t lastip; /* last inode # pointer */
+ __s32 icount; /* count of entries in buffer */
+ compat_uptr_t ubuffer; /* user buffer for inode desc. */
+ __s32 ocount; /* output count pointer */
+} xfs_fsop_bulkreq32_t;
+
+static unsigned long
+xfs_ioctl32_bulkstat(unsigned long arg)
+{
+ xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg;
+ xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
+ u32 addr;
+
+ if (get_user(addr, &p32->lastip) ||
+ put_user(compat_ptr(addr), &p->lastip) ||
+ copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
+ get_user(addr, &p32->ubuffer) ||
+ put_user(compat_ptr(addr), &p->ubuffer) ||
+ get_user(addr, &p32->ocount) ||
+ put_user(compat_ptr(addr), &p->ocount))
+ return -EFAULT;
+
+ return (unsigned long)p;
+}
+#endif
+
+static long
+__xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+{
+ int error;
+ struct inode *inode = f->f_dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ switch (cmd) {
+ case XFS_IOC_DIOINFO:
+ case XFS_IOC_FSGEOMETRY_V1:
+ case XFS_IOC_FSGEOMETRY:
+ case XFS_IOC_GETVERSION:
+ case XFS_IOC_GETXFLAGS:
+ case XFS_IOC_SETXFLAGS:
+ case XFS_IOC_FSGETXATTR:
+ case XFS_IOC_FSSETXATTR:
+ case XFS_IOC_FSGETXATTRA:
+ case XFS_IOC_FSSETDM:
+ case XFS_IOC_GETBMAP:
+ case XFS_IOC_GETBMAPA:
+ case XFS_IOC_GETBMAPX:
+/* not handled
+ case XFS_IOC_FD_TO_HANDLE:
+ case XFS_IOC_PATH_TO_HANDLE:
+ case XFS_IOC_PATH_TO_FSHANDLE:
+ case XFS_IOC_OPEN_BY_HANDLE:
+ case XFS_IOC_FSSETDM_BY_HANDLE:
+ case XFS_IOC_READLINK_BY_HANDLE:
+ case XFS_IOC_ATTRLIST_BY_HANDLE:
+ case XFS_IOC_ATTRMULTI_BY_HANDLE:
+*/
+ case XFS_IOC_FSCOUNTS:
+ case XFS_IOC_SET_RESBLKS:
+ case XFS_IOC_GET_RESBLKS:
+ case XFS_IOC_FSGROWFSDATA:
+ case XFS_IOC_FSGROWFSLOG:
+ case XFS_IOC_FSGROWFSRT:
+ case XFS_IOC_FREEZE:
+ case XFS_IOC_THAW:
+ case XFS_IOC_GOINGDOWN:
+ case XFS_IOC_ERROR_INJECTION:
+ case XFS_IOC_ERROR_CLEARALL:
+ break;
+
+#ifndef BROKEN_X86_ALIGNMENT
+ /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
+ case XFS_IOC_ALLOCSP:
+ case XFS_IOC_FREESP:
+ case XFS_IOC_RESVSP:
+ case XFS_IOC_UNRESVSP:
+ case XFS_IOC_ALLOCSP64:
+ case XFS_IOC_FREESP64:
+ case XFS_IOC_RESVSP64:
+ case XFS_IOC_UNRESVSP64:
+ case XFS_IOC_SWAPEXT:
+ break;
+
+ case XFS_IOC_FSBULKSTAT_SINGLE:
+ case XFS_IOC_FSBULKSTAT:
+ case XFS_IOC_FSINUMBERS:
+ arg = xfs_ioctl32_bulkstat(arg);
+ break;
+#endif
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
+ VMODIFY(vp);
+
+ return error;
+}
+
+long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
+{
+ return __xfs_compat_ioctl(0, f, cmd, arg);
+}
+
+long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg)
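+/*
+ * (Invisible-I/O flavour of the same dispatcher: IO_INVIS makes the
+ * read/write paths skip timestamp updates and DMAPI event delivery;
+ * see the IO_INVIS checks in xfs_read() and xfs_sendfile().)
+ */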
+{ + return __xfs_compat_ioctl(IO_INVIS, f, cmd, arg); +} diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h new file mode 100644 index 00000000000..779f69a4811 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg); +long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c new file mode 100644 index 00000000000..407e9935939 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" + +#include <linux/xattr.h> +#include <linux/namei.h> + + +/* + * Pull the link count and size up from the xfs inode to the linux inode + */ +STATIC void +validate_fields( + struct inode *ip) +{ + vnode_t *vp = LINVFS_GET_VP(ip); + vattr_t va; + int error; + + va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; + VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error); + if (likely(!error)) { + ip->i_nlink = va.va_nlink; + ip->i_blocks = va.va_nblocks; + + /* we're under i_sem so i_size can't change under us */ + if (i_size_read(ip) != va.va_size) + i_size_write(ip, va.va_size); + } +} + +/* + * Determine whether a process has a valid fs_struct (kernel daemons + * like knfsd don't have an fs_struct). + * + * XXX(hch): nfsd is broken, better fix it instead. + */ +STATIC inline int +has_fs_struct(struct task_struct *task) +{ + return (task->fs != init_task.fs); +} + +STATIC int +linvfs_mknod( + struct inode *dir, + struct dentry *dentry, + int mode, + dev_t rdev) +{ + struct inode *ip; + vattr_t va; + vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); + xfs_acl_t *default_acl = NULL; + attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; + int error; + + /* + * Irix uses Missed'em'V split, but doesn't want to see + * the upper 5 bits of (14bit) major. + */ + if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff) + return -EINVAL; + + if (test_default_acl && test_default_acl(dvp)) { + if (!_ACL_ALLOC(default_acl)) + return -ENOMEM; + if (!_ACL_GET_DEFAULT(dvp, default_acl)) { + _ACL_FREE(default_acl); + default_acl = NULL; + } + } + + if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current)) + mode &= ~current->fs->umask; + + memset(&va, 0, sizeof(va)); + va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + va.va_type = IFTOVT(mode); + va.va_mode = mode; + + switch (mode & S_IFMT) { + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + va.va_rdev = sysv_encode_dev(rdev); + va.va_mask |= XFS_AT_RDEV; + /*FALLTHROUGH*/ + case S_IFREG: + VOP_CREATE(dvp, dentry, &va, &vp, NULL, error); + break; + case S_IFDIR: + VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error); + break; + default: + error = EINVAL; + break; + } + + if (default_acl) { + if (!error) { + error = _ACL_INHERIT(vp, &va, default_acl); + if (!error) { + VMODIFY(vp); + } else { + struct dentry teardown = {}; + int err2; + + /* Oh, the horror. + * If we can't add the ACL we must back out. + * ENOSPC can hit here, among other things. 
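+ * The teardown dentry below is a throwaway stack copy: it borrows
+ * the new inode and name so VOP_RMDIR()/VOP_REMOVE() can unwind
+ * the create, after vn_mark_bad() has poisoned the vnode.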
+ */ + teardown.d_inode = ip = LINVFS_GET_IP(vp); + teardown.d_name = dentry->d_name; + + vn_mark_bad(vp); + + if (S_ISDIR(mode)) + VOP_RMDIR(dvp, &teardown, NULL, err2); + else + VOP_REMOVE(dvp, &teardown, NULL, err2); + VN_RELE(vp); + } + } + _ACL_FREE(default_acl); + } + + if (!error) { + ASSERT(vp); + ip = LINVFS_GET_IP(vp); + + if (S_ISCHR(mode) || S_ISBLK(mode)) + ip->i_rdev = rdev; + else if (S_ISDIR(mode)) + validate_fields(ip); + d_instantiate(dentry, ip); + validate_fields(dir); + } + return -error; +} + +STATIC int +linvfs_create( + struct inode *dir, + struct dentry *dentry, + int mode, + struct nameidata *nd) +{ + return linvfs_mknod(dir, dentry, mode, 0); +} + +STATIC int +linvfs_mkdir( + struct inode *dir, + struct dentry *dentry, + int mode) +{ + return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0); +} + +STATIC struct dentry * +linvfs_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct vnode *vp = LINVFS_GET_VP(dir), *cvp; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error); + if (error) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + d_add(dentry, NULL); + return NULL; + } + + return d_splice_alias(LINVFS_GET_IP(cvp), dentry); +} + +STATIC int +linvfs_link( + struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + struct inode *ip; /* inode of guy being linked to */ + vnode_t *tdvp; /* target directory for new name/link */ + vnode_t *vp; /* vp of name being linked */ + int error; + + ip = old_dentry->d_inode; /* inode being linked to */ + if (S_ISDIR(ip->i_mode)) + return -EPERM; + + tdvp = LINVFS_GET_VP(dir); + vp = LINVFS_GET_VP(ip); + + VOP_LINK(tdvp, vp, dentry, NULL, error); + if (!error) { + VMODIFY(tdvp); + VN_HOLD(vp); + validate_fields(ip); + d_instantiate(dentry, ip); + } + return -error; +} + +STATIC int +linvfs_unlink( + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode; + vnode_t *dvp; /* directory containing name to remove */ + int error; + + inode = dentry->d_inode; + dvp = LINVFS_GET_VP(dir); + + VOP_REMOVE(dvp, dentry, NULL, error); + if (!error) { + validate_fields(dir); /* For size only */ + validate_fields(inode); + } + + return -error; +} + +STATIC int +linvfs_symlink( + struct inode *dir, + struct dentry *dentry, + const char *symname) +{ + struct inode *ip; + vattr_t va; + vnode_t *dvp; /* directory containing name of symlink */ + vnode_t *cvp; /* used to lookup symlink to put in dentry */ + int error; + + dvp = LINVFS_GET_VP(dir); + cvp = NULL; + + memset(&va, 0, sizeof(va)); + va.va_type = VLNK; + va.va_mode = irix_symlink_mode ? 
0777 & ~current->fs->umask : S_IRWXUGO; + va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + + error = 0; + VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); + if (!error && cvp) { + ASSERT(cvp->v_type == VLNK); + ip = LINVFS_GET_IP(cvp); + d_instantiate(dentry, ip); + validate_fields(dir); + validate_fields(ip); /* size needs update */ + } + return -error; +} + +STATIC int +linvfs_rmdir( + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + vnode_t *dvp = LINVFS_GET_VP(dir); + int error; + + VOP_RMDIR(dvp, dentry, NULL, error); + if (!error) { + validate_fields(inode); + validate_fields(dir); + } + return -error; +} + +STATIC int +linvfs_rename( + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry) +{ + struct inode *new_inode = ndentry->d_inode; + vnode_t *fvp; /* from directory */ + vnode_t *tvp; /* target directory */ + int error; + + fvp = LINVFS_GET_VP(odir); + tvp = LINVFS_GET_VP(ndir); + + VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error); + if (error) + return -error; + + if (new_inode) + validate_fields(new_inode); + + validate_fields(odir); + if (ndir != odir) + validate_fields(ndir); + return 0; +} + +/* + * careful here - this function can get called recursively, so + * we need to be very careful about how much stack we use. + * uio is kmalloced for this reason... + */ +STATIC int +linvfs_follow_link( + struct dentry *dentry, + struct nameidata *nd) +{ + vnode_t *vp; + uio_t *uio; + iovec_t iov; + int error; + char *link; + + ASSERT(dentry); + ASSERT(nd); + + link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); + if (!link) { + nd_set_link(nd, ERR_PTR(-ENOMEM)); + return 0; + } + + uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); + if (!uio) { + kfree(link); + nd_set_link(nd, ERR_PTR(-ENOMEM)); + return 0; + } + + vp = LINVFS_GET_VP(dentry->d_inode); + + iov.iov_base = link; + iov.iov_len = MAXNAMELEN; + + uio->uio_iov = &iov; + uio->uio_offset = 0; + uio->uio_segflg = UIO_SYSSPACE; + uio->uio_resid = MAXNAMELEN; + uio->uio_iovcnt = 1; + + VOP_READLINK(vp, uio, 0, NULL, error); + if (error) { + kfree(link); + link = ERR_PTR(-error); + } else { + link[MAXNAMELEN - uio->uio_resid] = '\0'; + } + kfree(uio); + + nd_set_link(nd, link); + return 0; +} + +static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +#ifdef CONFIG_XFS_POSIX_ACL +STATIC int +linvfs_permission( + struct inode *inode, + int mode, + struct nameidata *nd) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + mode <<= 6; /* convert from linux to vnode access bits */ + VOP_ACCESS(vp, mode, NULL, error); + return -error; +} +#else +#define linvfs_permission NULL +#endif + +STATIC int +linvfs_getattr( + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + int error = 0; + + if (unlikely(vp->v_flag & VMODIFIED)) + error = vn_revalidate(vp); + if (!error) + generic_fillattr(inode, stat); + return 0; +} + +STATIC int +linvfs_setattr( + struct dentry *dentry, + struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + unsigned int ia_valid = attr->ia_valid; + vnode_t *vp = LINVFS_GET_VP(inode); + vattr_t vattr; + int flags = 0; + int error; + + memset(&vattr, 0, sizeof(vattr_t)); + if (ia_valid & ATTR_UID) { + vattr.va_mask |= XFS_AT_UID; + vattr.va_uid = attr->ia_uid; + } + if (ia_valid & ATTR_GID) { + vattr.va_mask |= XFS_AT_GID; + 
vattr.va_gid = attr->ia_gid; + } + if (ia_valid & ATTR_SIZE) { + vattr.va_mask |= XFS_AT_SIZE; + vattr.va_size = attr->ia_size; + } + if (ia_valid & ATTR_ATIME) { + vattr.va_mask |= XFS_AT_ATIME; + vattr.va_atime = attr->ia_atime; + } + if (ia_valid & ATTR_MTIME) { + vattr.va_mask |= XFS_AT_MTIME; + vattr.va_mtime = attr->ia_mtime; + } + if (ia_valid & ATTR_CTIME) { + vattr.va_mask |= XFS_AT_CTIME; + vattr.va_ctime = attr->ia_ctime; + } + if (ia_valid & ATTR_MODE) { + vattr.va_mask |= XFS_AT_MODE; + vattr.va_mode = attr->ia_mode; + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + inode->i_mode &= ~S_ISGID; + } + + if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) + flags |= ATTR_UTIME; +#ifdef ATTR_NO_BLOCK + if ((ia_valid & ATTR_NO_BLOCK)) + flags |= ATTR_NONBLOCK; +#endif + + VOP_SETATTR(vp, &vattr, flags, NULL, error); + if (error) + return -error; + vn_revalidate(vp); + return error; +} + +STATIC void +linvfs_truncate( + struct inode *inode) +{ + block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block); +} + +STATIC int +linvfs_setxattr( + struct dentry *dentry, + const char *name, + const void *data, + size_t size, + int flags) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + int error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (flags & XATTR_CREATE) + xflags |= ATTR_CREATE; + if (flags & XATTR_REPLACE) + xflags |= ATTR_REPLACE; + xflags |= namesp->attr_flag; + return namesp->attr_set(vp, attr, (void *)data, size, xflags); +} + +STATIC ssize_t +linvfs_getxattr( + struct dentry *dentry, + const char *name, + void *data, + size_t size) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + ssize_t error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (!size) { + xflags |= ATTR_KERNOVAL; + data = NULL; + } + xflags |= namesp->attr_flag; + return namesp->attr_get(vp, attr, (void *)data, size, xflags); +} + +STATIC ssize_t +linvfs_listxattr( + struct dentry *dentry, + char *data, + size_t size) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + int error, xflags = ATTR_KERNAMELS; + ssize_t result; + + if (!size) + xflags |= ATTR_KERNOVAL; + xflags |= capable(CAP_SYS_ADMIN) ? 
ATTR_KERNFULLS : ATTR_KERNORMALS; + + error = attr_generic_list(vp, data, size, xflags, &result); + if (error < 0) + return error; + return result; +} + +STATIC int +linvfs_removexattr( + struct dentry *dentry, + const char *name) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + int error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + xflags |= namesp->attr_flag; + return namesp->attr_remove(vp, attr, xflags); +} + + +struct inode_operations linvfs_file_inode_operations = { + .permission = linvfs_permission, + .truncate = linvfs_truncate, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; + +struct inode_operations linvfs_dir_inode_operations = { + .create = linvfs_create, + .lookup = linvfs_lookup, + .link = linvfs_link, + .unlink = linvfs_unlink, + .symlink = linvfs_symlink, + .mkdir = linvfs_mkdir, + .rmdir = linvfs_rmdir, + .mknod = linvfs_mknod, + .rename = linvfs_rename, + .permission = linvfs_permission, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; + +struct inode_operations linvfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = linvfs_follow_link, + .put_link = linvfs_put_link, + .permission = linvfs_permission, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h new file mode 100644 index 00000000000..6a69a62c36b --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IOPS_H__ +#define __XFS_IOPS_H__ + +extern struct inode_operations linvfs_file_inode_operations; +extern struct inode_operations linvfs_dir_inode_operations; +extern struct inode_operations linvfs_symlink_inode_operations; + +extern struct file_operations linvfs_file_operations; +extern struct file_operations linvfs_invis_file_operations; +extern struct file_operations linvfs_dir_operations; + +extern struct address_space_operations linvfs_aops; + +extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern void linvfs_unwritten_done(struct buffer_head *, int); + +extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, + int, unsigned int, void __user *); + +#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h new file mode 100644 index 00000000000..71bb41019a1 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_LINUX__ +#define __XFS_LINUX__ + +#include <linux/types.h> +#include <linux/config.h> + +/* + * Some types are conditional depending on the target system. + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well + * as requiring XFS_BIG_BLKNOS to be set. 
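+ *
+ * Concretely, per the #if below: a 32-bit kernel without CONFIG_LBD
+ * gets XFS_BIG_BLKNOS == 0 and XFS_BIG_INUMS == 0; 32-bit with
+ * CONFIG_LBD gets 1 and 0; any 64-bit kernel gets 1 and 1.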
+ */ +#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) +# define XFS_BIG_BLKNOS 1 +# if BITS_PER_LONG == 64 +# define XFS_BIG_INUMS 1 +# else +# define XFS_BIG_INUMS 0 +# endif +#else +# define XFS_BIG_BLKNOS 0 +# define XFS_BIG_INUMS 0 +#endif + +#include <xfs_types.h> +#include <xfs_arch.h> + +#include <kmem.h> +#include <mrlock.h> +#include <spin.h> +#include <sv.h> +#include <mutex.h> +#include <sema.h> +#include <time.h> + +#include <support/qsort.h> +#include <support/ktrace.h> +#include <support/debug.h> +#include <support/move.h> +#include <support/uuid.h> + +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/file.h> +#include <linux/swap.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/bitops.h> +#include <linux/major.h> +#include <linux/pagemap.h> +#include <linux/vfs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/proc_fs.h> +#include <linux/version.h> +#include <linux/sort.h> + +#include <asm/page.h> +#include <asm/div64.h> +#include <asm/param.h> +#include <asm/uaccess.h> +#include <asm/byteorder.h> +#include <asm/unaligned.h> + +#include <xfs_behavior.h> +#include <xfs_vfs.h> +#include <xfs_cred.h> +#include <xfs_vnode.h> +#include <xfs_stats.h> +#include <xfs_sysctl.h> +#include <xfs_iops.h> +#include <xfs_super.h> +#include <xfs_globals.h> +#include <xfs_fs_subr.h> +#include <xfs_lrw.h> +#include <xfs_buf.h> + +/* + * Feature macros (disable/enable) + */ +#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ +#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ + +/* + * State flag for unwritten extent buffers. + * + * We need to be able to distinguish between these and delayed + * allocate buffers within XFS. The generic IO path code does + * not need to distinguish - we use the BH_Delay flag for both + * delalloc and these ondisk-uninitialised buffers. 
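+ *
+ * BUFFER_FNS(PrivateStart, unwritten) expands to the usual helper
+ * set on the BH_PrivateStart bit, so callers can write, e.g.:
+ *
+ *	set_buffer_unwritten(bh);
+ *	if (buffer_unwritten(bh))
+ *		set_buffer_unwritten_io(bh);
+ *
+ * with clear_buffer_unwritten() to drop the state again.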
+ */ +BUFFER_FNS(PrivateStart, unwritten); +static inline void set_buffer_unwritten_io(struct buffer_head *bh) +{ + bh->b_end_io = linvfs_unwritten_done; +} + +#define restricted_chown xfs_params.restrict_chown.val +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_centisecs xfs_params.syncd_timer.val +#define xfs_stats_clear xfs_params.stats_clear.val +#define xfs_inherit_sync xfs_params.inherit_sync.val +#define xfs_inherit_nodump xfs_params.inherit_nodump.val +#define xfs_inherit_noatime xfs_params.inherit_noatim.val +#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val +#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val +#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val +#define xfs_rotorstep xfs_params.rotorstep.val + +#ifndef __smp_processor_id +#define __smp_processor_id() smp_processor_id() +#endif +#define current_cpu() __smp_processor_id() +#define current_pid() (current->pid) +#define current_fsuid(cred) (current->fsuid) +#define current_fsgid(cred) (current->fsgid) + +#define NBPP PAGE_SIZE +#define DPPSHFT (PAGE_SHIFT - 9) +#define NDPP (1 << (PAGE_SHIFT - 9)) +#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) +#define dtopt(DD) ((DD) >> DPPSHFT) +#define dpoff(DD) ((DD) & (NDPP-1)) + +#define NBBY 8 /* number of bits per byte */ +#define NBPC PAGE_SIZE /* Number of bytes per click */ +#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ + +/* + * Size of block device i/o is parameterized here. + * Currently the system supports page-sized i/o. + */ +#define BLKDEV_IOSHIFT BPCSHIFT +#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) +/* number of BB's per block device block */ +#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) + +/* bytes to clicks */ +#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) +#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) +#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) +#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) +#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) + +/* off_t bytes to clicks */ +#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) + +/* clicks to off_t bytes */ +#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) + +/* clicks to bytes */ +#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) +#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) +#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) +#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) + +/* bytes to clicks */ +#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) + +#ifndef CELL_CAPABLE +#define FSC_NOTIFY_NAME_CHANGED(vp) +#endif + +#ifndef ENOATTR +#define ENOATTR ENODATA /* Attribute not found */ +#endif + +/* Note: EWRONGFS never visible outside the kernel */ +#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ + +/* + * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't + * return codes out of its known range in errno. 
+ * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't + * conflict with any code we use already or any code a driver may use) + * XXX Some options (currently we do #2): + * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] + * 2/ 990 ["Unknown error 990"] + * 3/ EUCLEAN ["Structure needs cleaning"] + * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] + */ +#define EFSCORRUPTED 990 /* Filesystem is corrupted */ + +#define SYNCHRONIZE() barrier() +#define __return_address __builtin_return_address(0) + +/* + * IRIX (BSD) quotactl makes use of separate commands for user/group, + * whereas on Linux the syscall encodes this information into the cmd + * field (see the QCMD macro in quota.h). These macros help keep the + * code portable - they are not visible from the syscall interface. + */ +#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ +#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ + +/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ +/* we may well need to fine-tune this if it ever becomes an issue. */ +#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ +#define ndquot DQUOT_MAX_HEURISTIC + +/* IRIX uses the current size of the name cache to guess a good value */ +/* - this isn't the same but is a good enough starting point for now. */ +#define DQUOT_HASH_HEURISTIC files_stat.nr_files + +/* IRIX inodes maintain the project ID also, zero this field on Linux */ +#define DEFAULT_PROJID 0 +#define dfltprid DEFAULT_PROJID + +#define MAXPATHLEN 1024 + +#define MIN(a,b) (min(a,b)) +#define MAX(a,b) (max(a,b)) +#define howmany(x, y) (((x)+((y)-1))/(y)) +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) + +#define xfs_stack_trace() dump_stack() + +#define xfs_itruncate_data(ip, off) \ + (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) + + +/* Move the kernel do_div definition off to one side */ + +#if defined __i386__ +/* For ia32 we need to pull some tricks to get past various versions + * of the compiler which do not like us using do_div in the middle + * of large functions. 
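+ *
+ * The 8-byte case below is schoolbook long division: write the
+ * dividend as c = H * 2^32 + L, divide H first, then feed the
+ * remainder into a single divl. That is safe because H % b < b,
+ * so the 64/32 quotient fits in 32 bits:
+ *
+ *	c / b = (H / b) * 2^32 + ((H % b) * 2^32 + L) / b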
+ */ +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) + +static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return(x * y); +} + +#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) + +#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c new file mode 100644 index 00000000000..ff145fd0d1a --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -0,0 +1,1082 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +/* + * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff) + * + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_inode_item.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_iomap.h" + +#include <linux/capability.h> +#include <linux/writeback.h> + + +#if defined(XFS_RW_TRACE) +void +xfs_rw_enter_trace( + int tag, + xfs_iocore_t *io, + void *data, + size_t segs, + loff_t offset, + int ioflags) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(unsigned long)tag, + (void *)ip, + (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), + (void *)data, + (void *)((unsigned long)segs), + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)ioflags), + (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} + +void +xfs_inval_cached_trace( + xfs_iocore_t *io, + xfs_off_t offset, + xfs_off_t len, + xfs_off_t first, + xfs_off_t last) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(__psint_t)XFS_INVAL_CACHED, + (void *)ip, + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)((len >> 32) & 0xffffffff)), + (void *)((unsigned long)(len & 0xffffffff)), + (void *)((unsigned long)((first >> 32) & 0xffffffff)), + (void *)((unsigned long)(first & 0xffffffff)), + (void *)((unsigned long)((last >> 32) & 0xffffffff)), + (void *)((unsigned long)(last & 0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} +#endif + +/* + * xfs_iozero + * + * xfs_iozero clears the specified range of buffer supplied, + * and marks all the affected blocks as valid and modified. If + * an affected block is not allocated, it will be allocated. If + * an affected block is not completely overwritten, and is not + * valid before the operation, it will be read from disk before + * being partially zeroed. 
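+ *
+ * The loop below walks the range one page at a time. For example,
+ * with 4096-byte pages, pos = 4090 and count = 100 take two passes:
+ * 6 bytes at the tail of page 0, then 94 bytes at the head of page 1.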
+ */ +STATIC int +xfs_iozero( + struct inode *ip, /* inode */ + loff_t pos, /* offset in file */ + size_t count, /* size of data to zero */ + loff_t end_size) /* max file size to set */ +{ + unsigned bytes; + struct page *page; + struct address_space *mapping; + char *kaddr; + int status; + + mapping = ip->i_mapping; + do { + unsigned long index, offset; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + break; + + kaddr = kmap(page); + status = mapping->a_ops->prepare_write(NULL, page, offset, + offset + bytes); + if (status) { + goto unlock; + } + + memset((void *) (kaddr + offset), 0, bytes); + flush_dcache_page(page); + status = mapping->a_ops->commit_write(NULL, page, offset, + offset + bytes); + if (!status) { + pos += bytes; + count -= bytes; + if (pos > i_size_read(ip)) + i_size_write(ip, pos < end_size ? pos : end_size); + } + +unlock: + kunmap(page); + unlock_page(page); + page_cache_release(page); + if (status) + break; + } while (count); + + return (-status); +} + +/* + * xfs_inval_cached_pages + * + * This routine is responsible for keeping direct I/O and buffered I/O + * somewhat coherent. From here we make sure that we're at least + * temporarily holding the inode I/O lock exclusively and then call + * the page cache to flush and invalidate any cached pages. If there + * are no cached pages this routine will be very quick. + */ +void +xfs_inval_cached_pages( + vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, + int write, + int relock) +{ + if (VN_CACHED(vp)) { + xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); + VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); + } + +} + +ssize_t /* bytes read, or (-) error */ +xfs_read( + bhv_desc_t *bdp, + struct kiocb *iocb, + const struct iovec *iovp, + unsigned int segs, + loff_t *offset, + int ioflags, + cred_t *credp) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + size_t size = 0; + ssize_t ret; + xfs_fsize_t n; + xfs_inode_t *ip; + xfs_mount_t *mp; + vnode_t *vp; + unsigned long seg; + + ip = XFS_BHVTOI(bdp); + vp = BHV_TO_VNODE(bdp); + mp = ip->i_mount; + + XFS_STATS_INC(xs_read_calls); + + /* START copy & waste from filemap.c */ + for (seg = 0; seg < segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + size += iv->iov_len; + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) + return XFS_ERROR(-EINVAL); + } + /* END copy & waste from filemap.c */ + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_buftarg_t *target = + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + if ((*offset & target->pbr_smask) || + (size & target->pbr_smask)) { + if (*offset == ip->i_d.di_size) { + return (0); + } + return -XFS_ERROR(EINVAL); + } + } + + n = XFS_MAXIOFFSET(mp) - *offset; + if ((n <= 0) || (size == 0)) + return 0; + + if (n < size) + size = n; + + if (XFS_FORCED_SHUTDOWN(mp)) { + return -EIO; + } + + if (unlikely(ioflags & IO_ISDIRECT)) + down(&inode->i_sem); + xfs_ilock(ip, XFS_IOLOCK_SHARED); + + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && + !(ioflags & IO_INVIS)) { + vrwlock_t locktype = VRWLOCK_READ; + + ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, + BHV_TO_VNODE(bdp), *offset, size, + FILP_DELAY_FLAG(file), &locktype); + if (ret) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + goto unlock_isem; + } + } + + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, + (void *)iovp, segs, *offset, ioflags); + ret = __generic_file_aio_read(iocb, iovp, segs, offset); + if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) + ret = wait_on_sync_kiocb(iocb); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (likely(!(ioflags & IO_INVIS))) + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + +unlock_isem: + if (unlikely(ioflags & IO_ISDIRECT)) + up(&inode->i_sem); + return ret; +} + +ssize_t +xfs_sendfile( + bhv_desc_t *bdp, + struct file *filp, + loff_t *offset, + int ioflags, + size_t count, + read_actor_t actor, + void *target, + cred_t *credp) +{ + ssize_t ret; + xfs_fsize_t n; + xfs_inode_t *ip; + xfs_mount_t *mp; + vnode_t *vp; + + ip = XFS_BHVTOI(bdp); + vp = BHV_TO_VNODE(bdp); + mp = ip->i_mount; + + XFS_STATS_INC(xs_read_calls); + + n = XFS_MAXIOFFSET(mp) - *offset; + if ((n <= 0) || (count == 0)) + return 0; + + if (n < count) + count = n; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && + (!(ioflags & IO_INVIS))) { + vrwlock_t locktype = VRWLOCK_READ; + int error; + + error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count, + FILP_DELAY_FLAG(filp), &locktype); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return -error; + } + } + xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, + (void *)(unsigned long)target, count, *offset, ioflags); + ret = generic_file_sendfile(filp, offset, count, actor, target); + + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + if (likely(!(ioflags & IO_INVIS))) + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + + return ret; +} + +/* + * This routine is called to handle zeroing any space in the last + * block of the file that is beyond the EOF. We do this since the + * size is being increased without writing anything to that block + * and we don't want anyone to read the garbage on the disk. + */ +STATIC int /* error (positive) */ +xfs_zero_last_block( + struct inode *ip, + xfs_iocore_t *io, + xfs_off_t offset, + xfs_fsize_t isize, + xfs_fsize_t end_size) +{ + xfs_fileoff_t last_fsb; + xfs_mount_t *mp; + int nimaps; + int zero_offset; + int zero_len; + int isize_fsb_offset; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + + ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(offset > isize); + + mp = io->io_mount; + + isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); + if (isize_fsb_offset == 0) { + /* + * There are no extra bytes in the last block on disk to + * zero, so return. 
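+ * (For example, with 4096-byte blocks an isize of 8192 returns
+ * here, while an isize of 5000 goes on to zero the 3192 bytes
+ * from offset 5000 to the end of that block.)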
+ */ + return 0; + } + + last_fsb = XFS_B_TO_FSBT(mp, isize); + nimaps = 1; + error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + &nimaps, NULL); + if (error) { + return error; + } + ASSERT(nimaps > 0); + /* + * If the block underlying isize is just a hole, then there + * is nothing to zero. + */ + if (imap.br_startblock == HOLESTARTBLOCK) { + return 0; + } + /* + * Zero the part of the last block beyond the EOF, and write it + * out sync. We need to drop the ilock while we do this so we + * don't deadlock when the buffer cache calls back to us. + */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + loff = XFS_FSB_TO_B(mp, last_fsb); + lsize = XFS_FSB_TO_B(mp, 1); + + zero_offset = isize_fsb_offset; + zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset; + + error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +/* + * Zero any on disk space between the current EOF and the new, + * larger EOF. This handles the normal case of zeroing the remainder + * of the last block in the file and the unusual case of zeroing blocks + * out beyond the size of the file. This second case only happens + * with fixed size extents and when the system crashes before the inode + * size was updated but after blocks were allocated. If fill is set, + * then any holes in the range are filled and zeroed. If not, the holes + * are left alone as holes. + */ + +int /* error (positive) */ +xfs_zero_eof( + vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, /* starting I/O offset */ + xfs_fsize_t isize, /* current inode size */ + xfs_fsize_t end_size) /* terminal inode size */ +{ + struct inode *ip = LINVFS_GET_IP(vp); + xfs_fileoff_t start_zero_fsb; + xfs_fileoff_t end_zero_fsb; + xfs_fileoff_t prev_zero_fsb; + xfs_fileoff_t zero_count_fsb; + xfs_fileoff_t last_fsb; + xfs_extlen_t buf_len_fsb; + xfs_extlen_t prev_zero_count; + xfs_mount_t *mp; + int nimaps; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + + mp = io->io_mount; + + /* + * First handle zeroing the block on which isize resides. + * We only zero a part of that block so it is handled specially. + */ + error = xfs_zero_last_block(ip, io, offset, isize, end_size); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + + /* + * Calculate the range between the new size and the old + * where blocks needing to be zeroed may exist. To get the + * block where the last byte in the file currently resides, + * we need to subtract one from the size and truncate back + * to a block boundary. We subtract 1 in case the size is + * exactly on a block boundary. + */ + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); + if (last_fsb == end_zero_fsb) { + /* + * The size was only incremented on its last block. + * We took care of that above, so just return. 
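+ * Worked example (assuming 4k blocks): isize == 5000 and
+ * offset == 7000 both land in block 1, so last_fsb equals
+ * end_zero_fsb and there is nothing more to do; isize == 5000
+ * with offset == 20000 gives last_fsb == 1, start_zero_fsb == 2
+ * and end_zero_fsb == 4, so blocks 2..4 are zeroed by the loop
+ * below.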
+ */ + return 0; + } + + ASSERT(start_zero_fsb <= end_zero_fsb); + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + while (start_zero_fsb <= end_zero_fsb) { + nimaps = 1; + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; + error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + 0, NULL, 0, &imap, &nimaps, NULL); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + ASSERT(nimaps > 0); + + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { + /* + * This loop handles initializing pages that were + * partially initialized by the code below this + * loop. It basically zeroes the part of the page + * that sits on a hole and sets the page as P_HOLE + * and calls remapf if it is a mapped file. + */ + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + start_zero_fsb = imap.br_startoff + + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + continue; + } + + /* + * There are blocks in the range requested. + * Zero them a single write at a time. We actually + * don't zero the entire range returned if it is + * too big and simply loop around to get the rest. + * That is not the most efficient thing to do, but it + * is simple and this path should not be exercised often. + */ + buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, + mp->m_writeio_blocks << 8); + /* + * Drop the inode lock while we're doing the I/O. + * We'll still have the iolock to protect us. + */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + + loff = XFS_FSB_TO_B(mp, start_zero_fsb); + lsize = XFS_FSB_TO_B(mp, buf_len_fsb); + + error = xfs_iozero(ip, loff, lsize, end_size); + + if (error) { + goto out_lock; + } + + prev_zero_fsb = start_zero_fsb; + prev_zero_count = buf_len_fsb; + start_zero_fsb = imap.br_startoff + buf_len_fsb; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + } + + return 0; + +out_lock: + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +ssize_t /* bytes written, or (-) error */ +xfs_write( + bhv_desc_t *bdp, + struct kiocb *iocb, + const struct iovec *iovp, + unsigned int nsegs, + loff_t *offset, + int ioflags, + cred_t *credp) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + unsigned long segs = nsegs; + xfs_inode_t *xip; + xfs_mount_t *mp; + ssize_t ret = 0, error = 0; + xfs_fsize_t isize, new_size; + xfs_iocore_t *io; + vnode_t *vp; + unsigned long seg; + int iolock; + int eventsent = 0; + vrwlock_t locktype; + size_t ocount = 0, count; + loff_t pos; + int need_isem = 1, need_flush = 0; + + XFS_STATS_INC(xs_write_calls); + + vp = BHV_TO_VNODE(bdp); + xip = XFS_BHVTOI(bdp); + + for (seg = 0; seg < segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. 
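+ * (The single test below works because OR-ing the running
+ * total with the new segment length is negative, when cast to
+ * ssize_t, iff either value has its top bit set; that catches
+ * both a bogus iov_len and wraparound of the sum in one
+ * comparison.)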
+ */ + ocount += iv->iov_len; + if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + segs = seg; + ocount -= iv->iov_len; /* This segment is no good */ + break; + } + + count = ocount; + pos = *offset; + + if (count == 0) + return 0; + + io = &xip->i_iocore; + mp = io->io_mount; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE); + + if (ioflags & IO_ISDIRECT) { + xfs_buftarg_t *target = + (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + if ((pos & target->pbr_smask) || (count & target->pbr_smask)) + return XFS_ERROR(-EINVAL); + + if (!VN_CACHED(vp) && pos < i_size_read(inode)) + need_isem = 0; + + if (VN_CACHED(vp)) + need_flush = 1; + } + +relock: + if (need_isem) { + iolock = XFS_IOLOCK_EXCL; + locktype = VRWLOCK_WRITE; + + down(&inode->i_sem); + } else { + iolock = XFS_IOLOCK_SHARED; + locktype = VRWLOCK_WRITE_DIRECT; + } + + xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); + + isize = i_size_read(inode); + + if (file->f_flags & O_APPEND) + *offset = isize; + +start: + error = -generic_write_checks(file, &pos, &count, + S_ISBLK(inode->i_mode)); + if (error) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + goto out_unlock_isem; + } + + new_size = pos + count; + if (new_size > isize) + io->io_new_size = new_size; + + if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && + !(ioflags & IO_INVIS) && !eventsent)) { + loff_t savedsize = pos; + int dmflags = FILP_DELAY_FLAG(file); + + if (need_isem) + dmflags |= DM_FLAGS_ISEM; + + xfs_iunlock(xip, XFS_ILOCK_EXCL); + error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, + pos, count, + dmflags, &locktype); + if (error) { + xfs_iunlock(xip, iolock); + goto out_unlock_isem; + } + xfs_ilock(xip, XFS_ILOCK_EXCL); + eventsent = 1; + + /* + * The iolock was dropped and reaquired in XFS_SEND_DATA + * so we have to recheck the size when appending. + * We will only "goto start;" once, since having sent the + * event prevents another call to XFS_SEND_DATA, which is + * what allows the size to change in the first place. + */ + if ((file->f_flags & O_APPEND) && savedsize != isize) { + pos = isize = xip->i_d.di_size; + goto start; + } + } + + /* + * On Linux, generic_file_write updates the times even if + * no data is copied in so long as the write had a size. + * + * We must update xfs' times since revalidate will overcopy xfs. + */ + if (!(ioflags & IO_INVIS)) { + xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + inode_update_time(inode, 1); + } + + /* + * If the offset is beyond the size of the file, we have a couple + * of things to do. First, if there is already space allocated + * we need to either create holes or zero the disk or ... + * + * If there is a page where the previous size lands, we need + * to zero it out up to the new size. + */ + + if (pos > isize) { + error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, + isize, pos + count); + if (error) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + goto out_unlock_isem; + } + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + + /* + * If we're writing the file then make sure to clear the + * setuid and setgid bits if the process is not being run + * by root. This keeps people from modifying setuid and + * setgid binaries. 
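+ * (S_ISGID only matters here together with S_IXGRP: setgid
+ * without group-execute marks mandatory locking rather than a
+ * setgid executable, so that combination is left alone by the
+ * check below.)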
+ */ + + if (((xip->i_d.di_mode & S_ISUID) || + ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == + (S_ISGID | S_IXGRP))) && + !capable(CAP_FSETID)) { + error = xfs_write_clear_setuid(xip); + if (likely(!error)) + error = -remove_suid(file->f_dentry); + if (unlikely(error)) { + xfs_iunlock(xip, iolock); + goto out_unlock_isem; + } + } + +retry: + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + + if ((ioflags & IO_ISDIRECT)) { + if (need_flush) { + xfs_inval_cached_trace(io, pos, -1, + ctooff(offtoct(pos)), -1); + VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), + -1, FI_REMAPF_LOCKED); + } + + if (need_isem) { + /* demote the lock now the cached pages are gone */ + XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + up(&inode->i_sem); + + iolock = XFS_IOLOCK_SHARED; + locktype = VRWLOCK_WRITE_DIRECT; + need_isem = 0; + } + + xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, + *offset, ioflags); + ret = generic_file_direct_write(iocb, iovp, + &segs, pos, offset, count, ocount); + + /* + * direct-io write to a hole: fall through to buffered I/O + * for completing the rest of the request. + */ + if (ret >= 0 && ret != count) { + XFS_STATS_ADD(xs_write_bytes, ret); + + pos += ret; + count -= ret; + + need_isem = 1; + ioflags &= ~IO_ISDIRECT; + xfs_iunlock(xip, iolock); + goto relock; + } + } else { + xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, + *offset, ioflags); + ret = generic_file_buffered_write(iocb, iovp, segs, + pos, offset, count, ret); + } + + current->backing_dev_info = NULL; + + if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) + ret = wait_on_sync_kiocb(iocb); + + if ((ret == -ENOSPC) && + DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && + !(ioflags & IO_INVIS)) { + + xfs_rwunlock(bdp, locktype); + error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, + DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, + 0, 0, 0); /* Delay flag intentionally unused */ + if (error) + goto out_unlock_isem; + xfs_rwlock(bdp, locktype); + pos = xip->i_d.di_size; + ret = 0; + goto retry; + } + + if (*offset > xip->i_d.di_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_d.di_size) { + xip->i_d.di_size = *offset; + i_size_write(inode, *offset); + xip->i_update_core = 1; + xip->i_update_size = 1; + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + + error = -ret; + if (ret <= 0) + goto out_unlock_internal; + + XFS_STATS_ADD(xs_write_bytes, ret); + + /* Handle various SYNC-type writes */ + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + /* + * If we're treating this as O_DSYNC and we have not updated the + * size, force the log. + */ + if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && + !(xip->i_update_size)) { + xfs_inode_log_item_t *iip = xip->i_itemp; + + /* + * If an allocation transaction occurred + * without extending the size, then we have to force + * the log up the proper point to ensure that the + * allocation is permanent. We can't count on + * the fact that buffered writes lock out direct I/O + * writes - the direct I/O write could have extended + * the size nontransactionally, then finished before + * we started. xfs_write_file will think that the file + * didn't grow but the update isn't safe unless the + * size change is logged. + * + * Force the log if we've committed a transaction + * against the inode or if someone else has and + * the commit record hasn't gone to disk (e.g. + * the inode is pinned). This guarantees that + * all changes affecting the inode are permanent + * when we return. 
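+ * In short: a transaction committed against the inode means
+ * forcing the log up to that commit's LSN; a merely pinned
+ * inode means forcing the whole log; otherwise no extra force
+ * is needed.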
+ */ + if (iip && iip->ili_last_lsn) { + xfs_log_force(mp, iip->ili_last_lsn, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } else if (xfs_ipincount(xip) > 0) { + xfs_log_force(mp, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } + + } else { + xfs_trans_t *tp; + + /* + * O_SYNC or O_DSYNC _with_ a size update are handled + * the same way. + * + * If the write was synchronous then we need to make + * sure that the inode modification time is permanent. + * We'll have updated the timestamp above, so here + * we use a synchronous transaction to log the inode. + * It's not fast, but it's necessary. + * + * If this a dsync write and the size got changed + * non-transactionally, then we need to ensure that + * the size change gets logged in a synchronous + * transaction. + */ + + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); + if ((error = xfs_trans_reserve(tp, 0, + XFS_SWRITE_LOG_RES(mp), + 0, 0, 0))) { + /* Transaction reserve failed */ + xfs_trans_cancel(tp, 0); + } else { + /* Transaction reserve successful */ + xfs_ilock(xip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, xip); + xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0, NULL); + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + if (error) + goto out_unlock_internal; + } + + xfs_rwunlock(bdp, locktype); + if (need_isem) + up(&inode->i_sem); + + error = sync_page_range(inode, mapping, pos, ret); + if (!error) + error = ret; + return error; + } + + out_unlock_internal: + xfs_rwunlock(bdp, locktype); + out_unlock_isem: + if (need_isem) + up(&inode->i_sem); + return -error; +} + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb(struct xfs_buf *bp) +{ + xfs_mount_t *mp; + + mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); + if (!XFS_FORCED_SHUTDOWN(mp)) { + pagebuf_iorequest(bp); + return 0; + } else { + xfs_buftrace("XFS__BDSTRAT IOERROR", bp); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (XFS_BUF_IODONE_FUNC(bp) == NULL && + (XFS_BUF_ISREAD(bp)) == 0) + return (xfs_bioerror_relse(bp)); + else + return (xfs_bioerror(bp)); + } +} + + +int +xfs_bmap(bhv_desc_t *bdp, + xfs_off_t offset, + ssize_t count, + int flags, + xfs_iomap_t *iomapp, + int *niomaps) +{ + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_iocore_t *io = &ip->i_iocore; + + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == + ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); + + return xfs_iomap(io, offset, count, flags, iomapp, niomaps); +} + +/* + * Wrapper around bdstrat so that we can stop data + * from going to disk in case we are shutting down the filesystem. + * Typically user data goes thru this path; one of the exceptions + * is the superblock. + */ +int +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + ASSERT(mp); + if (!XFS_FORCED_SHUTDOWN(mp)) { + /* Grio redirection would go here + * if (XFS_BUF_IS_GRIO(bp)) { + */ + + pagebuf_iorequest(bp); + return 0; + } + + xfs_buftrace("XFSBDSTRAT IOERROR", bp); + return (xfs_bioerror_relse(bp)); +} + +/* + * If the underlying (data/log/rt) device is readonly, there are some + * operations that cannot proceed. 
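+ * A typical caller (illustrative only, no such call appears in
+ * this file) checks up front and bails out:
+ *
+ *	if (xfs_dev_is_read_only(mp, "log recovery"))
+ *		return EROFS;
+ *
+ * noting that the error comes back positive, not negated.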
+ */ +int +xfs_dev_is_read_only( + xfs_mount_t *mp, + char *message) +{ + if (xfs_readonly_buftarg(mp->m_ddev_targp) || + xfs_readonly_buftarg(mp->m_logdev_targp) || + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { + cmn_err(CE_NOTE, + "XFS: %s required on read-only device.", message); + cmn_err(CE_NOTE, + "XFS: write access unavailable, cannot proceed."); + return EROFS; + } + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h new file mode 100644 index 00000000000..d723e35254a --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_LRW_H__ +#define __XFS_LRW_H__ + +struct vnode; +struct bhv_desc; +struct xfs_mount; +struct xfs_iocore; +struct xfs_inode; +struct xfs_bmbt_irec; +struct xfs_buf; +struct xfs_iomap; + +#if defined(XFS_RW_TRACE) +/* + * Defines for the trace mechanisms in xfs_lrw.c. + */ +#define XFS_RW_KTRACE_SIZE 128 + +#define XFS_READ_ENTER 1 +#define XFS_WRITE_ENTER 2 +#define XFS_IOMAP_READ_ENTER 3 +#define XFS_IOMAP_WRITE_ENTER 4 +#define XFS_IOMAP_READ_MAP 5 +#define XFS_IOMAP_WRITE_MAP 6 +#define XFS_IOMAP_WRITE_NOSPACE 7 +#define XFS_ITRUNC_START 8 +#define XFS_ITRUNC_FINISH1 9 +#define XFS_ITRUNC_FINISH2 10 +#define XFS_CTRUNC1 11 +#define XFS_CTRUNC2 12 +#define XFS_CTRUNC3 13 +#define XFS_CTRUNC4 14 +#define XFS_CTRUNC5 15 +#define XFS_CTRUNC6 16 +#define XFS_BUNMAPI 17 +#define XFS_INVAL_CACHED 18 +#define XFS_DIORD_ENTER 19 +#define XFS_DIOWR_ENTER 20 +#define XFS_SENDFILE_ENTER 21 +#define XFS_WRITEPAGE_ENTER 22 +#define XFS_RELEASEPAGE_ENTER 23 +#define XFS_IOMAP_ALLOC_ENTER 24 +#define XFS_IOMAP_ALLOC_MAP 25 +#define XFS_IOMAP_UNWRITTEN 26 +extern void xfs_rw_enter_trace(int, struct xfs_iocore *, + void *, size_t, loff_t, int); +extern void xfs_inval_cached_trace(struct xfs_iocore *, + xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); +#else +#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags) +#define xfs_inval_cached_trace(io, offset, len, first, last) +#endif + +/* + * Maximum count of bmaps used by read and write paths. 
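+ * (That is, one pass through the read or write path asks
+ * xfs_iomap() for at most this many extent mappings at a time.)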
+ */ +#define XFS_MAX_RW_NBMAPS 4 + +extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + +extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, + xfs_fsize_t, xfs_fsize_t); +extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *, + xfs_off_t, int, int); +extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, + loff_t *, int, size_t, read_actor_t, + void *, struct cred *); + +extern int xfs_dev_is_read_only(struct xfs_mount *, char *); + +#define XFS_FSB_TO_DB_IO(io,fsb) \ + (((io)->io_flags & XFS_IOCORE_RT) ? \ + XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ + XFS_FSB_TO_DADDR((io)->io_mount, (fsb))) + +#endif /* __XFS_LRW_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c new file mode 100644 index 00000000000..aaf5ddba47f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include <linux/proc_fs.h> + +DEFINE_PER_CPU(struct xfsstats, xfsstats); + +STATIC int +xfs_read_xfsstats( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int c, i, j, len, val; + __uint64_t xs_xstrat_bytes = 0; + __uint64_t xs_write_bytes = 0; + __uint64_t xs_read_bytes = 0; + + static struct xstats_entry { + char *desc; + int endpoint; + } xstats[] = { + { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, + { "abt", XFSSTAT_END_ALLOC_BTREE }, + { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, + { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, + { "dir", XFSSTAT_END_DIRECTORY_OPS }, + { "trans", XFSSTAT_END_TRANSACTIONS }, + { "ig", XFSSTAT_END_INODE_OPS }, + { "log", XFSSTAT_END_LOG_OPS }, + { "push_ail", XFSSTAT_END_TAIL_PUSHING }, + { "xstrat", XFSSTAT_END_WRITE_CONVERT }, + { "rw", XFSSTAT_END_READ_WRITE_OPS }, + { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, + { "icluster", XFSSTAT_END_INODE_CLUSTER }, + { "vnodes", XFSSTAT_END_VNODE_OPS }, + { "buf", XFSSTAT_END_BUF }, + }; + + /* Loop over all stats groups */ + for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) { + len += sprintf(buffer + len, xstats[i].desc); + /* inner loop does each group */ + while (j < xstats[i].endpoint) { + val = 0; + /* sum over all cpus */ + for (c = 0; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + val += *(((__u32*)&per_cpu(xfsstats, c) + j)); + } + len += sprintf(buffer + len, " %u", val); + j++; + } + buffer[len++] = '\n'; + } + /* extra precision counters */ + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) continue; + xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; + xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; + xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + } + + len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", + xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); + len += sprintf(buffer + len, "debug %u\n", +#if defined(DEBUG) + 1); +#else + 0); +#endif + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} + +void +xfs_init_procfs(void) +{ + if (!proc_mkdir("fs/xfs", NULL)) + return; + create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); +} + +void +xfs_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/stat", NULL); + remove_proc_entry("fs/xfs", NULL); +} diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h new file mode 100644 index 00000000000..3f756a6c3eb --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_STATS_H__ +#define __XFS_STATS_H__ + + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +#include <linux/percpu.h> + +/* + * XFS global statistics + */ +struct xfsstats { +# define XFSSTAT_END_EXTENT_ALLOC 4 + __uint32_t xs_allocx; + __uint32_t xs_allocb; + __uint32_t xs_freex; + __uint32_t xs_freeb; +# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) + __uint32_t xs_abt_lookup; + __uint32_t xs_abt_compare; + __uint32_t xs_abt_insrec; + __uint32_t xs_abt_delrec; +# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) + __uint32_t xs_blk_mapr; + __uint32_t xs_blk_mapw; + __uint32_t xs_blk_unmap; + __uint32_t xs_add_exlist; + __uint32_t xs_del_exlist; + __uint32_t xs_look_exlist; + __uint32_t xs_cmp_exlist; +# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) + __uint32_t xs_bmbt_lookup; + __uint32_t xs_bmbt_compare; + __uint32_t xs_bmbt_insrec; + __uint32_t xs_bmbt_delrec; +# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) + __uint32_t xs_dir_lookup; + __uint32_t xs_dir_create; + __uint32_t xs_dir_remove; + __uint32_t xs_dir_getdents; +# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) + __uint32_t xs_trans_sync; + __uint32_t xs_trans_async; + __uint32_t xs_trans_empty; +# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) + __uint32_t xs_ig_attempts; + __uint32_t xs_ig_found; + __uint32_t xs_ig_frecycle; + __uint32_t xs_ig_missed; + __uint32_t xs_ig_dup; + __uint32_t xs_ig_reclaims; + __uint32_t xs_ig_attrchg; +# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) + __uint32_t xs_log_writes; + __uint32_t xs_log_blocks; + __uint32_t xs_log_noiclogs; + __uint32_t xs_log_force; + __uint32_t xs_log_force_sleep; +# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) + __uint32_t xs_try_logspace; + __uint32_t xs_sleep_logspace; + __uint32_t xs_push_ail; + __uint32_t xs_push_ail_success; + __uint32_t xs_push_ail_pushbuf; + __uint32_t xs_push_ail_pinned; + __uint32_t xs_push_ail_locked; + __uint32_t xs_push_ail_flushing; + __uint32_t xs_push_ail_restarts; + __uint32_t xs_push_ail_flush; +# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) + __uint32_t xs_xstrat_quick; + __uint32_t xs_xstrat_split; +# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) + __uint32_t xs_write_calls; + __uint32_t xs_read_calls; +# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) + __uint32_t xs_attr_get; + __uint32_t xs_attr_set; + __uint32_t xs_attr_remove; + __uint32_t xs_attr_list; +# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) + __uint32_t xs_iflush_count; + __uint32_t xs_icluster_flushcnt; + __uint32_t xs_icluster_flushinode; +# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) + __uint32_t vn_active; /* # vnodes not on free lists */ + 
__uint32_t vn_alloc; /* # times vn_alloc called */ + __uint32_t vn_get; /* # times vn_get called */ + __uint32_t vn_hold; /* # times vn_hold called */ + __uint32_t vn_rele; /* # times vn_rele called */ + __uint32_t vn_reclaim; /* # times vn_reclaim called */ + __uint32_t vn_remove; /* # times vn_remove called */ + __uint32_t vn_free; /* # times vn_free called */ +#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) + __uint32_t pb_get; + __uint32_t pb_create; + __uint32_t pb_get_locked; + __uint32_t pb_get_locked_waited; + __uint32_t pb_busy_locked; + __uint32_t pb_miss_locked; + __uint32_t pb_page_retries; + __uint32_t pb_page_found; + __uint32_t pb_get_read; +/* Extra precision counters */ + __uint64_t xs_xstrat_bytes; + __uint64_t xs_write_bytes; + __uint64_t xs_read_bytes; +}; + +DECLARE_PER_CPU(struct xfsstats, xfsstats); + +/* + * We don't disable preempt, not too worried about poking the + * wrong CPU's stat for now (also aggregated before reporting). + */ +#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) +#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) +#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) + +extern void xfs_init_procfs(void); +extern void xfs_cleanup_procfs(void); + + +#else /* !CONFIG_PROC_FS */ + +# define XFS_STATS_INC(count) +# define XFS_STATS_DEC(count) +# define XFS_STATS_ADD(count, inc) + +static __inline void xfs_init_procfs(void) { }; +static __inline void xfs_cleanup_procfs(void) { }; + +#endif /* !CONFIG_PROC_FS */ + +#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c new file mode 100644 index 00000000000..53dc658cafa --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -0,0 +1,912 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_version.h" +#include "xfs_ioctl32.h" + +#include <linux/namei.h> +#include <linux/init.h> +#include <linux/mount.h> +#include <linux/writeback.h> + +STATIC struct quotactl_ops linvfs_qops; +STATIC struct super_operations linvfs_sops; +STATIC kmem_zone_t *linvfs_inode_zone; + +STATIC struct xfs_mount_args * +xfs_args_allocate( + struct super_block *sb) +{ + struct xfs_mount_args *args; + + args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); + args->logbufs = args->logbufsize = -1; + strncpy(args->fsname, sb->s_id, MAXNAMELEN); + + /* Copy the already-parsed mount(2) flags we're interested in */ + if (sb->s_flags & MS_NOATIME) + args->flags |= XFSMNT_NOATIME; + if (sb->s_flags & MS_DIRSYNC) + args->flags |= XFSMNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + args->flags |= XFSMNT_WSYNC; + + /* Default to 32 bit inodes on Linux all the time */ + args->flags |= XFSMNT_32BITINODES; + + return args; +} + +__uint64_t +xfs_max_file_offset( + unsigned int blockshift) +{ + unsigned int pagefactor = 1; + unsigned int bitshift = BITS_PER_LONG - 1; + + /* Figure out maximum filesize, on Linux this can depend on + * the filesystem blocksize (on 32 bit platforms). + * __block_prepare_write does this in an [unsigned] long... + * page->index << (PAGE_CACHE_SHIFT - bbits) + * So, for page sized blocks (4K on 32 bit platforms), + * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is + * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * but for smaller blocksizes it is less (bbits = log2 bsize). + * Note1: get_block_t takes a long (implicit cast from above) + * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch + * can optionally convert the [unsigned] long from above into + * an [unsigned] long long. 
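+ * Worked out for the common cases (4k pages assumed):
+ *
+ *	64 bit:             pagefactor 1,    bitshift 63: 2^63 - 1
+ *	32 bit, CONFIG_LBD: pagefactor 4096, bitshift 32: 2^44 - 1 (16T - 1)
+ *	32 bit, 4k blocks:  pagefactor 4096, bitshift 31: 2^43 - 1 (8T - 1)
+ *
+ * with smaller blocksizes scaling pagefactor down accordingly.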
+ */ + +#if BITS_PER_LONG == 32 +# if defined(CONFIG_LBD) + ASSERT(sizeof(sector_t) == 8); + pagefactor = PAGE_CACHE_SIZE; + bitshift = BITS_PER_LONG; +# else + pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); +# endif +#endif + + return (((__uint64_t)pagefactor) << bitshift) - 1; +} + +STATIC __inline__ void +xfs_set_inodeops( + struct inode *inode) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + + if (vp->v_type == VNON) { + vn_mark_bad(vp); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &linvfs_file_inode_operations; + inode->i_fop = &linvfs_file_operations; + inode->i_mapping->a_ops = &linvfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &linvfs_dir_inode_operations; + inode->i_fop = &linvfs_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &linvfs_symlink_inode_operations; + if (inode->i_blocks) + inode->i_mapping->a_ops = &linvfs_aops; + } else { + inode->i_op = &linvfs_file_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } +} + +STATIC __inline__ void +xfs_revalidate_inode( + xfs_mount_t *mp, + vnode_t *vp, + xfs_inode_t *ip) +{ + struct inode *inode = LINVFS_GET_IP(vp); + + inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { + inode->i_rdev = 0; + } else { + xfs_dev_t dev = ip->i_df.if_u2.if_rdev; + inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); + } + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; + vp->v_flag &= ~VMODIFIED; +} + +void +xfs_initialize_vnode( + bhv_desc_t *bdp, + vnode_t *vp, + bhv_desc_t *inode_bhv, + int unlock) +{ + xfs_inode_t *ip = XFS_BHVTOI(inode_bhv); + struct inode *inode = LINVFS_GET_IP(vp); + + if (!inode_bhv->bd_vobj) { + vp->v_vfsp = bhvtovfs(bdp); + bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops); + bhv_insert(VN_BHV_HEAD(vp), inode_bhv); + } + + /* + * We need to set the ops vectors, and unlock the inode, but if + * we have been called during the new inode create process, it is + * too early to fill in the Linux inode. We will get called a + * second time once the inode is properly set up, and then we can + * finish our work. 
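+ * The barrier() below is there so the compiler cannot reorder
+ * the clearing of XFS_INEW past the unlock_new_inode() call
+ * that makes the inode visible to waiters.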
+ */ + if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { + vp->v_type = IFTOVT(ip->i_d.di_mode); + xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); + xfs_set_inodeops(inode); + + ip->i_flags &= ~XFS_INEW; + barrier(); + + unlock_new_inode(inode); + } +} + +int +xfs_blkdev_get( + xfs_mount_t *mp, + const char *name, + struct block_device **bdevp) +{ + int error = 0; + + *bdevp = open_bdev_excl(name, 0, mp); + if (IS_ERR(*bdevp)) { + error = PTR_ERR(*bdevp); + printk("XFS: Invalid device [%s], error=%d\n", name, error); + } + + return -error; +} + +void +xfs_blkdev_put( + struct block_device *bdev) +{ + if (bdev) + close_bdev_excl(bdev); +} + + +STATIC struct inode * +linvfs_alloc_inode( + struct super_block *sb) +{ + vnode_t *vp; + + vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, + kmem_flags_convert(KM_SLEEP)); + if (!vp) + return NULL; + return LINVFS_GET_IP(vp); +} + +STATIC void +linvfs_destroy_inode( + struct inode *inode) +{ + kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); +} + +STATIC void +init_once( + void *data, + kmem_cache_t *cachep, + unsigned long flags) +{ + vnode_t *vp = (vnode_t *)data; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(LINVFS_GET_IP(vp)); +} + +STATIC int +init_inodecache( void ) +{ + linvfs_inode_zone = kmem_cache_create("linvfs_icache", + sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (linvfs_inode_zone == NULL) + return -ENOMEM; + return 0; +} + +STATIC void +destroy_inodecache( void ) +{ + if (kmem_cache_destroy(linvfs_inode_zone)) + printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); +} + +/* + * Attempt to flush the inode, this will actually fail + * if the inode is pinned, but we dirty the inode again + * at the point when it is unpinned after a log write, + * since this is when the inode itself becomes flushable. + */ +STATIC int +linvfs_write_inode( + struct inode *inode, + int sync) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error = 0, flags = FLUSH_INODE; + + if (vp) { + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if (sync) + flags |= FLUSH_SYNC; + VOP_IFLUSH(vp, flags, error); + if (error == EAGAIN) { + if (sync) + VOP_IFLUSH(vp, flags | FLUSH_LOG, error); + else + error = 0; + } + } + + return -error; +} + +STATIC void +linvfs_clear_inode( + struct inode *inode) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + + if (vp) { + vn_rele(vp); + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + /* + * Do all our cleanup, and remove this vnode. + */ + vn_remove(vp); + } +} + + +/* + * Enqueue a work item to be picked up by the vfs xfssyncd thread. + * Doing this has two advantages: + * - It saves on stack space, which is tight in certain situations + * - It can be used (with care) as a mechanism to avoid deadlocks. + * Flushing while allocating in a full filesystem requires both. + */ +STATIC void +xfs_syncd_queue_work( + struct vfs *vfs, + void *data, + void (*syncer)(vfs_t *, void *)) +{ + vfs_sync_work_t *work; + + work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP); + INIT_LIST_HEAD(&work->w_list); + work->w_syncer = syncer; + work->w_data = data; + work->w_vfs = vfs; + spin_lock(&vfs->vfs_sync_lock); + list_add_tail(&work->w_list, &vfs->vfs_sync_list); + spin_unlock(&vfs->vfs_sync_lock); + wake_up_process(vfs->vfs_sync_task); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. 
At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room... + */ +STATIC void +xfs_flush_inode_work( + vfs_t *vfs, + void *inode) +{ + filemap_flush(((struct inode *)inode)->i_mapping); + iput((struct inode *)inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct vfs *vfs = XFS_MTOVFS(ip->i_mount); + + igrab(inode); + xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); + delay(HZ/2); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). + */ +STATIC void +xfs_flush_device_work( + vfs_t *vfs, + void *inode) +{ + sync_blockdev(vfs->vfs_super->s_bdev); + iput((struct inode *)inode); +} + +void +xfs_flush_device( + xfs_inode_t *ip) +{ + struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct vfs *vfs = XFS_MTOVFS(ip->i_mount); + + igrab(inode); + xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); + delay(HZ/2); + xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); +} + +#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) +STATIC void +vfs_sync_worker( + vfs_t *vfsp, + void *unused) +{ + int error; + + if (!(vfsp->vfs_flag & VFS_RDONLY)) + VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); + vfsp->vfs_sync_seq++; + wmb(); + wake_up(&vfsp->vfs_wait_single_sync_task); +} + +STATIC int +xfssyncd( + void *arg) +{ + long timeleft; + vfs_t *vfsp = (vfs_t *) arg; + struct list_head tmp; + struct vfs_sync_work *work, *n; + + daemonize("xfssyncd"); + + vfsp->vfs_sync_work.w_vfs = vfsp; + vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; + vfsp->vfs_sync_task = current; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + INIT_LIST_HEAD(&tmp); + timeleft = (xfs_syncd_centisecs * HZ) / 100; + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + timeleft = schedule_timeout(timeleft); + /* swsusp */ + try_to_freeze(PF_FREEZE); + if (vfsp->vfs_flag & VFS_UMOUNT) + break; + + spin_lock(&vfsp->vfs_sync_lock); + /* + * We can get woken by laptop mode, to do a sync - + * that's the (only!) case where the list would be + * empty with time remaining. 
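+ * So below: when the timeout has expired, or nothing else is
+ * queued, the periodic sync work is (re)queued; either way,
+ * everything on the list is then moved off and run.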
+ */ + if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { + if (!timeleft) + timeleft = (xfs_syncd_centisecs * HZ) / 100; + INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); + list_add_tail(&vfsp->vfs_sync_work.w_list, + &vfsp->vfs_sync_list); + } + list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list) + list_move(&work->w_list, &tmp); + spin_unlock(&vfsp->vfs_sync_lock); + + list_for_each_entry_safe(work, n, &tmp, w_list) { + (*work->w_syncer)(vfsp, work->w_data); + list_del(&work->w_list); + if (work == &vfsp->vfs_sync_work) + continue; + kmem_free(work, sizeof(struct vfs_sync_work)); + } + } + + vfsp->vfs_sync_task = NULL; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + return 0; +} + +STATIC int +linvfs_start_syncd( + vfs_t *vfsp) +{ + int pid; + + pid = kernel_thread(xfssyncd, (void *) vfsp, + CLONE_VM | CLONE_FS | CLONE_FILES); + if (pid < 0) + return -pid; + wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + return 0; +} + +STATIC void +linvfs_stop_syncd( + vfs_t *vfsp) +{ + vfsp->vfs_flag |= VFS_UMOUNT; + wmb(); + + wake_up_process(vfsp->vfs_sync_task); + wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); +} + +STATIC void +linvfs_put_super( + struct super_block *sb) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + linvfs_stop_syncd(vfsp); + VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); + if (!error) + VFS_UNMOUNT(vfsp, 0, NULL, error); + if (error) { + printk("XFS unmount got error %d\n", error); + printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); + return; + } + + vfs_deallocate(vfsp); +} + +STATIC void +linvfs_write_super( + struct super_block *sb) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + if (sb->s_flags & MS_RDONLY) { + sb->s_dirt = 0; /* paranoia */ + return; + } + /* Push the log and superblock a little */ + VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error); + sb->s_dirt = 0; +} + +STATIC int +linvfs_sync_super( + struct super_block *sb, + int wait) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + int flags = SYNC_FSDATA; + + if (wait) + flags |= SYNC_WAIT; + + VFS_SYNC(vfsp, flags, NULL, error); + sb->s_dirt = 0; + + if (unlikely(laptop_mode)) { + int prev_sync_seq = vfsp->vfs_sync_seq; + + /* + * The disk must be active because we're syncing. + * We schedule xfssyncd now (now that the disk is + * active) instead of later (when it might not be). + */ + wake_up_process(vfsp->vfs_sync_task); + /* + * We have to wait for the sync iteration to complete. + * If we don't, the disk activity caused by the sync + * will come after the sync is completed, and that + * triggers another sync from laptop mode. 
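+ * vfs_sync_worker() bumps vfs_sync_seq and wakes
+ * vfs_wait_single_sync_task when an iteration completes, which
+ * is exactly what the wait below keys on.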
+ */ + wait_event(vfsp->vfs_wait_single_sync_task, + vfsp->vfs_sync_seq != prev_sync_seq); + } + + return -error; +} + +STATIC int +linvfs_statfs( + struct super_block *sb, + struct kstatfs *statp) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_STATVFS(vfsp, statp, NULL, error); + return -error; +} + +STATIC int +linvfs_remount( + struct super_block *sb, + int *flags, + char *options) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb); + int error; + + VFS_PARSEARGS(vfsp, options, args, 1, error); + if (!error) + VFS_MNTUPDATE(vfsp, flags, args, error); + kmem_free(args, sizeof(*args)); + return -error; +} + +STATIC void +linvfs_freeze_fs( + struct super_block *sb) +{ + VFS_FREEZE(LINVFS_GET_VFS(sb)); +} + +STATIC int +linvfs_show_options( + struct seq_file *m, + struct vfsmount *mnt) +{ + struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb); + int error; + + VFS_SHOWARGS(vfsp, m, error); + return error; +} + +STATIC int +linvfs_getxstate( + struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); + return -error; +} + +STATIC int +linvfs_setxstate( + struct super_block *sb, + unsigned int flags, + int op) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); + return -error; +} + +STATIC int +linvfs_getxquota( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error, getmode; + + getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA; + VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); + return -error; +} + +STATIC int +linvfs_setxquota( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error, setmode; + + setmode = (type == GRPQUOTA) ? 
Q_XSETGQLIM : Q_XSETQLIM; + VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); + return -error; +} + +STATIC int +linvfs_fill_super( + struct super_block *sb, + void *data, + int silent) +{ + vnode_t *rootvp; + struct vfs *vfsp = vfs_allocate(); + struct xfs_mount_args *args = xfs_args_allocate(sb); + struct kstatfs statvfs; + int error, error2; + + vfsp->vfs_super = sb; + LINVFS_SET_VFS(sb, vfsp); + if (sb->s_flags & MS_RDONLY) + vfsp->vfs_flag |= VFS_RDONLY; + bhv_insert_all_vfsops(vfsp); + + VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); + if (error) { + bhv_remove_all_vfsops(vfsp, 1); + goto fail_vfsop; + } + + sb_min_blocksize(sb, BBSIZE); +#ifdef CONFIG_XFS_EXPORT + sb->s_export_op = &linvfs_export_ops; +#endif + sb->s_qcop = &linvfs_qops; + sb->s_op = &linvfs_sops; + + VFS_MOUNT(vfsp, args, NULL, error); + if (error) { + bhv_remove_all_vfsops(vfsp, 1); + goto fail_vfsop; + } + + VFS_STATVFS(vfsp, &statvfs, NULL, error); + if (error) + goto fail_unmount; + + sb->s_dirt = 1; + sb->s_magic = statvfs.f_type; + sb->s_blocksize = statvfs.f_bsize; + sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; + sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_time_gran = 1; + set_posix_acl_flag(sb); + + VFS_ROOT(vfsp, &rootvp, error); + if (error) + goto fail_unmount; + + sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp)); + if (!sb->s_root) { + error = ENOMEM; + goto fail_vnrele; + } + if (is_bad_inode(sb->s_root->d_inode)) { + error = EINVAL; + goto fail_vnrele; + } + if ((error = linvfs_start_syncd(vfsp))) + goto fail_vnrele; + vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address); + + kmem_free(args, sizeof(*args)); + return 0; + +fail_vnrele: + if (sb->s_root) { + dput(sb->s_root); + sb->s_root = NULL; + } else { + VN_RELE(rootvp); + } + +fail_unmount: + VFS_UNMOUNT(vfsp, 0, NULL, error2); + +fail_vfsop: + vfs_deallocate(vfsp); + kmem_free(args, sizeof(*args)); + return -error; +} + +STATIC struct super_block * +linvfs_get_sb( + struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super); +} + +STATIC struct super_operations linvfs_sops = { + .alloc_inode = linvfs_alloc_inode, + .destroy_inode = linvfs_destroy_inode, + .write_inode = linvfs_write_inode, + .clear_inode = linvfs_clear_inode, + .put_super = linvfs_put_super, + .write_super = linvfs_write_super, + .sync_fs = linvfs_sync_super, + .write_super_lockfs = linvfs_freeze_fs, + .statfs = linvfs_statfs, + .remount_fs = linvfs_remount, + .show_options = linvfs_show_options, +}; + +STATIC struct quotactl_ops linvfs_qops = { + .get_xstate = linvfs_getxstate, + .set_xstate = linvfs_setxstate, + .get_xquota = linvfs_getxquota, + .set_xquota = linvfs_setxquota, +}; + +STATIC struct file_system_type xfs_fs_type = { + .owner = THIS_MODULE, + .name = "xfs", + .get_sb = linvfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + + +STATIC int __init +init_xfs_fs( void ) +{ + int error; + struct sysinfo si; + static char message[] __initdata = KERN_INFO \ + XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; + + printk(message); + + si_meminfo(&si); + xfs_physmem = si.totalram; + + ktrace_init(64); + + error = init_inodecache(); + if (error < 0) + goto undo_inodecache; + + error = pagebuf_init(); + if (error < 0) + goto undo_pagebuf; + + vn_init(); + xfs_init(); + uuid_init(); + vfs_initquota(); + + error = register_filesystem(&xfs_fs_type); + if (error) + goto undo_register; + 
XFS_DM_INIT(&xfs_fs_type); + return 0; + +undo_register: + pagebuf_terminate(); + +undo_pagebuf: + destroy_inodecache(); + +undo_inodecache: + return error; +} + +STATIC void __exit +exit_xfs_fs( void ) +{ + vfs_exitquota(); + XFS_DM_EXIT(&xfs_fs_type); + unregister_filesystem(&xfs_fs_type); + xfs_cleanup(); + pagebuf_terminate(); + destroy_inodecache(); + ktrace_uninit(); +} + +module_init(init_xfs_fs); +module_exit(exit_xfs_fs); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); +MODULE_LICENSE("GPL"); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h new file mode 100644 index 00000000000..ec7e0035c73 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPER_H__ +#define __XFS_SUPER_H__ + +#ifdef CONFIG_XFS_DMAPI +# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) +# define vfs_initdmapi() dmapi_init() +# define vfs_exitdmapi() dmapi_uninit() +#else +# define vfs_insertdmapi(vfs) do { } while (0) +# define vfs_initdmapi() do { } while (0) +# define vfs_exitdmapi() do { } while (0) +#endif + +#ifdef CONFIG_XFS_QUOTA +# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) +extern void xfs_qm_init(void); +extern void xfs_qm_exit(void); +# define vfs_initquota() xfs_qm_init() +# define vfs_exitquota() xfs_qm_exit() +#else +# define vfs_insertquota(vfs) do { } while (0) +# define vfs_initquota() do { } while (0) +# define vfs_exitquota() do { } while (0) +#endif + +#ifdef CONFIG_XFS_POSIX_ACL +# define XFS_ACL_STRING "ACLs, " +# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +#else +# define XFS_ACL_STRING +# define set_posix_acl_flag(sb) do { } while (0) +#endif + +#ifdef CONFIG_XFS_SECURITY +# define XFS_SECURITY_STRING "security attributes, " +# define ENOSECURITY 0 +#else +# define XFS_SECURITY_STRING +# define ENOSECURITY EOPNOTSUPP +#endif + +#ifdef CONFIG_XFS_RT +# define XFS_REALTIME_STRING "realtime, " +#else +# define XFS_REALTIME_STRING +#endif + +#if XFS_BIG_BLKNOS +# if XFS_BIG_INUMS +# define XFS_BIGFS_STRING "large block/inode numbers, " +# else +# define XFS_BIGFS_STRING "large block numbers, " +# endif +#else +# define XFS_BIGFS_STRING +#endif + +#ifdef CONFIG_XFS_TRACE +# define XFS_TRACE_STRING "tracing, " +#else +# define XFS_TRACE_STRING +#endif + +#ifdef CONFIG_XFS_DMAPI +# define XFS_DMAPI_STRING "dmapi support, " +#else +# define XFS_DMAPI_STRING +#endif + +#ifdef DEBUG +# define XFS_DBG_STRING "debug" +#else +# define XFS_DBG_STRING "no debug" +#endif + +#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ + XFS_SECURITY_STRING \ + XFS_REALTIME_STRING \ + XFS_BIGFS_STRING \ + XFS_TRACE_STRING \ + XFS_DMAPI_STRING \ + XFS_DBG_STRING /* DBG must be last */ + +#define LINVFS_GET_VFS(s) \ + (vfs_t *)((s)->s_fs_info) +#define LINVFS_SET_VFS(s, vfsp) \ + ((s)->s_fs_info = vfsp) + +struct xfs_inode; +struct xfs_mount; +struct xfs_buftarg; +struct block_device; + +extern __uint64_t xfs_max_file_offset(unsigned int); + +extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); + +extern void xfs_flush_inode(struct xfs_inode *); +extern void xfs_flush_device(struct xfs_inode *); + +extern int xfs_blkdev_get(struct xfs_mount *, const char *, + struct block_device **); +extern void xfs_blkdev_put(struct block_device *); + +extern struct export_operations linvfs_export_ops; + +#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c new file mode 100644 index 00000000000..0dc010356f4 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_rw.h" +#include <linux/sysctl.h> +#include <linux/proc_fs.h> + + +static struct ctl_table_header *xfs_table_header; + + +#ifdef CONFIG_PROC_FS +STATIC int +xfs_stats_clear_proc_handler( + ctl_table *ctl, + int write, + struct file *filp, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int c, ret, *valp = ctl->data; + __uint32_t vn_active; + + ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + + if (!ret && write && *valp) { + printk("XFS Clearing xfsstats\n"); + for (c = 0; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu(xfsstats, c).vn_active; + memset(&per_cpu(xfsstats, c), 0, + sizeof(struct xfsstats)); + per_cpu(xfsstats, c).vn_active = vn_active; + preempt_enable(); + } + xfs_stats_clear = 0; + } + + return ret; +} +#endif /* CONFIG_PROC_FS */ + +STATIC ctl_table xfs_table[] = { + {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, + + {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, + + {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, + + {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, + + {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.error_level.min, &xfs_params.error_level.max}, + + {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max}, + + {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, + + {XFS_INHERIT_NODUMP, "inherit_nodump", 
&xfs_params.inherit_nodump.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, + + {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, + + {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max}, + + {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max}, + + {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max}, + + {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.rotorstep.min, &xfs_params.rotorstep.max}, + + /* please keep this the last entry */ +#ifdef CONFIG_PROC_FS + {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, + sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, + &sysctl_intvec, NULL, + &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, +#endif /* CONFIG_PROC_FS */ + + {0} +}; + +STATIC ctl_table xfs_dir_table[] = { + {FS_XFS, "xfs", NULL, 0, 0555, xfs_table}, + {0} +}; + +STATIC ctl_table xfs_root_table[] = { + {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table}, + {0} +}; + +void +xfs_sysctl_register(void) +{ + xfs_table_header = register_sysctl_table(xfs_root_table, 1); +} + +void +xfs_sysctl_unregister(void) +{ + if (xfs_table_header) + unregister_sysctl_table(xfs_table_header); +} diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h new file mode 100644 index 00000000000..a39a95020a5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
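Once register_sysctl_table() has run, every entry in xfs_table[] above surfaces as a file under /proc/sys/fs/xfs/, and writes are bounds-checked by proc_dointvec_minmax against the min/max pair in the table. A minimal userspace sketch of reading one tunable, assuming an XFS-enabled kernel with sysctl and procfs support; writing would additionally need root:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* "error_level" comes from the xfs_table[] entry above. */
	FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");
	int level;

	if (!f) {
		perror("open");	/* no XFS sysctls registered on this kernel */
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%d", &level) == 1)
		printf("error_level = %d\n", level);
	fclose(f);
	return 0;
}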
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef __XFS_SYSCTL_H__ +#define __XFS_SYSCTL_H__ + +#include <linux/sysctl.h> + +/* + * Tunable xfs parameters + */ + +typedef struct xfs_sysctl_val { + int min; + int val; + int max; +} xfs_sysctl_val_t; + +typedef struct xfs_param { + xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ + xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is + * not a member of parent dir GID. */ + xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ + xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ + xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ + xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ + xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ + xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ + xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ + xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ + xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ + xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ + xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ + xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ +} xfs_param_t; + +/* + * xfs_error_level: + * + * How much error reporting will be done when internal problems are + * encountered. These problems normally return an EFSCORRUPTED to their + * caller, with no other information reported. + * + * 0 No error reports + * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown + * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any + * additional errors that are known to not cause shutdowns) + * + * xfs_panic_mask bit 0x8 turns the error reports into panics + */ + +enum { + /* XFS_REFCACHE_SIZE = 1 */ + /* XFS_REFCACHE_PURGE = 2 */ + XFS_RESTRICT_CHOWN = 3, + XFS_SGID_INHERIT = 4, + XFS_SYMLINK_MODE = 5, + XFS_PANIC_MASK = 6, + XFS_ERRLEVEL = 7, + XFS_SYNCD_TIMER = 8, + /* XFS_PROBE_DMAPI = 9 */ + /* XFS_PROBE_IOOPS = 10 */ + /* XFS_PROBE_QUOTA = 11 */ + XFS_STATS_CLEAR = 12, + XFS_INHERIT_SYNC = 13, + XFS_INHERIT_NODUMP = 14, + XFS_INHERIT_NOATIME = 15, + XFS_BUF_TIMER = 16, + XFS_BUF_AGE = 17, + /* XFS_IO_BYPASS = 18 */ + XFS_INHERIT_NOSYM = 19, + XFS_ROTORSTEP = 20, +}; + +extern xfs_param_t xfs_params; + +#ifdef CONFIG_SYSCTL +extern void xfs_sysctl_register(void); +extern void xfs_sysctl_unregister(void); +#else +# define xfs_sysctl_register() do { } while (0) +# define xfs_sysctl_unregister() do { } while (0) +#endif /* CONFIG_SYSCTL */ + +#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h new file mode 100644 index 00000000000..96f96394417 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_version.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
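The xfs_sysctl_val_t triple above bundles each tunable with its own clamp range, which is what lets every xfs_table[] entry point proc_dointvec_minmax at &...min and &...max. A standalone model of that layout; the concrete bounds here are assumptions for illustration, not the kernel's defaults:

#include <stdio.h>

typedef struct { int min; int val; int max; } sysctl_val_t;

static int set_val(sysctl_val_t *v, int nv)
{
	if (nv < v->min || nv > v->max)
		return -1;	/* out of range: rejected, as the minmax handler does */
	v->val = nv;
	return 0;
}

int main(void)
{
	sysctl_val_t error_level = { .min = 0, .val = 3, .max = 11 };

	if (set_val(&error_level, 99) != 0)
		printf("99 rejected, still %d\n", error_level.val);
	set_val(&error_level, 5);
	printf("now %d\n", error_level.val);
	return 0;
}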
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Dummy file that can contain a timestamp to put into the + * XFS init string, to help users keep track of what they're + * running + */ + +#ifndef __XFS_VERSION_H__ +#define __XFS_VERSION_H__ + +#define XFS_VERSION_STRING "SGI XFS" + +#endif /* __XFS_VERSION_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c new file mode 100644 index 00000000000..669c6164495 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_macros.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_quota.h" + +int +vfs_mount( + struct bhv_desc *bdp, + struct xfs_mount_args *args, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! 
(bhvtovfsops(next))->vfs_mount) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr)); +} + +int +vfs_parseargs( + struct bhv_desc *bdp, + char *s, + struct xfs_mount_args *args, + int f) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_parseargs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f)); +} + +int +vfs_showargs( + struct bhv_desc *bdp, + struct seq_file *m) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_showargs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_showargs)(next, m)); +} + +int +vfs_unmount( + struct bhv_desc *bdp, + int fl, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_unmount) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr)); +} + +int +vfs_mntupdate( + struct bhv_desc *bdp, + int *fl, + struct xfs_mount_args *args) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_mntupdate) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args)); +} + +int +vfs_root( + struct bhv_desc *bdp, + struct vnode **vpp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_root) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_root)(next, vpp)); +} + +int +vfs_statvfs( + struct bhv_desc *bdp, + xfs_statfs_t *sp, + struct vnode *vp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_statvfs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp)); +} + +int +vfs_sync( + struct bhv_desc *bdp, + int fl, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_sync) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr)); +} + +int +vfs_vget( + struct bhv_desc *bdp, + struct vnode **vpp, + struct fid *fidp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_vget) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp)); +} + +int +vfs_dmapiops( + struct bhv_desc *bdp, + caddr_t addr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_dmapiops) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr)); +} + +int +vfs_quotactl( + struct bhv_desc *bdp, + int cmd, + int id, + caddr_t addr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_quotactl) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr)); +} + +void +vfs_init_vnode( + struct bhv_desc *bdp, + struct vnode *vp, + struct bhv_desc *bp, + int unlock) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_init_vnode) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock)); +} + +void +vfs_force_shutdown( + struct bhv_desc *bdp, + int fl, + char *file, + int line) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_force_shutdown) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line)); +} + +void +vfs_freeze( + struct bhv_desc *bdp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! 
(bhvtovfsops(next))->vfs_freeze) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_freeze)(next)); +} + +vfs_t * +vfs_allocate( void ) +{ + struct vfs *vfsp; + + vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); + bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); + INIT_LIST_HEAD(&vfsp->vfs_sync_list); + spin_lock_init(&vfsp->vfs_sync_lock); + init_waitqueue_head(&vfsp->vfs_wait_sync_task); + init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); + return vfsp; +} + +void +vfs_deallocate( + struct vfs *vfsp) +{ + bhv_head_destroy(VFS_BHVHEAD(vfsp)); + kmem_free(vfsp, sizeof(vfs_t)); +} + +void +vfs_insertops( + struct vfs *vfsp, + struct bhv_vfsops *vfsops) +{ + struct bhv_desc *bdp; + + bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP); + bhv_desc_init(bdp, NULL, vfsp, vfsops); + bhv_insert(&vfsp->vfs_bh, bdp); +} + +void +vfs_insertbhv( + struct vfs *vfsp, + struct bhv_desc *bdp, + struct vfsops *vfsops, + void *mount) +{ + bhv_desc_init(bdp, mount, vfsp, vfsops); + bhv_insert_initial(&vfsp->vfs_bh, bdp); +} + +void +bhv_remove_vfsops( + struct vfs *vfsp, + int pos) +{ + struct bhv_desc *bhv; + + bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos); + if (!bhv) + return; + bhv_remove(&vfsp->vfs_bh, bhv); + kmem_free(bhv, sizeof(*bhv)); +} + +void +bhv_remove_all_vfsops( + struct vfs *vfsp, + int freebase) +{ + struct xfs_mount *mp; + + bhv_remove_vfsops(vfsp, VFS_POSITION_QM); + bhv_remove_vfsops(vfsp, VFS_POSITION_DM); + if (!freebase) + return; + mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops)); + VFS_REMOVEBHV(vfsp, &mp->m_bhv); + xfs_mount_free(mp, 0); +} + +void +bhv_insert_all_vfsops( + struct vfs *vfsp) +{ + struct xfs_mount *mp; + + mp = xfs_mount_init(); + vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp); + vfs_insertdmapi(vfsp); + vfs_insertquota(vfsp); +} diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h new file mode 100644 index 00000000000..76493991578 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
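Every vfs_* wrapper above follows the same behavior-chain dispatch pattern: start at the head descriptor, skip layers whose ops table leaves the slot NULL, and invoke the first implementor. A standalone model of that walk, with illustrative names rather than the kernel's bhv types:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct ops { int (*op_sync)(void); };
struct bhv { struct ops *ops; struct bhv *next; };

static int chain_sync(struct bhv *b)
{
	assert(b != NULL);
	while (!b->ops->op_sync)	/* skip layers without this op */
		b = b->next;
	return b->ops->op_sync();	/* first implementor wins */
}

static int base_sync_impl(void) { puts("base layer syncs"); return 0; }

int main(void)
{
	struct ops quota_ops = { NULL };		/* stacked layer: op absent */
	struct ops base_ops  = { base_sync_impl };	/* base layer implements it */
	struct bhv base  = { &base_ops, NULL };
	struct bhv quota = { &quota_ops, &base };

	return chain_sync(&quota);	/* falls through quota to the base */
}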
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_VFS_H__ +#define __XFS_VFS_H__ + +#include <linux/vfs.h> +#include "xfs_fs.h" + +struct fid; +struct vfs; +struct cred; +struct vnode; +struct kstatfs; +struct seq_file; +struct super_block; +struct xfs_mount_args; + +typedef struct kstatfs xfs_statfs_t; + +typedef struct vfs_sync_work { + struct list_head w_list; + struct vfs *w_vfs; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct vfs *, void *); +} vfs_sync_work_t; + +typedef struct vfs { + u_int vfs_flag; /* flags */ + xfs_fsid_t vfs_fsid; /* file system ID */ + xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ + bhv_head_t vfs_bh; /* head of vfs behavior chain */ + struct super_block *vfs_super; /* generic superblock pointer */ + struct task_struct *vfs_sync_task; /* generalised sync thread */ + vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */ + struct list_head vfs_sync_list; /* sync thread work item list */ + spinlock_t vfs_sync_lock; /* work item list lock */ + int vfs_sync_seq; /* sync thread generation no. */ + wait_queue_head_t vfs_wait_single_sync_task; + wait_queue_head_t vfs_wait_sync_task; +} vfs_t; + +#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ + +#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) ) +#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) ) +#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) +#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) + +#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ + +typedef enum { + VFS_BHV_UNKNOWN, /* not specified */ + VFS_BHV_XFS, /* xfs */ + VFS_BHV_DM, /* data migration */ + VFS_BHV_QM, /* quota manager */ + VFS_BHV_IO, /* IO path */ + VFS_BHV_END /* housekeeping end-of-range */ +} vfs_bhv_t; + +#define VFS_POSITION_XFS (BHV_POSITION_BASE) +#define VFS_POSITION_DM (VFS_POSITION_BASE+10) +#define VFS_POSITION_QM (VFS_POSITION_BASE+20) +#define VFS_POSITION_IO (VFS_POSITION_BASE+30) + +#define VFS_RDONLY 0x0001 /* read-only vfs */ +#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ +#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ +#define VFS_UMOUNT 0x0008 /* unmount in progress */ +#define VFS_END 0x0008 /* max flag */ + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_CLOSE 0x0002 /* close file system down */ +#define SYNC_DELWRI 0x0004 /* look at delayed writes */ +#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. 
superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ + +typedef int (*vfs_mount_t)(bhv_desc_t *, + struct xfs_mount_args *, struct cred *); +typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *, + struct xfs_mount_args *, int); +typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *); +typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *, + struct xfs_mount_args *); +typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **); +typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *); +typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t); +typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); +typedef void (*vfs_init_vnode_t)(bhv_desc_t *, + struct vnode *, bhv_desc_t *, int); +typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); +typedef void (*vfs_freeze_t)(bhv_desc_t *); + +typedef struct vfsops { + bhv_position_t vf_position; /* behavior chain position */ + vfs_mount_t vfs_mount; /* mount file system */ + vfs_parseargs_t vfs_parseargs; /* parse mount options */ + vfs_showargs_t vfs_showargs; /* unparse mount options */ + vfs_unmount_t vfs_unmount; /* unmount file system */ + vfs_mntupdate_t vfs_mntupdate; /* update file system options */ + vfs_root_t vfs_root; /* get root vnode */ + vfs_statvfs_t vfs_statvfs; /* file system statistics */ + vfs_sync_t vfs_sync; /* flush files */ + vfs_vget_t vfs_vget; /* get vnode from fid */ + vfs_dmapiops_t vfs_dmapiops; /* data migration */ + vfs_quotactl_t vfs_quotactl; /* disk quota */ + vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ + vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ + vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ +} vfsops_t; + +/* + * VFS's. Operates on vfs structure pointers (starts at bhv head). + */ +#define VHEAD(v) ((v)->vfs_fbhv) +#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr)) +#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f)) +#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m)) +#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr)) +#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args)) +#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp)) +#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp)) +#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr)) +#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp)) +#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p)) +#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) +#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) +#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) +#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) + +/* + * PVFS's. Operates on behavior descriptor pointers. 
+ */ +#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr)) +#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f)) +#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m)) +#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr)) +#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args)) +#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp)) +#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp)) +#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr)) +#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp)) +#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p)) +#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) +#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) +#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) +#define PVFS_FREEZE(b) ( vfs_freeze(b) ) + +extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); +extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); +extern int vfs_showargs(bhv_desc_t *, struct seq_file *); +extern int vfs_unmount(bhv_desc_t *, int, struct cred *); +extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); +extern int vfs_root(bhv_desc_t *, struct vnode **); +extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +extern int vfs_sync(bhv_desc_t *, int, struct cred *); +extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *); +extern int vfs_dmapiops(bhv_desc_t *, caddr_t); +extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); +extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); +extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); +extern void vfs_freeze(bhv_desc_t *); + +typedef struct bhv_vfsops { + struct vfsops bhv_common; + void * bhv_custom; +} bhv_vfsops_t; + +#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) +#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) +#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) +#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) + +extern vfs_t *vfs_allocate(void); +extern void vfs_deallocate(vfs_t *); +extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); +extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *); + +extern void bhv_insert_all_vfsops(struct vfs *); +extern void bhv_remove_all_vfsops(struct vfs *, int); +extern void bhv_remove_vfsops(struct vfs *, int); + +#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen) +#define fs_check_frozen(vfsp, level) \ + vfs_check_frozen(vfsp->vfs_super, level); + +#endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c new file mode 100644 index 00000000000..849c61c74f3 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -0,0 +1,455 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
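The VFS_* and PVFS_* wrappers above are statement macros in the IRIX style: status comes back through an rv argument that the macro assigns, rather than as an expression value. A minimal sketch of that calling convention, with invented names:

#include <stdio.h>

static int do_sync(int flags) { (void)flags; return 0; }

/* Same shape as VFS_SYNC(v, flag, cr, rv): the macro expands to an
 * assignment, so the "return value" lands in a caller-supplied variable. */
#define MY_SYNC(flags, rv)	((rv) = do_sync(flags))

int main(void)
{
	int error;

	MY_SYNC(0, error);	/* reads like a call, assigns like one */
	printf("sync -> %d\n", error);
	return error;
}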
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + + +uint64_t vn_generation; /* vnode generation number */ +DEFINE_SPINLOCK(vnumber_lock); + +/* + * Dedicated vnode inactive/reclaim sync semaphores. + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) +sv_t vsync[NVSYNC]; + +/* + * Translate stat(2) file types to vnode types and vice versa. + * Aware of numeric order of S_IFMT and vnode type values. + */ +enum vtype iftovt_tab[] = { + VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, + VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON +}; + +u_short vttoif_tab[] = { + 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK +}; + + +void +vn_init(void) +{ + register sv_t *svp; + register int i; + + for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) + init_sv(svp, SV_DEFAULT, "vsy", i); +} + +/* + * Clean a vnode of filesystem-specific data and prepare it for reuse. + */ +STATIC int +vn_reclaim( + struct vnode *vp) +{ + int error; + + XFS_STATS_INC(vn_reclaim); + vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); + + /* + * Only make the VOP_RECLAIM call if there are behaviors + * to call. + */ + if (vp->v_fbhv) { + VOP_RECLAIM(vp, error); + if (error) + return -error; + } + ASSERT(vp->v_fbhv == NULL); + + VN_LOCK(vp); + vp->v_flag &= (VRECLM|VWAIT); + VN_UNLOCK(vp, 0); + + vp->v_type = VNON; + vp->v_fbhv = NULL; + +#ifdef XFS_VNODE_TRACE + ktrace_free(vp->v_trace); + vp->v_trace = NULL; +#endif + + return 0; +} + +STATIC void +vn_wakeup( + struct vnode *vp) +{ + VN_LOCK(vp); + if (vp->v_flag & VWAIT) + sv_broadcast(vptosync(vp)); + vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); + VN_UNLOCK(vp, 0); +} + +int +vn_wait( + struct vnode *vp) +{ + VN_LOCK(vp); + if (vp->v_flag & (VINACT | VRECLM)) { + vp->v_flag |= VWAIT; + sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); + return 1; + } + VN_UNLOCK(vp, 0); + return 0; +} + +struct vnode * +vn_initialize( + struct inode *inode) +{ + struct vnode *vp = LINVFS_GET_VP(inode); + + XFS_STATS_INC(vn_active); + XFS_STATS_INC(vn_alloc); + + vp->v_flag = VMODIFIED; + spinlock_init(&vp->v_lock, "v_lock"); + + spin_lock(&vnumber_lock); + if (!++vn_generation) /* v_number shouldn't be zero */ + vn_generation++; + vp->v_number = vn_generation; + spin_unlock(&vnumber_lock); + + ASSERT(VN_CACHED(vp) == 0); + + /* Initialize the first behavior and the behavior chain head. */ + vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + +#ifdef XFS_VNODE_TRACE + vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); + printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace); +#endif /* XFS_VNODE_TRACE */ + + vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); + return vp; +} + +/* + * Get a reference on a vnode. 
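vn_wait() and vn_wakeup() above implement a flag-plus-sync-variable handshake: waiters set VWAIT and sleep on the hashed sv, and whoever clears VINACT/VRECLM broadcasts. A standalone pthread model of the same handshake, simplified to one flag word and a condition variable (sv_wait/sv_broadcast stand-ins):

#include <pthread.h>
#include <stdio.h>

#define F_RECLM	0x2
#define F_WAIT	0x4

static unsigned flag = F_RECLM;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;

static void *reclaimer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (flag & F_WAIT)
		pthread_cond_broadcast(&cv);	/* vn_wakeup() */
	flag &= ~(F_RECLM | F_WAIT);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&lock);
	pthread_create(&t, NULL, reclaimer, NULL);
	while (flag & F_RECLM) {		/* vn_wait() */
		flag |= F_WAIT;
		pthread_cond_wait(&cv, &lock);	/* drops the lock while asleep */
	}
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	puts("vnode quiesced");
	return 0;
}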
+ */ +vnode_t * +vn_get( + struct vnode *vp, + vmap_t *vmap) +{ + struct inode *inode; + + XFS_STATS_INC(vn_get); + inode = LINVFS_GET_IP(vp); + if (inode->i_state & I_FREEING) + return NULL; + + inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); + if (!inode) /* Inode not present */ + return NULL; + + vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); + + return vp; +} + +/* + * Revalidate the Linux inode from the vattr. + * Note: i_size _not_ updated; we must hold the inode + * semaphore when doing that - callers responsibility. + */ +void +vn_revalidate_core( + struct vnode *vp, + vattr_t *vap) +{ + struct inode *inode = LINVFS_GET_IP(vp); + + inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; + inode->i_nlink = vap->va_nlink; + inode->i_uid = vap->va_uid; + inode->i_gid = vap->va_gid; + inode->i_blocks = vap->va_nblocks; + inode->i_mtime = vap->va_mtime; + inode->i_ctime = vap->va_ctime; + inode->i_atime = vap->va_atime; + if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (vap->va_xflags & XFS_XFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (vap->va_xflags & XFS_XFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +/* + * Revalidate the Linux inode from the vnode. + */ +int +vn_revalidate( + struct vnode *vp) +{ + vattr_t va; + int error; + + vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); + ASSERT(vp->v_fbhv != NULL); + + va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (!error) { + vn_revalidate_core(vp, &va); + VUNMODIFY(vp); + } + return -error; +} + +/* + * purge a vnode from the cache + * At this point the vnode is guaranteed to have no references (vn_count == 0) + * The caller has to make sure that there are no ways someone could + * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). + */ +void +vn_purge( + struct vnode *vp, + vmap_t *vmap) +{ + vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); + +again: + /* + * Check whether vp has already been reclaimed since our caller + * sampled its version while holding a filesystem cache lock that + * its VOP_RECLAIM function acquires. + */ + VN_LOCK(vp); + if (vp->v_number != vmap->v_number) { + VN_UNLOCK(vp, 0); + return; + } + + /* + * If vp is being reclaimed or inactivated, wait until it is inert, + * then proceed. Can't assume that vnode is actually reclaimed + * just because the reclaimed flag is asserted -- a vn_alloc + * reclaim can fail. + */ + if (vp->v_flag & (VINACT | VRECLM)) { + ASSERT(vn_count(vp) == 0); + vp->v_flag |= VWAIT; + sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); + goto again; + } + + /* + * Another process could have raced in and gotten this vnode... + */ + if (vn_count(vp) > 0) { + VN_UNLOCK(vp, 0); + return; + } + + XFS_STATS_DEC(vn_active); + vp->v_flag |= VRECLM; + VN_UNLOCK(vp, 0); + + /* + * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells + * vp's filesystem to flush and invalidate all cached resources. + * When vn_reclaim returns, vp should have no private data, + * either in a system cache or attached to v_data. + */ + if (vn_reclaim(vp) != 0) + panic("vn_purge: cannot reclaim"); + + /* + * Wakeup anyone waiting for vp to be reclaimed. + */ + vn_wakeup(vp); +} + +/* + * Add a reference to a referenced vnode. 
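The vmap check at the top of vn_purge() above is a generation-number recycle test: the caller snapshots v_number while protected, and the purge bails out if the vnode was reused for something else in the meantime. A standalone model of that snapshot-and-recheck pattern:

#include <stdio.h>

struct obj  { unsigned long gen; };
struct omap { unsigned long gen; };	/* vmap_t stand-in */

static void snapshot(const struct obj *o, struct omap *m) { m->gen = o->gen; }

static int still_same(const struct obj *o, const struct omap *m)
{
	/* vp->v_number != vmap->v_number  =>  already recycled, bail */
	return o->gen == m->gen;
}

int main(void)
{
	struct obj vnode = { 42 };
	struct omap map;

	snapshot(&vnode, &map);
	vnode.gen++;	/* vnode recycled for a different inode meanwhile */
	puts(still_same(&vnode, &map) ? "purge" : "stale handle, skip");
	return 0;
}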
+ */ +struct vnode * +vn_hold( + struct vnode *vp) +{ + struct inode *inode; + + XFS_STATS_INC(vn_hold); + + VN_LOCK(vp); + inode = igrab(LINVFS_GET_IP(vp)); + ASSERT(inode); + VN_UNLOCK(vp, 0); + + return vp; +} + +/* + * Call VOP_INACTIVE on last reference. + */ +void +vn_rele( + struct vnode *vp) +{ + int vcnt; + int cache; + + XFS_STATS_INC(vn_rele); + + VN_LOCK(vp); + + vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); + vcnt = vn_count(vp); + + /* + * Since we always get called from put_inode we know + * that i_count won't be decremented after we + * return. + */ + if (!vcnt) { + /* + * As soon as we turn this on, noone can find us in vn_get + * until we turn off VINACT or VRECLM + */ + vp->v_flag |= VINACT; + VN_UNLOCK(vp, 0); + + /* + * Do not make the VOP_INACTIVE call if there + * are no behaviors attached to the vnode to call. + */ + if (vp->v_fbhv) + VOP_INACTIVE(vp, NULL, cache); + + VN_LOCK(vp); + if (vp->v_flag & VWAIT) + sv_broadcast(vptosync(vp)); + + vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); + } + + VN_UNLOCK(vp, 0); + + vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); +} + +/* + * Finish the removal of a vnode. + */ +void +vn_remove( + struct vnode *vp) +{ + vmap_t vmap; + + /* Make sure we don't do this to the same vnode twice */ + if (!(vp->v_fbhv)) + return; + + XFS_STATS_INC(vn_remove); + vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); + + /* + * After the following purge the vnode + * will no longer exist. + */ + VMAP(vp, vmap); + vn_purge(vp, &vmap); +} + + +#ifdef XFS_VNODE_TRACE + +#define KTRACE_ENTER(vp, vk, s, line, ra) \ + ktrace_enter( (vp)->v_trace, \ +/* 0 */ (void *)(__psint_t)(vk), \ +/* 1 */ (void *)(s), \ +/* 2 */ (void *)(__psint_t) line, \ +/* 3 */ (void *)(vn_count(vp)), \ +/* 4 */ (void *)(ra), \ +/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ +/* 6 */ (void *)(__psint_t)current_cpu(), \ +/* 7 */ (void *)(__psint_t)current_pid(), \ +/* 8 */ (void *)__return_address, \ +/* 9 */ 0, 0, 0, 0, 0, 0, 0) + +/* + * Vnode tracing code. + */ +void +vn_trace_entry(vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); +} + +void +vn_trace_exit(vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); +} + +void +vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); +} + +void +vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); +} + +void +vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); +} +#endif /* XFS_VNODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h new file mode 100644 index 00000000000..da76c1f1e11 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
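The vn_hold()/vn_rele() pair above delegates the actual counting to the Linux inode (igrab/iput on i_count), with the drop to zero being what triggers VOP_INACTIVE. A plain-counter model of that pairing, hedged as a sketch rather than the kernel's reference machinery:

#include <stdio.h>

struct inode { int i_count; };

static void hold(struct inode *ip) { ip->i_count++; }	/* igrab() stand-in */

static void rele(struct inode *ip)			/* iput() stand-in  */
{
	if (--ip->i_count == 0)
		puts("last reference: VOP_INACTIVE runs");
}

int main(void)
{
	struct inode ino = { 1 };

	hold(&ino);
	rele(&ino);
	rele(&ino);	/* drops to zero -> inactivate */
	return 0;
}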
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * Portions Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef __XFS_VNODE_H__ +#define __XFS_VNODE_H__ + +struct uio; +struct file; +struct vattr; +struct xfs_iomap; +struct attrlist_cursor_kern; + +/* + * Vnode types. VNON means no type. 
+ */ +enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; + +typedef xfs_ino_t vnumber_t; +typedef struct dentry vname_t; +typedef bhv_head_t vn_bhv_head_t; + +/* + * MP locking protocols: + * v_flag, v_vfsp VN_LOCK/VN_UNLOCK + * v_type read-only or fs-dependent + */ +typedef struct vnode { + __u32 v_flag; /* vnode flags (see below) */ + enum vtype v_type; /* vnode type */ + struct vfs *v_vfsp; /* ptr to containing VFS */ + vnumber_t v_number; /* in-core vnode number */ + vn_bhv_head_t v_bh; /* behavior head */ + spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ + struct inode v_inode; /* Linux inode */ +#ifdef XFS_VNODE_TRACE + struct ktrace *v_trace; /* trace header structure */ +#endif +} vnode_t; + +#define v_fbhv v_bh.bh_first /* first behavior */ +#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ + +#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ + +typedef enum { + VN_BHV_UNKNOWN, /* not specified */ + VN_BHV_XFS, /* xfs */ + VN_BHV_DM, /* data migration */ + VN_BHV_QM, /* quota manager */ + VN_BHV_IO, /* IO path */ + VN_BHV_END /* housekeeping end-of-range */ +} vn_bhv_t; + +#define VNODE_POSITION_XFS (VNODE_POSITION_BASE) +#define VNODE_POSITION_DM (VNODE_POSITION_BASE+10) +#define VNODE_POSITION_QM (VNODE_POSITION_BASE+20) +#define VNODE_POSITION_IO (VNODE_POSITION_BASE+30) + +/* + * Macros for dealing with the behavior descriptor inside of the vnode. + */ +#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp)) +#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp)) + +#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) +#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) +#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) +#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops) +#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops) + +/* + * Vnode to Linux inode mapping. + */ +#define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) +#define LINVFS_GET_IP(vp) (&(vp)->v_inode) + +/* + * Convert between vnode types and inode formats (since POSIX.1 + * defines mode word of stat structure in terms of inode formats). + */ +extern enum vtype iftovt_tab[]; +extern u_short vttoif_tab[]; +#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) +#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) +#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) + + +/* + * Vnode flags. + */ +#define VINACT 0x1 /* vnode is being inactivated */ +#define VRECLM 0x2 /* vnode is being reclaimed */ +#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ +#define VMODIFIED 0x8 /* XFS inode state possibly differs */ + /* to the Linux inode state. */ + +/* + * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. + */ +typedef enum vrwlock { + VRWLOCK_NONE, + VRWLOCK_READ, + VRWLOCK_WRITE, + VRWLOCK_WRITE_DIRECT, + VRWLOCK_TRY_READ, + VRWLOCK_TRY_WRITE +} vrwlock_t; + +/* + * Return values for VOP_INACTIVE. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* + * Values for the cmd code given to VOP_VNODE_CHANGE. 
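IFTOVT above works because S_IFMT occupies the top four bits of the mode word, so shifting by 12 indexes a 16-entry table. A standalone check of that mapping, with the table copied from the iftovt_tab[] definition earlier in this diff:

#include <stdio.h>
#include <sys/stat.h>

enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };

static const enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};

#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])

int main(void)
{
	/* 0040000 >> 12 == 4 -> VDIR; 0100000 >> 12 == 8 -> VREG */
	printf("S_IFDIR -> %s\n", IFTOVT(S_IFDIR) == VDIR ? "VDIR" : "?");
	printf("S_IFREG -> %s\n", IFTOVT(S_IFREG) == VREG ? "VREG" : "?");
	return 0;
}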
+ */ +typedef enum vchange { + VCHANGE_FLAGS_FRLOCKS = 0, + VCHANGE_FLAGS_ENF_LOCKING = 1, + VCHANGE_FLAGS_TRUNCATED = 2, + VCHANGE_FLAGS_PAGE_DIRTY = 3, + VCHANGE_FLAGS_IOEXCL_COUNT = 4 +} vchange_t; + + +typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); +typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, + loff_t *, int, size_t, read_actor_t, + void *, struct cred *); +typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, + int, unsigned int, void __user *); +typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, + struct cred *); +typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int, + struct cred *); +typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **, + int, vnode_t *, struct cred *); +typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *, + vnode_t **, struct cred *); +typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *, + struct cred *); +typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *, + struct cred *); +typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *, + vnode_t **, struct cred *); +typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, + int *); +typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *, + char *, vnode_t **, struct cred *); +typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int, + struct cred *); +typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, + xfs_off_t, xfs_off_t); +typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *); +typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *); +typedef int (*vop_release_t)(bhv_desc_t *); +typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t); +typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); +typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +typedef int (*vop_reclaim_t)(bhv_desc_t *); +typedef int (*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int, + struct cred *); +typedef int (*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int, + struct cred *); +typedef int (*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *); +typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, + struct attrlist_cursor_kern *, struct cred *); +typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int); +typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); +typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, + uint64_t, int); +typedef int (*vop_iflush_t)(bhv_desc_t *, int); + + +typedef struct vnodeops { + bhv_position_t vn_position; /* position within behavior chain */ + vop_open_t vop_open; + vop_read_t vop_read; + vop_write_t vop_write; + vop_sendfile_t vop_sendfile; + vop_ioctl_t vop_ioctl; + vop_getattr_t vop_getattr; + vop_setattr_t vop_setattr; + vop_access_t vop_access; + vop_lookup_t vop_lookup; + vop_create_t vop_create; + 
vop_remove_t vop_remove; + vop_link_t vop_link; + vop_rename_t vop_rename; + vop_mkdir_t vop_mkdir; + vop_rmdir_t vop_rmdir; + vop_readdir_t vop_readdir; + vop_symlink_t vop_symlink; + vop_readlink_t vop_readlink; + vop_fsync_t vop_fsync; + vop_inactive_t vop_inactive; + vop_fid2_t vop_fid2; + vop_rwlock_t vop_rwlock; + vop_rwunlock_t vop_rwunlock; + vop_bmap_t vop_bmap; + vop_reclaim_t vop_reclaim; + vop_attr_get_t vop_attr_get; + vop_attr_set_t vop_attr_set; + vop_attr_remove_t vop_attr_remove; + vop_attr_list_t vop_attr_list; + vop_link_removed_t vop_link_removed; + vop_vnode_change_t vop_vnode_change; + vop_ptossvp_t vop_tosspages; + vop_pflushinvalvp_t vop_flushinval_pages; + vop_pflushvp_t vop_flush_pages; + vop_release_t vop_release; + vop_iflush_t vop_iflush; +} vnodeops_t; + +/* + * VOP's. + */ +#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op) + +#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \ + rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) +#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \ + rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) +#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ + rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) +#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ + rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) +#define VOP_OPEN(vp, cr, rv) \ + rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) +#define VOP_GETATTR(vp, vap, f, cr, rv) \ + rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr) +#define VOP_SETATTR(vp, vap, f, cr, rv) \ + rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr) +#define VOP_ACCESS(vp, mode, cr, rv) \ + rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr) +#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \ + rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr) +#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \ + rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr) +#define VOP_REMOVE(dvp,d,cr,rv) \ + rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr) +#define VOP_LINK(tdvp,fvp,d,cr,rv) \ + rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr) +#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \ + rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr) +#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \ + rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr) +#define VOP_RMDIR(dp,d,cr,rv) \ + rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr) +#define VOP_READDIR(vp,uiop,cr,eofp,rv) \ + rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp) +#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \ + rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr) +#define VOP_READLINK(vp,uiop,fl,cr,rv) \ + rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr) +#define VOP_FSYNC(vp,f,cr,b,e,rv) \ + rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e) +#define VOP_INACTIVE(vp, cr, rv) \ + rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr) +#define VOP_RELEASE(vp, rv) \ + rv = _VOP_(vop_release, vp)((vp)->v_fbhv) +#define VOP_FID2(vp, fidp, rv) \ + rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp) +#define VOP_RWLOCK(vp,i) \ + (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) +#define VOP_RWLOCK_TRY(vp,i) \ + _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) +#define VOP_RWUNLOCK(vp,i) \ + (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i) +#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \ + rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr) +#define VOP_RECLAIM(vp, rv) \ + rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv) +#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \ + rv = _VOP_(vop_attr_get, 
vp)((vp)->v_fbhv,name,val,vallenp,fl,cred) +#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \ + rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred) +#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \ + rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred) +#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \ + rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred) +#define VOP_LINK_REMOVED(vp, dvp, linkzero) \ + (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero) +#define VOP_VNODE_CHANGE(vp, cmd, val) \ + (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val) +/* + * These are page cache functions that now go thru VOPs. + * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_TOSS_PAGES(vp, first, last, fiopt) \ + _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt) +/* + * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \ + _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt) +/* + * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \ + rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt) +#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \ + rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg) +#define VOP_IFLUSH(vp, flags, rv) \ + rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags) + +/* + * Flags for read/write calls - same values as IRIX + */ +#define IO_ISAIO 0x00001 /* don't wait for completion */ +#define IO_ISDIRECT 0x00004 /* bypass page cache */ +#define IO_INVIS 0x00020 /* don't update inode timestamps */ + +/* + * Flags for VOP_IFLUSH call + */ +#define FLUSH_SYNC 1 /* wait for flush to complete */ +#define FLUSH_INODE 2 /* flush the inode itself */ +#define FLUSH_LOG 4 /* force the last log entry for + * this inode out to disk */ + +/* + * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and + * VOP_FLUSH_PAGES. + */ +#define FI_NONE 0 /* none */ +#define FI_REMAPF 1 /* Do a remapf prior to the operation */ +#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. + Prevent VM access to the pages until + the operation completes. */ + +/* + * Vnode attributes. va_mask indicates those attributes the caller + * wants to set or extract. 
+ */ +typedef struct vattr { + int va_mask; /* bit-mask of attributes present */ + enum vtype va_type; /* vnode type (for create) */ + mode_t va_mode; /* file access mode and type */ + nlink_t va_nlink; /* number of references to file */ + uid_t va_uid; /* owner user id */ + gid_t va_gid; /* owner group id */ + xfs_ino_t va_nodeid; /* file id */ + xfs_off_t va_size; /* file size in bytes */ + u_long va_blocksize; /* blocksize preferred for i/o */ + struct timespec va_atime; /* time of last access */ + struct timespec va_mtime; /* time of last modification */ + struct timespec va_ctime; /* time file changed */ + u_int va_gen; /* generation number of file */ + xfs_dev_t va_rdev; /* device the special file represents */ + __int64_t va_nblocks; /* number of blocks allocated */ + u_long va_xflags; /* random extended file flags */ + u_long va_extsize; /* file extent size */ + u_long va_nextents; /* number of extents in file */ + u_long va_anextents; /* number of attr extents in file */ + int va_projid; /* project id */ +} vattr_t; + +/* + * setattr or getattr attributes + */ +#define XFS_AT_TYPE 0x00000001 +#define XFS_AT_MODE 0x00000002 +#define XFS_AT_UID 0x00000004 +#define XFS_AT_GID 0x00000008 +#define XFS_AT_FSID 0x00000010 +#define XFS_AT_NODEID 0x00000020 +#define XFS_AT_NLINK 0x00000040 +#define XFS_AT_SIZE 0x00000080 +#define XFS_AT_ATIME 0x00000100 +#define XFS_AT_MTIME 0x00000200 +#define XFS_AT_CTIME 0x00000400 +#define XFS_AT_RDEV 0x00000800 +#define XFS_AT_BLKSIZE 0x00001000 +#define XFS_AT_NBLOCKS 0x00002000 +#define XFS_AT_VCODE 0x00004000 +#define XFS_AT_MAC 0x00008000 +#define XFS_AT_UPDATIME 0x00010000 +#define XFS_AT_UPDMTIME 0x00020000 +#define XFS_AT_UPDCTIME 0x00040000 +#define XFS_AT_ACL 0x00080000 +#define XFS_AT_CAP 0x00100000 +#define XFS_AT_INF 0x00200000 +#define XFS_AT_XFLAGS 0x00400000 +#define XFS_AT_EXTSIZE 0x00800000 +#define XFS_AT_NEXTENTS 0x01000000 +#define XFS_AT_ANEXTENTS 0x02000000 +#define XFS_AT_PROJID 0x04000000 +#define XFS_AT_SIZE_NOPERM 0x08000000 +#define XFS_AT_GENCOUNT 0x10000000 + +#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ + XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ + XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ + XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ + XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + +#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ + XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ + XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ + XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID) + +#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME) + +#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME) + +#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\ + XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) + +/* + * Modes. + */ +#define VSUID S_ISUID /* set user id on execution */ +#define VSGID S_ISGID /* set group id on execution */ +#define VSVTX S_ISVTX /* save swapped text even after use */ +#define VREAD S_IRUSR /* read, write, execute permissions */ +#define VWRITE S_IWUSR +#define VEXEC S_IXUSR + +#define MODEMASK S_IALLUGO /* mode bits plus permission bits */ + +/* + * Check whether mandatory file locking is enabled. 
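Callers drive the vattr machinery above by setting va_mask to the union of attribute bits they want, exactly as vn_revalidate() earlier in this diff requests XFS_AT_STAT|XFS_AT_XFLAGS before VOP_GETATTR. A small sketch of that bit-mask protocol, with the values copied from the defines above:

#include <stdio.h>

#define XFS_AT_MODE	0x00000002
#define XFS_AT_UID	0x00000004
#define XFS_AT_SIZE	0x00000080

int main(void)
{
	int va_mask = XFS_AT_MODE | XFS_AT_UID;	/* attributes requested */

	if (va_mask & XFS_AT_UID)
		puts("caller asked for the uid");
	if (!(va_mask & XFS_AT_SIZE))
		puts("size not requested; getattr may skip it");
	return 0;
}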
+ */
+#define MANDLOCK(vp, mode) \
+ ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
+
+extern void vn_init(void);
+extern int vn_wait(struct vnode *);
+extern vnode_t *vn_initialize(struct inode *);
+
+/*
+ * Acquiring and invalidating vnodes:
+ *
+ * if (vn_get(vp, &vmap))
+ * ...;
+ * vn_purge(vp, &vmap);
+ *
+ * vn_get and vn_purge must be called with vmap_t arguments, sampled
+ * while a lock that the vnode's VOP_RECLAIM function acquires is
+ * held, to ensure that the vnode sampled with the lock held isn't
+ * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
+ * and the subsequent vn_get or vn_purge.
+ */
+
+/*
+ * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
+ */
+typedef struct vnode_map {
+ vfs_t *v_vfsp;
+ vnumber_t v_number; /* in-core vnode number */
+ xfs_ino_t v_ino; /* inode # */
+} vmap_t;
+
+#define VMAP(vp, vmap) { (vmap).v_vfsp = (vp)->v_vfsp; \
+ (vmap).v_number = (vp)->v_number; \
+ (vmap).v_ino = (vp)->v_inode.i_ino; }
+
+extern void vn_purge(struct vnode *, vmap_t *);
+extern vnode_t *vn_get(struct vnode *, vmap_t *);
+extern int vn_revalidate(struct vnode *);
+extern void vn_revalidate_core(struct vnode *, vattr_t *);
+extern void vn_remove(struct vnode *);
+
+static inline int vn_count(struct vnode *vp)
+{
+ return atomic_read(&LINVFS_GET_IP(vp)->i_count);
+}
+
+/*
+ * Vnode reference counting functions (and macros for compatibility).
+ */
+extern vnode_t *vn_hold(struct vnode *);
+extern void vn_rele(struct vnode *);
+
+#if defined(XFS_VNODE_TRACE)
+#define VN_HOLD(vp) \
+ ((void)vn_hold(vp), \
+ vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
+#define VN_RELE(vp) \
+ (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
+ iput(LINVFS_GET_IP(vp)))
+#else
+#define VN_HOLD(vp) ((void)vn_hold(vp))
+#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
+#endif
+
+/*
+ * Vname handling macros.
+ */
+#define VNAME(dentry) ((char *) (dentry)->d_name.name)
+#define VNAMELEN(dentry) ((dentry)->d_name.len)
+#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode))
+
+/*
+ * Vnode spinlock manipulation.
+ */
+#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
+#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
+#define VN_FLAGSET(vp,b) vn_flagset(vp,b)
+#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b)
+
+static inline void vn_flagset(struct vnode *vp, uint flag)
+{
+ spin_lock(&vp->v_lock);
+ vp->v_flag |= flag;
+ spin_unlock(&vp->v_lock);
+}
+
+static inline void vn_flagclr(struct vnode *vp, uint flag)
+{
+ spin_lock(&vp->v_lock);
+ vp->v_flag &= ~flag;
+ spin_unlock(&vp->v_lock);
+}
+
+/*
+ * Update modify/access/change times on the vnode
+ */
+#define VN_MTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_mtime = *(tvp))
+#define VN_ATIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_atime = *(tvp))
+#define VN_CTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_ctime = *(tvp))
+
+/*
+ * Dealing with bad inodes
+ */
+static inline void vn_mark_bad(struct vnode *vp)
+{
+ make_bad_inode(LINVFS_GET_IP(vp));
+}
+
+static inline int VN_BAD(struct vnode *vp)
+{
+ return is_bad_inode(LINVFS_GET_IP(vp));
+}
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
+#define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages)
+#define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
+ PAGECACHE_TAG_DIRTY)
+#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED)
+#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED)
+
+/*
+ * Flags to VOP_SETATTR/VOP_GETATTR.
+ */
+#define ATTR_UTIME 0x01 /* non-default utime(2) request */
+#define ATTR_DMI 0x08 /* invocation from a DMI function */
+#define ATTR_LAZY 0x80 /* set/get attributes lazily */
+#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
+
+/*
+ * Flags to VOP_FSYNC and VOP_RECLAIM.
+ */
+#define FSYNC_NOWAIT 0 /* asynchronous flush */
+#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */
+#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */
+#define FSYNC_DATA 0x4 /* synchronous fsync of data only */
+
+/*
+ * Tracking vnode activity.
+ */
+#if defined(XFS_VNODE_TRACE)
+
+#define VNODE_TRACE_SIZE 16 /* number of trace entries */
+#define VNODE_KTRACE_ENTRY 1
+#define VNODE_KTRACE_EXIT 2
+#define VNODE_KTRACE_HOLD 3
+#define VNODE_KTRACE_REF 4
+#define VNODE_KTRACE_RELE 5
+
+extern void vn_trace_entry(struct vnode *, char *, inst_t *);
+extern void vn_trace_exit(struct vnode *, char *, inst_t *);
+extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
+
+#define VN_TRACE(vp) \
+ vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
+#else
+#define vn_trace_entry(a,b,c)
+#define vn_trace_exit(a,b,c)
+#define vn_trace_hold(a,b,c,d)
+#define vn_trace_ref(a,b,c,d)
+#define vn_trace_rele(a,b,c,d)
+#define VN_TRACE(vp)
+#endif
+
+#endif /* __XFS_VNODE_H__ */
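The vattr_t/va_mask pattern above drives all attribute changes: a caller fills only the fields named in va_mask and hands the structure to a statement-style VOP macro, which assigns the result to its trailing rv argument. A minimal sketch, assuming the VOP_SETATTR(vp, vap, flags, cred, rv) macro declared earlier in this header; example_truncate and its credp argument are hypothetical:

/*
 * Hypothetical sketch, not part of the patch: truncate a file by
 * applying only XFS_AT_SIZE through the vnode attribute interface.
 */
static int
example_truncate(vnode_t *vp, xfs_off_t new_size, cred_t *credp)
{
	vattr_t	va;
	int	error;

	va.va_mask = XFS_AT_SIZE;	/* only va_size is valid */
	va.va_size = new_size;
	VOP_SETATTR(vp, &va, 0, credp, error);	/* macro assigns 'error' */
	return error;
}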
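The vmap_t machinery exists so a vnode's identity can outlive a lock drop: VMAP() snapshots the vfs pointer, in-core vnode number, and inode number while the lock is held, and vn_get() later returns NULL if that identity no longer matches because the vnode was reclaimed or recycled. A minimal sketch of the pattern described in the comment above vn_get(); example_lock stands in for whatever lock the caller shares with VOP_RECLAIM:

/*
 * Hypothetical sketch, not part of the patch: revalidate a vnode
 * across a lock drop using the VMAP()/vn_get() pattern.
 */
static vnode_t *
example_revalidate(vnode_t *vp, spinlock_t *example_lock)
{
	vmap_t	map;

	spin_lock(example_lock);
	VMAP(vp, map);			/* sample v_vfsp, v_number, inode # */
	spin_unlock(example_lock);

	/* NULL means the vnode was recycled after the unlock. */
	return vn_get(vp, &map);
}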
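VN_HOLD/VN_RELE bracket any use of a vnode that might otherwise be torn down, and the predicates above read state straight from the embedded Linux inode. A short usage sketch; example_has_cached_pages is hypothetical:

/*
 * Hypothetical sketch, not part of the patch: pin a vnode while
 * asking whether it still has pages in the page cache.
 */
static int
example_has_cached_pages(vnode_t *vp)
{
	int	cached;

	VN_HOLD(vp);			/* take a reference */
	cached = VN_CACHED(vp) != 0;	/* i_mapping->nrpages */
	VN_RELE(vp);			/* drop it again (iput) */
	return cached;
}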