Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_inode.c
1 files changed, 3876 insertions, 0 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
new file mode 100644
index 00000000000..43c632ab86a
--- /dev/null
+++ b/fs/xfs/xfs_inode.c
@@ -0,0 +1,3876 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_macros.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_imap.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode_item.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_rw.h"
+#include "xfs_error.h"
+#include "xfs_bit.h"
+#include "xfs_utils.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_quota.h"
+#include "xfs_mac.h"
+#include "xfs_acl.h"
+
+
+kmem_zone_t *xfs_ifork_zone;
+kmem_zone_t *xfs_inode_zone;
+kmem_zone_t *xfs_chashlist_zone;
+
+/*
+ * Used in xfs_itruncate().  This is the maximum number of extents
+ * freed from a file in a single transaction.
+ */
+#define	XFS_ITRUNC_MAX_EXTENTS	2
+
+STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
+STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
+STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
+STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
+
+
+#ifdef DEBUG
+/*
+ * Make sure that the extents in the given memory buffer
+ * are valid.
+ */
+STATIC void
+xfs_validate_extents(
+	xfs_bmbt_rec_t		*ep,
+	int			nrecs,
+	int			disk,
+	xfs_exntfmt_t		fmt)
+{
+	xfs_bmbt_irec_t		irec;
+	xfs_bmbt_rec_t		rec;
+	int			i;
+
+	for (i = 0; i < nrecs; i++) {
+		rec.l0 = get_unaligned((__uint64_t*)&ep->l0);
+		rec.l1 = get_unaligned((__uint64_t*)&ep->l1);
+		if (disk)
+			xfs_bmbt_disk_get_all(&rec, &irec);
+		else
+			xfs_bmbt_get_all(&rec, &irec);
+		if (fmt == XFS_EXTFMT_NOSTATE)
+			ASSERT(irec.br_state == XFS_EXT_NORM);
+		ep++;
+	}
+}
+#else /* DEBUG */
+#define xfs_validate_extents(ep, nrecs, disk, fmt)
+#endif /* DEBUG */
+
+/*
+ * Check that none of the inode's in the buffer have a next
+ * unlinked field of 0.
+ */
+#if defined(DEBUG)
+void
+xfs_inobp_check(
+	xfs_mount_t	*mp,
+	xfs_buf_t	*bp)
+{
+	int		i;
+	int		j;
+	xfs_dinode_t	*dip;
+
+	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+
+	for (i = 0; i < j; i++) {
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					i * mp->m_sb.sb_inodesize);
+		if (!dip->di_next_unlinked)  {
+			xfs_fs_cmn_err(CE_ALERT, mp,
+				"Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
+				bp);
+			ASSERT(dip->di_next_unlinked);
+		}
+	}
+}
+#endif
+
+/*
+ * called from bwrite on xfs inode buffers
+ */
+void
+xfs_inobp_bwcheck(xfs_buf_t *bp)
+{
+	xfs_mount_t	*mp;
+	int		i;
+	int		j;
+	xfs_dinode_t	*dip;
+
+	ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL);
+
+	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
+
+
+	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+
+	for (i = 0; i < j; i++)  {
+		dip = (xfs_dinode_t *) xfs_buf_offset(bp,
+						i * mp->m_sb.sb_inodesize);
+		if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
+			cmn_err(CE_WARN,
+"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x",
+				INT_GET(dip->di_core.di_magic, ARCH_CONVERT),
+				(__uint64_t)(__psunsigned_t) bp,
+				(__int64_t) XFS_BUF_ADDR(bp),
+				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+			xfs_fs_cmn_err(CE_WARN, mp,
+				"corrupt, unmount and run xfs_repair");
+		}
+		if (!dip->di_next_unlinked)  {
+			cmn_err(CE_WARN,
+"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x",
+				(__uint64_t)(__psunsigned_t) bp,
+				(__int64_t) XFS_BUF_ADDR(bp),
+				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+			xfs_fs_cmn_err(CE_WARN, mp,
+				"corrupt, unmount and run xfs_repair");
+		}
+	}
+
+	return;
+}
+
+/*
+ * This routine is called to map an inode number within a file
+ * system to the buffer containing the on-disk version of the
+ * inode.  It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dip parameter
+ * it returns a pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and
+ * dipp are undefined.
+ *
+ * Use xfs_imap() to determine the size and location of the
+ * buffer to read from disk.
+ */
+int
+xfs_inotobp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	**dipp,
+	xfs_buf_t	**bpp,
+	int		*offset)
+{
+	int		di_ok;
+	xfs_imap_t	imap;
+	xfs_buf_t	*bp;
+	int		error;
+	xfs_dinode_t	*dip;
+
+	/*
+	 * Call the space managment code to find the location of the
+	 * inode on disk.
+	 */
+	imap.im_blkno = 0;
+	error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
+	if (error != 0) {
+		cmn_err(CE_WARN,
+	"xfs_inotobp: xfs_imap()  returned an "
+	"error %d on %s.  Returning error.", error, mp->m_fsname);
+		return error;
+	}
+
+	/*
+	 * If the inode number maps to a block outside the bounds of the
+	 * file system then return NULL rather than calling read_buf
+	 * and panicing when we get an error from the driver.
+	 */
+	if ((imap.im_blkno + imap.im_len) >
+	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+		cmn_err(CE_WARN,
+	"xfs_inotobp: inode number (%d + %d) maps to a block outside the bounds "
+	"of the file system %s.  Returning EINVAL.",
+			imap.im_blkno, imap.im_len,mp->m_fsname);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
+	 * default to just a read_buf() call.
+	 */
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
+				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
+
+	if (error) {
+		cmn_err(CE_WARN,
+	"xfs_inotobp: xfs_trans_read_buf()  returned an "
+	"error %d on %s.  Returning error.", error, mp->m_fsname);
+		return error;
+	}
+	dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
+	di_ok =
+		INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
+		XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
+	if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
+			XFS_RANDOM_ITOBP_INOTOBP))) {
+		XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
+		xfs_trans_brelse(tp, bp);
+		cmn_err(CE_WARN,
+	"xfs_inotobp: XFS_TEST_ERROR()  returned an "
+	"error on %s.  Returning EFSCORRUPTED.",  mp->m_fsname);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Set *dipp to point to the on-disk inode in the buffer.
+	 */
+	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+	*bpp = bp;
+	*offset = imap.im_boffset;
+	return 0;
+}
+
+
+/*
+ * This routine is called to map an inode to the buffer containing
+ * the on-disk version of the inode.  It returns a pointer to the
+ * buffer containing the on-disk inode in the bpp parameter, and in
+ * the dip parameter it returns a pointer to the on-disk inode within
+ * that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and
+ * dipp are undefined.
+ *
+ * If the inode is new and has not yet been initialized, use xfs_imap()
+ * to determine the size and location of the buffer to read from disk.
+ * If the inode has already been mapped to its buffer and read in once,
+ * then use the mapping information stored in the inode rather than
+ * calling xfs_imap().  This allows us to avoid the overhead of looking
+ * at the inode btree for small block file systems (see xfs_dilocate()).
+ * We can tell whether the inode has been mapped in before by comparing
+ * its disk block address to 0.  Only uninitialized inodes will have
+ * 0 for the disk block address.
+ */
+int
+xfs_itobp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dinode_t	**dipp,
+	xfs_buf_t	**bpp,
+	xfs_daddr_t	bno)
+{
+	xfs_buf_t	*bp;
+	int		error;
+	xfs_imap_t	imap;
+#ifdef __KERNEL__
+	int		i;
+	int		ni;
+#endif
+
+	if (ip->i_blkno == (xfs_daddr_t)0) {
+		/*
+		 * Call the space management code to find the location of the
+		 * inode on disk.
+		 */
+		imap.im_blkno = bno;
+		error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP);
+		if (error != 0) {
+			return error;
+		}
+
+		/*
+		 * If the inode number maps to a block outside the bounds
+		 * of the file system then return NULL rather than calling
+		 * read_buf and panicing when we get an error from the
+		 * driver.
+		 */
+		if ((imap.im_blkno + imap.im_len) >
+		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+#ifdef DEBUG
+			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
+					"(imap.im_blkno (0x%llx) "
+					"+ imap.im_len (0x%llx)) > "
+					" XFS_FSB_TO_BB(mp, "
+					"mp->m_sb.sb_dblocks) (0x%llx)",
+					(unsigned long long) imap.im_blkno,
+					(unsigned long long) imap.im_len,
+					XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+#endif /* DEBUG */
+			return XFS_ERROR(EINVAL);
+		}
+
+		/*
+		 * Fill in the fields in the inode that will be used to
+		 * map the inode to its buffer from now on.
+		 */
+		ip->i_blkno = imap.im_blkno;
+		ip->i_len = imap.im_len;
+		ip->i_boffset = imap.im_boffset;
+	} else {
+		/*
+		 * We've already mapped the inode once, so just use the
+		 * mapping that we saved the first time.
+		 */
+		imap.im_blkno = ip->i_blkno;
+		imap.im_len = ip->i_len;
+		imap.im_boffset = ip->i_boffset;
+	}
+	ASSERT(bno == 0 || bno == imap.im_blkno);
+
+	/*
+	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
+	 * default to just a read_buf() call.
+	 */
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
+				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
+
+	if (error) {
+#ifdef DEBUG
+		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
+				"xfs_trans_read_buf() returned error %d, "
+				"imap.im_blkno 0x%llx, imap.im_len 0x%llx",
+				error, (unsigned long long) imap.im_blkno,
+				(unsigned long long) imap.im_len);
+#endif /* DEBUG */
+		return error;
+	}
+#ifdef __KERNEL__
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 */
+#ifdef DEBUG
+	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
+#else
+	ni = 1;
+#endif
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
+				 XFS_RANDOM_ITOBP_INOTOBP))) {
+#ifdef DEBUG
+			prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)",
+				mp->m_ddev_targp,
+				(unsigned long long)imap.im_blkno, i,
+				INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
+#endif
+			XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
+					     mp, dip);
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	}
+#endif	/* __KERNEL__ */
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Mark the buffer as an inode buffer now that it looks good
+	 */
+	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+	/*
+	 * Set *dipp to point to the on-disk inode in the buffer.
+	 */
+	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Move inode type and inode format specific information from the
+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
+ * this means set if_rdev to the proper value.  For files, directories,
+ * and symlinks this means to bring in the in-line data or extent
+ * pointers.  For a file in B-tree format, only the root is immediately
+ * brought in-core.  The rest will be in-lined in if_extents when it
+ * is first referenced (see xfs_iread_extents()).
+ */
+STATIC int
+xfs_iformat(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip)
+{
+	xfs_attr_shortform_t	*atp;
+	int			size;
+	int			error;
+	xfs_fsize_t             di_size;
+	ip->i_df.if_ext_max =
+		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+	error = 0;
+
+	if (unlikely(
+	    INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
+		INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
+	    INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu."
+			"  Unmount and run xfs_repair.",
+			(unsigned long long)ip->i_ino,
+			(int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
+			    + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
+			(unsigned long long)
+			INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT));
+		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, forkoff = 0x%x."
+			"  Unmount and run xfs_repair.",
+			(unsigned long long)ip->i_ino,
+			(int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
+		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	switch (ip->i_d.di_mode & S_IFMT) {
+	case S_IFIFO:
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFSOCK:
+		if (unlikely(INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)) {
+			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
+					      ip->i_mount, dip);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		ip->i_d.di_size = 0;
+		ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
+		break;
+
+	case S_IFREG:
+	case S_IFLNK:
+	case S_IFDIR:
+		switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
+		case XFS_DINODE_FMT_LOCAL:
+			/*
+			 * no local regular files yet
+			 */
+			if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
+				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+					(unsigned long long) ip->i_ino);
+				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
+						     XFS_ERRLEVEL_LOW,
+						     ip->i_mount, dip);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
+				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+					(unsigned long long) ip->i_ino,
+					(long long) di_size);
+				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
+						     XFS_ERRLEVEL_LOW,
+						     ip->i_mount, dip);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			size = (int)di_size;
+			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+			break;
+		default:
+			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
+					 ip->i_mount);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		break;
+
+	default:
+		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	if (error) {
+		return error;
+	}
+	if (!XFS_DFORK_Q(dip))
+		return 0;
+	ASSERT(ip->i_afp == NULL);
+	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+	ip->i_afp->if_ext_max =
+		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_LOCAL:
+		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
+		size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT);
+		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+		break;
+	default:
+		error = XFS_ERROR(EFSCORRUPTED);
+		break;
+	}
+	if (error) {
+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+		ip->i_afp = NULL;
+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
+	}
+	return error;
+}
+
+/*
+ * The file is in-lined in the on-disk inode.
+ * If it fits into if_inline_data, then copy
+ * it there, otherwise allocate a buffer for it
+ * and copy the data there.  Either way, set
+ * if_data to point at the data.
+ * If we allocate a buffer for the data, make
+ * sure that its size is a multiple of 4 and
+ * record the real size in i_real_bytes.
+ */
+STATIC int
+xfs_iformat_local(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork,
+	int		size)
+{
+	xfs_ifork_t	*ifp;
+	int		real_size;
+
+	/*
+	 * If the size is unreasonable, then something
+	 * is wrong and we just bail out rather than crash in
+	 * kmem_alloc() or memcpy() below.
+	 */
+	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+			(unsigned long long) ip->i_ino, size,
+			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
+		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	real_size = 0;
+	if (size == 0)
+		ifp->if_u1.if_data = NULL;
+	else if (size <= sizeof(ifp->if_u2.if_inline_data))
+		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+	else {
+		real_size = roundup(size, 4);
+		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+	}
+	ifp->if_bytes = size;
+	ifp->if_real_bytes = real_size;
+	if (size)
+		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFINLINE;
+	return 0;
+}
+
+/*
+ * The file consists of a set of extents all
+ * of which fit into the on-disk inode.
+ * If there are few enough extents to fit into
+ * the if_inline_ext, then copy them there.
+ * Otherwise allocate a buffer for them and copy
+ * them into it.  Either way, set if_extents
+ * to point at the extents.
+ */
+STATIC int
+xfs_iformat_extents(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork)
+{
+	xfs_bmbt_rec_t	*ep, *dp;
+	xfs_ifork_t	*ifp;
+	int		nex;
+	int		real_size;
+	int		size;
+	int		i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
+	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
+
+	/*
+	 * If the number of extents is unreasonable, then something
+	 * is wrong and we just bail out rather than crash in
+	 * kmem_alloc() or memcpy() below.
+	 */
+	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+			(unsigned long long) ip->i_ino, nex);
+		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	real_size = 0;
+	if (nex == 0)
+		ifp->if_u1.if_extents = NULL;
+	else if (nex <= XFS_INLINE_EXTS)
+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+	else {
+		ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
+		ASSERT(ifp->if_u1.if_extents != NULL);
+		real_size = size;
+	}
+	ifp->if_bytes = size;
+	ifp->if_real_bytes = real_size;
+	if (size) {
+		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
+		xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip));
+		ep = ifp->if_u1.if_extents;
+		for (i = 0; i < nex; i++, ep++, dp++) {
+			ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0),
+								ARCH_CONVERT);
+			ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
+								ARCH_CONVERT);
+		}
+		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
+			whichfork);
+		if (whichfork != XFS_DATA_FORK ||
+			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
+				if (unlikely(xfs_check_nostate_extents(
+				    ifp->if_u1.if_extents, nex))) {
+					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
+							 XFS_ERRLEVEL_LOW,
+							 ip->i_mount);
+					return XFS_ERROR(EFSCORRUPTED);
+				}
+	}
+	ifp->if_flags |= XFS_IFEXTENTS;
+	return 0;
+}
+
+/*
+ * The file has too many extents to fit into
+ * the inode, so they are in B-tree format.
+ * Allocate a buffer for the root of the B-tree
+ * and copy the root into it.  The i_extents
+ * field will remain NULL until all of the
+ * extents are read in (when they are needed).
+ */
+STATIC int
+xfs_iformat_btree(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip,
+	int			whichfork)
+{
+	xfs_bmdr_block_t	*dfp;
+	xfs_ifork_t		*ifp;
+	/* REFERENCED */
+	int			nrecs;
+	int			size;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+	size = XFS_BMAP_BROOT_SPACE(dfp);
+	nrecs = XFS_BMAP_BROOT_NUMRECS(dfp);
+
+	/*
+	 * blow out if -- fork has less extents than can fit in
+	 * fork (fork shouldn't be a btree format), root btree
+	 * block has more records than can fit into the fork,
+	 * or the number of extents is greater than the number of
+	 * blocks.
+	 */
+	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
+	    || XFS_BMDR_SPACE_CALC(nrecs) >
+			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
+	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+			(unsigned long long) ip->i_ino);
+		XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+				 ip->i_mount);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	ifp->if_broot_bytes = size;
+	ifp->if_broot = kmem_alloc(size, KM_SLEEP);
+	ASSERT(ifp->if_broot != NULL);
+	/*
+	 * Copy and convert from the on-disk structure
+	 * to the in-memory structure.
+	 */
+	xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
+		ifp->if_broot, size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFBROOT;
+
+	return 0;
+}
+
+/*
+ * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk
+ * and native format
+ *
+ * buf  = on-disk representation
+ * dip  = native representation
+ * dir  = direction - +ve -> disk to native
+ *                    -ve -> native to disk
+ */
+void
+xfs_xlate_dinode_core(
+	xfs_caddr_t		buf,
+	xfs_dinode_core_t	*dip,
+	int			dir)
+{
+	xfs_dinode_core_t	*buf_core = (xfs_dinode_core_t *)buf;
+	xfs_dinode_core_t	*mem_core = (xfs_dinode_core_t *)dip;
+	xfs_arch_t		arch = ARCH_CONVERT;
+
+	ASSERT(dir);
+
+	INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch);
+	INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch);
+	INT_XLATE(buf_core->di_version,	mem_core->di_version, dir, arch);
+	INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch);
+	INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch);
+	INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch);
+	INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch);
+	INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch);
+	INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch);
+
+	if (dir > 0) {
+		memcpy(mem_core->di_pad, buf_core->di_pad,
+			sizeof(buf_core->di_pad));
+	} else {
+		memcpy(buf_core->di_pad, mem_core->di_pad,
+			sizeof(buf_core->di_pad));
+	}
+
+	INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch);
+
+	INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec,
+			dir, arch);
+	INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec,
+			dir, arch);
+	INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec,
+			dir, arch);
+	INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec,
+			dir, arch);
+	INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec,
+			dir, arch);
+	INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec,
+			dir, arch);
+	INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch);
+	INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch);
+	INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch);
+	INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch);
+	INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch);
+	INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch);
+	INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch);
+	INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch);
+	INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch);
+	INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch);
+	INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch);
+}
+
+STATIC uint
+_xfs_dic2xflags(
+	xfs_dinode_core_t	*dic,
+	__uint16_t		di_flags)
+{
+	uint			flags = 0;
+
+	if (di_flags & XFS_DIFLAG_ANY) {
+		if (di_flags & XFS_DIFLAG_REALTIME)
+			flags |= XFS_XFLAG_REALTIME;
+		if (di_flags & XFS_DIFLAG_PREALLOC)
+			flags |= XFS_XFLAG_PREALLOC;
+		if (di_flags & XFS_DIFLAG_IMMUTABLE)
+			flags |= XFS_XFLAG_IMMUTABLE;
+		if (di_flags & XFS_DIFLAG_APPEND)
+			flags |= XFS_XFLAG_APPEND;
+		if (di_flags & XFS_DIFLAG_SYNC)
+			flags |= XFS_XFLAG_SYNC;
+		if (di_flags & XFS_DIFLAG_NOATIME)
+			flags |= XFS_XFLAG_NOATIME;
+		if (di_flags & XFS_DIFLAG_NODUMP)
+			flags |= XFS_XFLAG_NODUMP;
+		if (di_flags & XFS_DIFLAG_RTINHERIT)
+			flags |= XFS_XFLAG_RTINHERIT;
+		if (di_flags & XFS_DIFLAG_PROJINHERIT)
+			flags |= XFS_XFLAG_PROJINHERIT;
+		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
+			flags |= XFS_XFLAG_NOSYMLINKS;
+	}
+
+	return flags;
+}
+
+uint
+xfs_ip2xflags(
+	xfs_inode_t		*ip)
+{
+	xfs_dinode_core_t	*dic = &ip->i_d;
+
+	return _xfs_dic2xflags(dic, dic->di_flags) |
+		(XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
+}
+
+uint
+xfs_dic2xflags(
+	xfs_dinode_core_t	*dic)
+{
+	return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) |
+		(XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
+}
+
+/*
+ * Given a mount structure and an inode number, return a pointer
+ * to a newly allocated in-core inode coresponding to the given
+ * inode number.
+ *
+ * Initialize the inode's attributes and extent pointers if it
+ * already has them (it will not if the inode has no links).
+ */
+int
+xfs_iread(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_ino_t	ino,
+	xfs_inode_t	**ipp,
+	xfs_daddr_t	bno)
+{
+	xfs_buf_t	*bp;
+	xfs_dinode_t	*dip;
+	xfs_inode_t	*ip;
+	int		error;
+
+	ASSERT(xfs_inode_zone != NULL);
+
+	ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
+	ip->i_ino = ino;
+	ip->i_mount = mp;
+
+	/*
+	 * Get pointer's to the on-disk inode and the buffer containing it.
+	 * If the inode number refers to a block outside the file system
+	 * then xfs_itobp() will return NULL.  In this case we should
+	 * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
+	 * know that this is a new incore inode.
+	 */
+	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno);
+
+	if (error != 0) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		return error;
+	}
+
+	/*
+	 * Initialize inode's trace buffers.
+	 * Do this before xfs_iformat in case it adds entries.
+	 */
+#ifdef XFS_BMAP_TRACE
+	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_BMBT_TRACE
+	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_RW_TRACE
+	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_ILOCK_TRACE
+	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_DIR2_TRACE
+	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
+#endif
+
+	/*
+	 * If we got something that isn't an inode it means someone
+	 * (nfs or dmi) has a stale handle.
+	 */
+	if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		xfs_trans_brelse(tp, bp);
+#ifdef DEBUG
+		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
+				"dip->di_core.di_magic (0x%x) != "
+				"XFS_DINODE_MAGIC (0x%x)",
+				INT_GET(dip->di_core.di_magic, ARCH_CONVERT),
+				XFS_DINODE_MAGIC);
+#endif /* DEBUG */
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * If the on-disk inode is already linked to a directory
+	 * entry, copy all of the inode into the in-core inode.
+	 * xfs_iformat() handles copying in the inode format
+	 * specific information.
+	 * Otherwise, just get the truly permanent information.
+	 */
+	if (dip->di_core.di_mode) {
+		xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core,
+		     &(ip->i_d), 1);
+		error = xfs_iformat(ip, dip);
+		if (error)  {
+			kmem_zone_free(xfs_inode_zone, ip);
+			xfs_trans_brelse(tp, bp);
+#ifdef DEBUG
+			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
+					"xfs_iformat() returned error %d",
+					error);
+#endif /* DEBUG */
+			return error;
+		}
+	} else {
+		ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT);
+		ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT);
+		ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT);
+		ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT);
+		/*
+		 * Make sure to pull in the mode here as well in
+		 * case the inode is released without being used.
+		 * This ensures that xfs_inactive() will see that
+		 * the inode is already free and not try to mess
+		 * with the uninitialized part of it.
+		 */
+		ip->i_d.di_mode = 0;
+		/*
+		 * Initialize the per-fork minima and maxima for a new
+		 * inode here.  xfs_iformat will do it for old inodes.
+		 */
+		ip->i_df.if_ext_max =
+			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+	}
+
+	INIT_LIST_HEAD(&ip->i_reclaim);
+
+	/*
+	 * The inode format changed when we moved the link count and
+	 * made it 32 bits long.  If this is an old format inode,
+	 * convert it in memory to look like a new one.  If it gets
+	 * flushed to disk we will convert back before flushing or
+	 * logging it.  We zero out the new projid field and the old link
+	 * count field.  We'll handle clearing the pad field (the remains
+	 * of the old uuid field) when we actually convert the inode to
+	 * the new format. We don't change the version number so that we
+	 * can distinguish this from a real new format inode.
+	 */
+	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+		ip->i_d.di_nlink = ip->i_d.di_onlink;
+		ip->i_d.di_onlink = 0;
+		ip->i_d.di_projid = 0;
+	}
+
+	ip->i_delayed_blks = 0;
+
+	/*
+	 * Mark the buffer containing the inode as something to keep
+	 * around for a while.  This helps to keep recently accessed
+	 * meta-data in-core longer.
+	 */
+	 XFS_BUF_SET_REF(bp, XFS_INO_REF);
+
+	/*
+	 * Use xfs_trans_brelse() to release the buffer containing the
+	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
+	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
+	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
+	 * will only release the buffer if it is not dirty within the
+	 * transaction.  It will be OK to release the buffer in this case,
+	 * because inodes on disk are never destroyed and we will be
+	 * locking the new in-core inode before putting it in the hash
+	 * table where other processes can find it.  Thus we don't have
+	 * to worry about the inode being changed just because we released
+	 * the buffer.
+	 */
+	xfs_trans_brelse(tp, bp);
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Read in extents from a btree-format inode.
+ * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
+ */
+int
+xfs_iread_extents(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	int		error;
+	xfs_ifork_t	*ifp;
+	size_t		size;
+
+	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
+				 ip->i_mount);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/*
+	 * We know that the size is valid (it's checked in iformat_btree)
+	 */
+	ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
+	ASSERT(ifp->if_u1.if_extents != NULL);
+	ifp->if_lastex = NULLEXTNUM;
+	ifp->if_bytes = ifp->if_real_bytes = (int)size;
+	ifp->if_flags |= XFS_IFEXTENTS;
+	error = xfs_bmap_read_extents(tp, ip, whichfork);
+	if (error) {
+		kmem_free(ifp->if_u1.if_extents, size);
+		ifp->if_u1.if_extents = NULL;
+		ifp->if_bytes = ifp->if_real_bytes = 0;
+		ifp->if_flags &= ~XFS_IFEXTENTS;
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_inode.c