diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_inode.c |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 3876 |
1 files changed, 3876 insertions, 0 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c new file mode 100644 index 00000000000..43c632ab86a --- /dev/null +++ b/fs/xfs/xfs_inode.c @@ -0,0 +1,3876 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_buf_item.h" +#include "xfs_rw.h" +#include "xfs_error.h" +#include "xfs_bit.h" +#include "xfs_utils.h" +#include "xfs_dir2_trace.h" +#include "xfs_quota.h" +#include "xfs_mac.h" +#include "xfs_acl.h" + + +kmem_zone_t *xfs_ifork_zone; +kmem_zone_t *xfs_inode_zone; +kmem_zone_t *xfs_chashlist_zone; + +/* + * Used in xfs_itruncate(). This is the maximum number of extents + * freed from a file in a single transaction. + */ +#define XFS_ITRUNC_MAX_EXTENTS 2 + +STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); +STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); +STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); +STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); + + +#ifdef DEBUG +/* + * Make sure that the extents in the given memory buffer + * are valid. 
+ */ +STATIC void +xfs_validate_extents( + xfs_bmbt_rec_t *ep, + int nrecs, + int disk, + xfs_exntfmt_t fmt) +{ + xfs_bmbt_irec_t irec; + xfs_bmbt_rec_t rec; + int i; + + for (i = 0; i < nrecs; i++) { + rec.l0 = get_unaligned((__uint64_t*)&ep->l0); + rec.l1 = get_unaligned((__uint64_t*)&ep->l1); + if (disk) + xfs_bmbt_disk_get_all(&rec, &irec); + else + xfs_bmbt_get_all(&rec, &irec); + if (fmt == XFS_EXTFMT_NOSTATE) + ASSERT(irec.br_state == XFS_EXT_NORM); + ep++; + } +} +#else /* DEBUG */ +#define xfs_validate_extents(ep, nrecs, disk, fmt) +#endif /* DEBUG */ + +/* + * Check that none of the inode's in the buffer have a next + * unlinked field of 0. + */ +#if defined(DEBUG) +void +xfs_inobp_check( + xfs_mount_t *mp, + xfs_buf_t *bp) +{ + int i; + int j; + xfs_dinode_t *dip; + + j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; + + for (i = 0; i < j; i++) { + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + i * mp->m_sb.sb_inodesize); + if (!dip->di_next_unlinked) { + xfs_fs_cmn_err(CE_ALERT, mp, + "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. 
About to pop an ASSERT.", + bp); + ASSERT(dip->di_next_unlinked); + } + } +} +#endif + +/* + * called from bwrite on xfs inode buffers + */ +void +xfs_inobp_bwcheck(xfs_buf_t *bp) +{ + xfs_mount_t *mp; + int i; + int j; + xfs_dinode_t *dip; + + ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL); + + mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); + + + j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; + + for (i = 0; i < j; i++) { + dip = (xfs_dinode_t *) xfs_buf_offset(bp, + i * mp->m_sb.sb_inodesize); + if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { + cmn_err(CE_WARN, +"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x", + INT_GET(dip->di_core.di_magic, ARCH_CONVERT), + (__uint64_t)(__psunsigned_t) bp, + (__int64_t) XFS_BUF_ADDR(bp), + xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); + xfs_fs_cmn_err(CE_WARN, mp, + "corrupt, unmount and run xfs_repair"); + } + if (!dip->di_next_unlinked) { + cmn_err(CE_WARN, +"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x", + (__uint64_t)(__psunsigned_t) bp, + (__int64_t) XFS_BUF_ADDR(bp), + xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); + xfs_fs_cmn_err(CE_WARN, mp, + "corrupt, unmount and run xfs_repair"); + } + } + + return; +} + +/* + * This routine is called to map an inode number within a file + * system to the buffer containing the on-disk version of the + * inode. It returns a pointer to the buffer containing the + * on-disk inode in the bpp parameter, and in the dip parameter + * it returns a pointer to the on-disk inode within that buffer. + * + * If a non-zero error is returned, then the contents of bpp and + * dipp are undefined. + * + * Use xfs_imap() to determine the size and location of the + * buffer to read from disk. 
+ */ +int +xfs_inotobp( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + xfs_dinode_t **dipp, + xfs_buf_t **bpp, + int *offset) +{ + int di_ok; + xfs_imap_t imap; + xfs_buf_t *bp; + int error; + xfs_dinode_t *dip; + + /* + * Call the space managment code to find the location of the + * inode on disk. + */ + imap.im_blkno = 0; + error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); + if (error != 0) { + cmn_err(CE_WARN, + "xfs_inotobp: xfs_imap() returned an " + "error %d on %s. Returning error.", error, mp->m_fsname); + return error; + } + + /* + * If the inode number maps to a block outside the bounds of the + * file system then return NULL rather than calling read_buf + * and panicing when we get an error from the driver. + */ + if ((imap.im_blkno + imap.im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + cmn_err(CE_WARN, + "xfs_inotobp: inode number (%d + %d) maps to a block outside the bounds " + "of the file system %s. Returning EINVAL.", + imap.im_blkno, imap.im_len,mp->m_fsname); + return XFS_ERROR(EINVAL); + } + + /* + * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will + * default to just a read_buf() call. + */ + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, + (int)imap.im_len, XFS_BUF_LOCK, &bp); + + if (error) { + cmn_err(CE_WARN, + "xfs_inotobp: xfs_trans_read_buf() returned an " + "error %d on %s. Returning error.", error, mp->m_fsname); + return error; + } + dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); + di_ok = + INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { + XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); + xfs_trans_brelse(tp, bp); + cmn_err(CE_WARN, + "xfs_inotobp: XFS_TEST_ERROR() returned an " + "error on %s. 
Returning EFSCORRUPTED.", mp->m_fsname); + return XFS_ERROR(EFSCORRUPTED); + } + + xfs_inobp_check(mp, bp); + + /* + * Set *dipp to point to the on-disk inode in the buffer. + */ + *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + *bpp = bp; + *offset = imap.im_boffset; + return 0; +} + + +/* + * This routine is called to map an inode to the buffer containing + * the on-disk version of the inode. It returns a pointer to the + * buffer containing the on-disk inode in the bpp parameter, and in + * the dip parameter it returns a pointer to the on-disk inode within + * that buffer. + * + * If a non-zero error is returned, then the contents of bpp and + * dipp are undefined. + * + * If the inode is new and has not yet been initialized, use xfs_imap() + * to determine the size and location of the buffer to read from disk. + * If the inode has already been mapped to its buffer and read in once, + * then use the mapping information stored in the inode rather than + * calling xfs_imap(). This allows us to avoid the overhead of looking + * at the inode btree for small block file systems (see xfs_dilocate()). + * We can tell whether the inode has been mapped in before by comparing + * its disk block address to 0. Only uninitialized inodes will have + * 0 for the disk block address. + */ +int +xfs_itobp( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dinode_t **dipp, + xfs_buf_t **bpp, + xfs_daddr_t bno) +{ + xfs_buf_t *bp; + int error; + xfs_imap_t imap; +#ifdef __KERNEL__ + int i; + int ni; +#endif + + if (ip->i_blkno == (xfs_daddr_t)0) { + /* + * Call the space management code to find the location of the + * inode on disk. + */ + imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); + if (error != 0) { + return error; + } + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return NULL rather than calling + * read_buf and panicing when we get an error from the + * driver. 
+ */ + if ((imap.im_blkno + imap.im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { +#ifdef DEBUG + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " + "(imap.im_blkno (0x%llx) " + "+ imap.im_len (0x%llx)) > " + " XFS_FSB_TO_BB(mp, " + "mp->m_sb.sb_dblocks) (0x%llx)", + (unsigned long long) imap.im_blkno, + (unsigned long long) imap.im_len, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); +#endif /* DEBUG */ + return XFS_ERROR(EINVAL); + } + + /* + * Fill in the fields in the inode that will be used to + * map the inode to its buffer from now on. + */ + ip->i_blkno = imap.im_blkno; + ip->i_len = imap.im_len; + ip->i_boffset = imap.im_boffset; + } else { + /* + * We've already mapped the inode once, so just use the + * mapping that we saved the first time. + */ + imap.im_blkno = ip->i_blkno; + imap.im_len = ip->i_len; + imap.im_boffset = ip->i_boffset; + } + ASSERT(bno == 0 || bno == imap.im_blkno); + + /* + * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will + * default to just a read_buf() call. + */ + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, + (int)imap.im_len, XFS_BUF_LOCK, &bp); + + if (error) { +#ifdef DEBUG + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " + "xfs_trans_read_buf() returned error %d, " + "imap.im_blkno 0x%llx, imap.im_len 0x%llx", + error, (unsigned long long) imap.im_blkno, + (unsigned long long) imap.im_len); +#endif /* DEBUG */ + return error; + } +#ifdef __KERNEL__ + /* + * Validate the magic number and version of every inode in the buffer + * (if DEBUG kernel) or the first inode in the buffer, otherwise. 
+ */ +#ifdef DEBUG + ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; +#else + ni = 1; +#endif + for (i = 0; i < ni; i++) { + int di_ok; + xfs_dinode_t *dip; + + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + (i << mp->m_sb.sb_inodelog)); + di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { +#ifdef DEBUG + prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", + mp->m_ddev_targp, + (unsigned long long)imap.im_blkno, i, + INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); +#endif + XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, + mp, dip); + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + } +#endif /* __KERNEL__ */ + + xfs_inobp_check(mp, bp); + + /* + * Mark the buffer as an inode buffer now that it looks good + */ + XFS_BUF_SET_VTYPE(bp, B_FS_INO); + + /* + * Set *dipp to point to the on-disk inode in the buffer. + */ + *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + *bpp = bp; + return 0; +} + +/* + * Move inode type and inode format specific information from the + * on-disk inode to the in-core inode. For fifos, devs, and sockets + * this means set if_rdev to the proper value. For files, directories, + * and symlinks this means to bring in the in-line data or extent + * pointers. For a file in B-tree format, only the root is immediately + * brought in-core. The rest will be in-lined in if_extents when it + * is first referenced (see xfs_iread_extents()). 
+ */ +STATIC int +xfs_iformat( + xfs_inode_t *ip, + xfs_dinode_t *dip) +{ + xfs_attr_shortform_t *atp; + int size; + int error; + xfs_fsize_t di_size; + ip->i_df.if_ext_max = + XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + error = 0; + + if (unlikely( + INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > + INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, extent total = %d, nblocks = %Lu." + " Unmount and run xfs_repair.", + (unsigned long long)ip->i_ino, + (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), + (unsigned long long) + INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)); + XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + + if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, forkoff = 0x%x." 
+ " Unmount and run xfs_repair.", + (unsigned long long)ip->i_ino, + (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); + XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (unlikely(INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)) { + XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + ip->i_d.di_size = 0; + ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); + break; + + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.", + (unsigned long long) ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(4)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + + di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); + if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (bad size %Ld for local inode). 
Unmount and run xfs_repair.", + (unsigned long long) ip->i_ino, + (long long) di_size); + XFS_CORRUPTION_ERROR("xfs_iformat(5)", + XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + + size = (int)di_size; + error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + break; + default: + XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, + ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + break; + + default: + XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + if (error) { + return error; + } + if (!XFS_DFORK_Q(dip)) + return 0; + ASSERT(ip->i_afp == NULL); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp->if_ext_max = + XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { + case XFS_DINODE_FMT_LOCAL: + atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); + size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: + error = XFS_ERROR(EFSCORRUPTED); + break; + } + if (error) { + kmem_zone_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + xfs_idestroy_fork(ip, XFS_DATA_FORK); + } + return error; +} + +/* + * The file is in-lined in the on-disk inode. + * If it fits into if_inline_data, then copy + * it there, otherwise allocate a buffer for it + * and copy the data there. Either way, set + * if_data to point at the data. + * If we allocate a buffer for the data, make + * sure that its size is a multiple of 4 and + * record the real size in i_real_bytes. 
+ */ +STATIC int +xfs_iformat_local( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork, + int size) +{ + xfs_ifork_t *ifp; + int real_size; + + /* + * If the size is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or memcpy() below. + */ + if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.", + (unsigned long long) ip->i_ino, size, + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); + XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + ifp = XFS_IFORK_PTR(ip, whichfork); + real_size = 0; + if (size == 0) + ifp->if_u1.if_data = NULL; + else if (size <= sizeof(ifp->if_u2.if_inline_data)) + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + else { + real_size = roundup(size, 4); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + } + ifp->if_bytes = size; + ifp->if_real_bytes = real_size; + if (size) + memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFINLINE; + return 0; +} + +/* + * The file consists of a set of extents all + * of which fit into the on-disk inode. + * If there are few enough extents to fit into + * the if_inline_ext, then copy them there. + * Otherwise allocate a buffer for them and copy + * them into it. Either way, set if_extents + * to point at the extents. 
+ */ +STATIC int +xfs_iformat_extents( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + xfs_bmbt_rec_t *ep, *dp; + xfs_ifork_t *ifp; + int nex; + int real_size; + int size; + int i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + nex = XFS_DFORK_NEXTENTS(dip, whichfork); + size = nex * (uint)sizeof(xfs_bmbt_rec_t); + + /* + * If the number of extents is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or memcpy() below. + */ + if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.", + (unsigned long long) ip->i_ino, nex); + XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + + real_size = 0; + if (nex == 0) + ifp->if_u1.if_extents = NULL; + else if (nex <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + else { + ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_u1.if_extents != NULL); + real_size = size; + } + ifp->if_bytes = size; + ifp->if_real_bytes = real_size; + if (size) { + dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); + xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip)); + ep = ifp->if_u1.if_extents; + for (i = 0; i < nex; i++, ep++, dp++) { + ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0), + ARCH_CONVERT); + ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), + ARCH_CONVERT); + } + xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, + whichfork); + if (whichfork != XFS_DATA_FORK || + XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) + if (unlikely(xfs_check_nostate_extents( + ifp->if_u1.if_extents, nex))) { + XFS_ERROR_REPORT("xfs_iformat_extents(2)", + XFS_ERRLEVEL_LOW, + ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + } + ifp->if_flags |= XFS_IFEXTENTS; + return 0; +} + +/* + * The file has too many extents to fit into + * the 
inode, so they are in B-tree format. + * Allocate a buffer for the root of the B-tree + * and copy the root into it. The i_extents + * field will remain NULL until all of the + * extents are read in (when they are needed). + */ +STATIC int +xfs_iformat_btree( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + xfs_bmdr_block_t *dfp; + xfs_ifork_t *ifp; + /* REFERENCED */ + int nrecs; + int size; + + ifp = XFS_IFORK_PTR(ip, whichfork); + dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); + size = XFS_BMAP_BROOT_SPACE(dfp); + nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); + + /* + * blow out if -- fork has less extents than can fit in + * fork (fork shouldn't be a btree format), root btree + * block has more records than can fit into the fork, + * or the number of extents is greater than the number of + * blocks. + */ + if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max + || XFS_BMDR_SPACE_CALC(nrecs) > + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) + || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (btree). Unmount and run xfs_repair.", + (unsigned long long) ip->i_ino); + XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, + ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + + ifp->if_broot_bytes = size; + ifp->if_broot = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_broot != NULL); + /* + * Copy and convert from the on-disk structure + * to the in-memory structure. 
+ */ + xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), + ifp->if_broot, size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFBROOT; + + return 0; +} + +/* + * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk + * and native format + * + * buf = on-disk representation + * dip = native representation + * dir = direction - +ve -> disk to native + * -ve -> native to disk + */ +void +xfs_xlate_dinode_core( + xfs_caddr_t buf, + xfs_dinode_core_t *dip, + int dir) +{ + xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; + xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; + xfs_arch_t arch = ARCH_CONVERT; + + ASSERT(dir); + + INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); + INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); + INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); + INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); + INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); + INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); + INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); + INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); + + if (dir > 0) { + memcpy(mem_core->di_pad, buf_core->di_pad, + sizeof(buf_core->di_pad)); + } else { + memcpy(buf_core->di_pad, mem_core->di_pad, + sizeof(buf_core->di_pad)); + } + + INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch); + + INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec, + dir, arch); + INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec, + dir, arch); + INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec, + dir, arch); + INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec, + dir, arch); + INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec, + dir, arch); + INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec, + dir, arch); + 
INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch); + INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch); + INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch); + INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch); + INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch); + INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch); + INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch); + INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch); + INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch); + INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch); + INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch); +} + +STATIC uint +_xfs_dic2xflags( + xfs_dinode_core_t *dic, + __uint16_t di_flags) +{ + uint flags = 0; + + if (di_flags & XFS_DIFLAG_ANY) { + if (di_flags & XFS_DIFLAG_REALTIME) + flags |= XFS_XFLAG_REALTIME; + if (di_flags & XFS_DIFLAG_PREALLOC) + flags |= XFS_XFLAG_PREALLOC; + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= XFS_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= XFS_XFLAG_APPEND; + if (di_flags & XFS_DIFLAG_SYNC) + flags |= XFS_XFLAG_SYNC; + if (di_flags & XFS_DIFLAG_NOATIME) + flags |= XFS_XFLAG_NOATIME; + if (di_flags & XFS_DIFLAG_NODUMP) + flags |= XFS_XFLAG_NODUMP; + if (di_flags & XFS_DIFLAG_RTINHERIT) + flags |= XFS_XFLAG_RTINHERIT; + if (di_flags & XFS_DIFLAG_PROJINHERIT) + flags |= XFS_XFLAG_PROJINHERIT; + if (di_flags & XFS_DIFLAG_NOSYMLINKS) + flags |= XFS_XFLAG_NOSYMLINKS; + } + + return flags; +} + +uint +xfs_ip2xflags( + xfs_inode_t *ip) +{ + xfs_dinode_core_t *dic = &ip->i_d; + + return _xfs_dic2xflags(dic, dic->di_flags) | + (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); +} + +uint +xfs_dic2xflags( + xfs_dinode_core_t *dic) +{ + return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | + (XFS_CFORK_Q_DISK(dic) ? 
XFS_XFLAG_HASATTR : 0); +} + +/* + * Given a mount structure and an inode number, return a pointer + * to a newly allocated in-core inode coresponding to the given + * inode number. + * + * Initialize the inode's attributes and extent pointers if it + * already has them (it will not if the inode has no links). + */ +int +xfs_iread( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + xfs_inode_t **ipp, + xfs_daddr_t bno) +{ + xfs_buf_t *bp; + xfs_dinode_t *dip; + xfs_inode_t *ip; + int error; + + ASSERT(xfs_inode_zone != NULL); + + ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); + ip->i_ino = ino; + ip->i_mount = mp; + + /* + * Get pointer's to the on-disk inode and the buffer containing it. + * If the inode number refers to a block outside the file system + * then xfs_itobp() will return NULL. In this case we should + * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will + * know that this is a new incore inode. + */ + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); + + if (error != 0) { + kmem_zone_free(xfs_inode_zone, ip); + return error; + } + + /* + * Initialize inode's trace buffers. + * Do this before xfs_iformat in case it adds entries. + */ +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_BMBT_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); +#endif + + /* + * If we got something that isn't an inode it means someone + * (nfs or dmi) has a stale handle. 
+ */ + if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { + kmem_zone_free(xfs_inode_zone, ip); + xfs_trans_brelse(tp, bp); +#ifdef DEBUG + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " + "dip->di_core.di_magic (0x%x) != " + "XFS_DINODE_MAGIC (0x%x)", + INT_GET(dip->di_core.di_magic, ARCH_CONVERT), + XFS_DINODE_MAGIC); +#endif /* DEBUG */ + return XFS_ERROR(EINVAL); + } + + /* + * If the on-disk inode is already linked to a directory + * entry, copy all of the inode into the in-core inode. + * xfs_iformat() handles copying in the inode format + * specific information. + * Otherwise, just get the truly permanent information. + */ + if (dip->di_core.di_mode) { + xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, + &(ip->i_d), 1); + error = xfs_iformat(ip, dip); + if (error) { + kmem_zone_free(xfs_inode_zone, ip); + xfs_trans_brelse(tp, bp); +#ifdef DEBUG + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " + "xfs_iformat() returned error %d", + error); +#endif /* DEBUG */ + return error; + } + } else { + ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT); + ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT); + ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT); + ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT); + /* + * Make sure to pull in the mode here as well in + * case the inode is released without being used. + * This ensures that xfs_inactive() will see that + * the inode is already free and not try to mess + * with the uninitialized part of it. + */ + ip->i_d.di_mode = 0; + /* + * Initialize the per-fork minima and maxima for a new + * inode here. xfs_iformat will do it for old inodes. + */ + ip->i_df.if_ext_max = + XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + } + + INIT_LIST_HEAD(&ip->i_reclaim); + + /* + * The inode format changed when we moved the link count and + * made it 32 bits long. If this is an old format inode, + * convert it in memory to look like a new one. 
If it gets + * flushed to disk we will convert back before flushing or + * logging it. We zero out the new projid field and the old link + * count field. We'll handle clearing the pad field (the remains + * of the old uuid field) when we actually convert the inode to + * the new format. We don't change the version number so that we + * can distinguish this from a real new format inode. + */ + if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ip->i_d.di_nlink = ip->i_d.di_onlink; + ip->i_d.di_onlink = 0; + ip->i_d.di_projid = 0; + } + + ip->i_delayed_blks = 0; + + /* + * Mark the buffer containing the inode as something to keep + * around for a while. This helps to keep recently accessed + * meta-data in-core longer. + */ + XFS_BUF_SET_REF(bp, XFS_INO_REF); + + /* + * Use xfs_trans_brelse() to release the buffer containing the + * on-disk inode, because it was acquired with xfs_trans_read_buf() + * in xfs_itobp() above. If tp is NULL, this is just a normal + * brelse(). If we're within a transaction, then xfs_trans_brelse() + * will only release the buffer if it is not dirty within the + * transaction. It will be OK to release the buffer in this case, + * because inodes on disk are never destroyed and we will be + * locking the new in-core inode before putting it in the hash + * table where other processes can find it. Thus we don't have + * to worry about the inode being changed just because we released + * the buffer. + */ + xfs_trans_brelse(tp, bp); + *ipp = ip; + return 0; +} + +/* + * Read in extents from a btree-format inode. + * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 
+ */ +int +xfs_iread_extents( + xfs_trans_t *tp, + xfs_inode_t *ip, + int whichfork) +{ + int error; + xfs_ifork_t *ifp; + size_t size; + + if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { + XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, + ip->i_mount); + return XFS_ERROR(EFSCORRUPTED); + } + size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); + ifp = XFS_IFORK_PTR(ip, whichfork); + /* + * We know that the size is valid (it's checked in iformat_btree) + */ + ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_u1.if_extents != NULL); + ifp->if_lastex = NULLEXTNUM; + ifp->if_bytes = ifp->if_real_bytes = (int)size; + ifp->if_flags |= XFS_IFEXTENTS; + error = xfs_bmap_read_extents(tp, ip, whichfork); + if (error) { + kmem_free(ifp->if_u1.if_extents, size); + ifp->if_u1.if_extents = NULL; + ifp->if_bytes = ifp->if_real_bytes = 0; + ifp->if_flags &= ~XFS_IFEXTENTS; |