diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_ialloc.c |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/xfs/xfs_ialloc.c')
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 1401 |
1 files changed, 1401 insertions, 0 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c new file mode 100644 index 00000000000..ce5fee9eaec --- /dev/null +++ b/fs/xfs/xfs_ialloc.c @@ -0,0 +1,1401 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_bmap.h" + +/* + * Log specified fields for the inode given by bp and off. + */ +STATIC void +xfs_ialloc_log_di( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* inode buffer */ + int off, /* index of inode in buffer */ + int fields) /* bitmask of fields to log */ +{ + int first; /* first byte number */ + int ioffset; /* off in bytes */ + int last; /* last byte number */ + xfs_mount_t *mp; /* mount point structure */ + static const short offsets[] = { /* field offsets */ + /* keep in sync with bits */ + offsetof(xfs_dinode_core_t, di_magic), + offsetof(xfs_dinode_core_t, di_mode), + offsetof(xfs_dinode_core_t, di_version), + offsetof(xfs_dinode_core_t, di_format), + offsetof(xfs_dinode_core_t, di_onlink), + offsetof(xfs_dinode_core_t, di_uid), + offsetof(xfs_dinode_core_t, di_gid), + offsetof(xfs_dinode_core_t, di_nlink), + offsetof(xfs_dinode_core_t, di_projid), + offsetof(xfs_dinode_core_t, di_pad), + offsetof(xfs_dinode_core_t, di_atime), + offsetof(xfs_dinode_core_t, di_mtime), + offsetof(xfs_dinode_core_t, di_ctime), + offsetof(xfs_dinode_core_t, 
di_size), + offsetof(xfs_dinode_core_t, di_nblocks), + offsetof(xfs_dinode_core_t, di_extsize), + offsetof(xfs_dinode_core_t, di_nextents), + offsetof(xfs_dinode_core_t, di_anextents), + offsetof(xfs_dinode_core_t, di_forkoff), + offsetof(xfs_dinode_core_t, di_aformat), + offsetof(xfs_dinode_core_t, di_dmevmask), + offsetof(xfs_dinode_core_t, di_dmstate), + offsetof(xfs_dinode_core_t, di_flags), + offsetof(xfs_dinode_core_t, di_gen), + offsetof(xfs_dinode_t, di_next_unlinked), + offsetof(xfs_dinode_t, di_u), + offsetof(xfs_dinode_t, di_a), + sizeof(xfs_dinode_t) + }; + + + ASSERT(offsetof(xfs_dinode_t, di_core) == 0); + ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0); + mp = tp->t_mountp; + /* + * Get the inode-relative first and last bytes for these fields + */ + xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last); + /* + * Convert to buffer offsets and log it. + */ + ioffset = off << mp->m_sb.sb_inodelog; + first += ioffset; + last += ioffset; + xfs_trans_log_buf(tp, bp, first, last); +} + +/* + * Allocation group level functions. + */ + +/* + * Allocate new inodes in the allocation group specified by agbp. + * Return 0 for success, else error code. 
+ */ +STATIC int /* error code or 0 */ +xfs_ialloc_ag_alloc( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* alloc group buffer */ + int *alloc) +{ + xfs_agi_t *agi; /* allocation group header */ + xfs_alloc_arg_t args; /* allocation argument structure */ + int blks_per_cluster; /* fs blocks per inode cluster */ + xfs_btree_cur_t *cur; /* inode btree cursor */ + xfs_daddr_t d; /* disk addr of buffer */ + int error; + xfs_buf_t *fbuf; /* new free inodes' buffer */ + xfs_dinode_t *free; /* new free inode structure */ + int i; /* inode counter */ + int j; /* block counter */ + int nbufs; /* num bufs of new inodes */ + xfs_agino_t newino; /* new first inode's number */ + xfs_agino_t newlen; /* new number of inodes */ + int ninodes; /* num inodes per buf */ + xfs_agino_t thisino; /* current inode number, for loop */ + int version; /* inode version number to use */ + int isaligned; /* inode allocation at stripe unit */ + /* boundary */ + xfs_dinode_core_t dic; /* a dinode_core to copy to new */ + /* inodes */ + + args.tp = tp; + args.mp = tp->t_mountp; + + /* + * Locking will ensure that we don't have two callers in here + * at one time. + */ + newlen = XFS_IALLOC_INODES(args.mp); + if (args.mp->m_maxicount && + args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) + return XFS_ERROR(ENOSPC); + args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); + /* + * Set the alignment for the allocation. + * If stripe alignment is turned on then align at stripe unit + * boundary. + * If the cluster size is smaller than a filesystem block + * then we're doing I/O for inodes in filesystem block size pieces, + * so don't need alignment anyway. 
+ */ + isaligned = 0; + if (args.mp->m_sinoalign) { + ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); + args.alignment = args.mp->m_dalign; + isaligned = 1; + } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && + args.mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) + args.alignment = args.mp->m_sb.sb_inoalignmt; + else + args.alignment = 1; + agi = XFS_BUF_TO_AGI(agbp); + /* + * Need to figure out where to allocate the inode blocks. + * Ideally they should be spaced out through the a.g. + * For now, just allocate blocks up front. + */ + args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT); + args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT), + args.agbno); + /* + * Allocate a fixed-size extent of inodes. + */ + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.mod = args.total = args.wasdel = args.isfl = args.userdata = + args.minalignslop = 0; + args.prod = 1; + /* + * Allow space for the inode btree to split. + */ + args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + + /* + * If stripe alignment is turned on, then try again with cluster + * alignment. + */ + if (isaligned && args.fsbno == NULLFSBLOCK) { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT); + args.fsbno = XFS_AGB_TO_FSB(args.mp, + INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno); + if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && + args.mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) + args.alignment = args.mp->m_sb.sb_inoalignmt; + else + args.alignment = 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + + if (args.fsbno == NULLFSBLOCK) { + *alloc = 0; + return 0; + } + ASSERT(args.len == args.minlen); + /* + * Convert the results. + */ + newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); + /* + * Loop over the new block(s), filling in the inodes. 
+ * For small block sizes, manipulate the inodes in buffers + * which are multiples of the blocks size. + */ + if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { + blks_per_cluster = 1; + nbufs = (int)args.len; + ninodes = args.mp->m_sb.sb_inopblock; + } else { + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / + args.mp->m_sb.sb_blocksize; + nbufs = (int)args.len / blks_per_cluster; + ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; + } + /* + * Figure out what version number to use in the inodes we create. + * If the superblock version has caught up to the one that supports + * the new inode format, then use the new inode version. Otherwise + * use the old version so that old kernels will continue to be + * able to use the file system. + */ + if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb)) + version = XFS_DINODE_VERSION_2; + else + version = XFS_DINODE_VERSION_1; + + memset(&dic, 0, sizeof(xfs_dinode_core_t)); + INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); + INT_SET(dic.di_version, ARCH_CONVERT, version); + + for (j = 0; j < nbufs; j++) { + /* + * Get the block. + */ + d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT), + args.agbno + (j * blks_per_cluster)); + fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, + args.mp->m_bsize * blks_per_cluster, + XFS_BUF_LOCK); + ASSERT(fbuf); + ASSERT(!XFS_BUF_GETERROR(fbuf)); + /* + * Loop over the inodes in this buffer. 
+ */ + + for (i = 0; i < ninodes; i++) { + free = XFS_MAKE_IPTR(args.mp, fbuf, i); + memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t)); + INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO); + xfs_ialloc_log_di(tp, fbuf, i, + XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); + } + xfs_trans_inode_alloc_buf(tp, fbuf); + } + INT_MOD(agi->agi_count, ARCH_CONVERT, newlen); + INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen); + down_read(&args.mp->m_peraglock); + args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen; + up_read(&args.mp->m_peraglock); + INT_SET(agi->agi_newino, ARCH_CONVERT, newino); + /* + * Insert records describing the new inode chunk into the btree. + */ + cur = xfs_btree_init_cursor(args.mp, tp, agbp, + INT_GET(agi->agi_seqno, ARCH_CONVERT), + XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + for (thisino = newino; + thisino < newino + newlen; + thisino += XFS_INODES_PER_CHUNK) { + if ((error = xfs_inobt_lookup_eq(cur, thisino, + XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 0); + if ((error = xfs_inobt_insert(cur, &i))) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 1); + } + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + /* + * Log allocation group header fields + */ + xfs_ialloc_log_agi(tp, agbp, + XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); + /* + * Modify/log superblock values for inode count and inode free count. 
+ */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); + *alloc = 1; + return 0; +} + +STATIC __inline xfs_agnumber_t +xfs_ialloc_next_ag( + xfs_mount_t *mp) +{ + xfs_agnumber_t agno; + + spin_lock(&mp->m_agirotor_lock); + agno = mp->m_agirotor; + if (++mp->m_agirotor == mp->m_maxagi) + mp->m_agirotor = 0; + spin_unlock(&mp->m_agirotor_lock); + + return agno; +} + +/* + * Select an allocation group to look for a free inode in, based on the parent + * inode and then mode. Return the allocation group buffer. + */ +STATIC xfs_buf_t * /* allocation group buffer */ +xfs_ialloc_ag_select( + xfs_trans_t *tp, /* transaction pointer */ + xfs_ino_t parent, /* parent directory inode number */ + mode_t mode, /* bits set to indicate file type */ + int okalloc) /* ok to allocate more space */ +{ + xfs_buf_t *agbp; /* allocation group header buffer */ + xfs_agnumber_t agcount; /* number of ag's in the filesystem */ + xfs_agnumber_t agno; /* current ag number */ + int flags; /* alloc buffer locking flags */ + xfs_extlen_t ineed; /* blocks needed for inode allocation */ + xfs_extlen_t longest = 0; /* longest extent available */ + xfs_mount_t *mp; /* mount point structure */ + int needspace; /* file mode implies space allocated */ + xfs_perag_t *pag; /* per allocation group data */ + xfs_agnumber_t pagno; /* parent (starting) ag number */ + + /* + * Files of these types need at least one block if length > 0 + * (and they won't fit in the inode, but that's hard to figure out). + */ + needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); + mp = tp->t_mountp; + agcount = mp->m_maxagi; + if (S_ISDIR(mode)) + pagno = xfs_ialloc_next_ag(mp); + else { + pagno = XFS_INO_TO_AGNO(mp, parent); + if (pagno >= agcount) + pagno = 0; + } + ASSERT(pagno < agcount); + /* + * Loop through allocation groups, looking for one with a little + * free space in it. Note we don't look for free inodes, exactly. 
+ * Instead, we include whether there is a need to allocate inodes + * to mean that blocks must be allocated for them, + * if none are currently free. + */ + agno = pagno; + flags = XFS_ALLOC_FLAG_TRYLOCK; + down_read(&mp->m_peraglock); + for (;;) { + pag = &mp->m_perag[agno]; + if (!pag->pagi_init) { + if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { + agbp = NULL; + goto nextag; + } + } else + agbp = NULL; + + if (!pag->pagi_inodeok) { + xfs_ialloc_next_ag(mp); + goto unlock_nextag; + } + + /* + * Is there enough free space for the file plus a block + * of inodes (if we need to allocate some)? + */ + ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp); + if (ineed && !pag->pagf_init) { + if (agbp == NULL && + xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { + agbp = NULL; + goto nextag; + } + (void)xfs_alloc_pagf_init(mp, tp, agno, flags); + } + if (!ineed || pag->pagf_init) { + if (ineed && !(longest = pag->pagf_longest)) + longest = pag->pagf_flcount > 0; + if (!ineed || + (pag->pagf_freeblks >= needspace + ineed && + longest >= ineed && + okalloc)) { + if (agbp == NULL && + xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { + agbp = NULL; + goto nextag; + } + up_read(&mp->m_peraglock); + return agbp; + } + } +unlock_nextag: + if (agbp) + xfs_trans_brelse(tp, agbp); +nextag: + /* + * No point in iterating over the rest, if we're shutting + * down. + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + up_read(&mp->m_peraglock); + return (xfs_buf_t *)0; + } + agno++; + if (agno >= agcount) + agno = 0; + if (agno == pagno) { + if (flags == 0) { + up_read(&mp->m_peraglock); + return (xfs_buf_t *)0; + } + flags = 0; + } + } +} + +/* + * Visible inode allocation functions. + */ + +/* + * Allocate an inode on disk. + * Mode is used to tell whether the new inode will need space, and whether + * it is a directory. + * + * The arguments IO_agbp and alloc_done are defined to work within + * the constraint of one allocation per transaction. 
+ * xfs_dialloc() is designed to be called twice if it has to do an + * allocation to make more free inodes. On the first call, + * IO_agbp should be set to NULL. If an inode is available, + * i.e., xfs_dialloc() did not need to do an allocation, an inode + * number is returned. In this case, IO_agbp would be set to the + * current ag_buf and alloc_done set to false. + * If an allocation needed to be done, xfs_dialloc would return + * the current ag_buf in IO_agbp and set alloc_done to true. + * The caller should then commit the current transaction, allocate a new + * transaction, and call xfs_dialloc() again, passing in the previous + * value of IO_agbp. IO_agbp should be held across the transactions. + * Since the agbp is locked across the two calls, the second call is + * guaranteed to have a free inode available. + * + * Once we successfully pick an inode its number is returned and the + * on-disk data structures are updated. The inode itself is not read + * in, since doing so would break ordering constraints with xfs_reclaim. 
+ */ +int +xfs_dialloc( + xfs_trans_t *tp, /* transaction pointer */ + xfs_ino_t parent, /* parent inode (directory) */ + mode_t mode, /* mode bits for new inode */ + int okalloc, /* ok to allocate more space */ + xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ + boolean_t *alloc_done, /* true if we needed to replenish + inode freelist */ + xfs_ino_t *inop) /* inode number allocated */ +{ + xfs_agnumber_t agcount; /* number of allocation groups */ + xfs_buf_t *agbp; /* allocation group header's buffer */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_agi_t *agi; /* allocation group header structure */ + xfs_btree_cur_t *cur; /* inode allocation btree cursor */ + int error; /* error return value */ + int i; /* result code */ + int ialloced; /* inode allocation status */ + int noroom = 0; /* no space for inode blk allocation */ + xfs_ino_t ino; /* fs-relative inode to be returned */ + /* REFERENCED */ + int j; /* result code */ + xfs_mount_t *mp; /* file system mount structure */ + int offset; /* index of inode in chunk */ + xfs_agino_t pagino; /* parent's a.g. relative inode # */ + xfs_agnumber_t pagno; /* parent's allocation group number */ + xfs_inobt_rec_t rec; /* inode allocation record */ + xfs_agnumber_t tagno; /* testing allocation group number */ + xfs_btree_cur_t *tcur; /* temp cursor */ + xfs_inobt_rec_t trec; /* temp inode allocation record */ + + + if (*IO_agbp == NULL) { + /* + * We do not have an agbp, so select an initial allocation + * group for inode allocation. + */ + agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + /* + * Couldn't find an allocation group satisfying the + * criteria, give up. + */ + if (!agbp) { + *inop = NULLFSINO; + return 0; + } + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC); + } else { + /* + * Continue where we left off before. In this case, we + * know that the allocation group has free inodes. 
+ */ + agbp = *IO_agbp; + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC); + ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0); + } + mp = tp->t_mountp; + agcount = mp->m_sb.sb_agcount; + agno = INT_GET(agi->agi_seqno, ARCH_CONVERT); + tagno = agno; + pagno = XFS_INO_TO_AGNO(mp, parent); + pagino = XFS_INO_TO_AGINO(mp, parent); + + /* + * If we have already hit the ceiling of inode blocks then clear + * okalloc so we scan all available agi structures for a free + * inode. + */ + + if (mp->m_maxicount && + mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { + noroom = 1; + okalloc = 0; + } + + /* + * Loop until we find an allocation group that either has free inodes + * or in which we can allocate some inodes. Iterate through the + * allocation groups upward, wrapping at the end. + */ + *alloc_done = B_FALSE; + while (!agi->agi_freecount) { + /* + * Don't do anything if we're not supposed to allocate + * any blocks, just go on to the next ag. + */ + if (okalloc) { + /* + * Try to allocate some new inodes in the allocation + * group. + */ + if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { + xfs_trans_brelse(tp, agbp); + if (error == ENOSPC) { + *inop = NULLFSINO; + return 0; + } else + return error; + } + if (ialloced) { + /* + * We successfully allocated some inodes, return + * the current context to the caller so that it + * can commit the current transaction and call + * us again where we left off. + */ + ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0); + *alloc_done = B_TRUE; + *IO_agbp = agbp; + *inop = NULLFSINO; + return 0; + } + } + /* + * If it failed, give up on this ag. + */ + xfs_trans_brelse(tp, agbp); + /* + * Go on to the next ag: get its ag header. + */ +nextag: + if (++tagno == agcount) + tagno = 0; + if (tagno == agno) { + *inop = NULLFSINO; + return noroom ? 
ENOSPC : 0; + } + down_read(&mp->m_peraglock); + if (mp->m_perag[tagno].pagi_inodeok == 0) { + up_read(&mp->m_peraglock); + goto nextag; + } + error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); + up_read(&mp->m_peraglock); + if (error) + goto nextag; + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC); + } + /* + * Here with an allocation group that has a free inode. + * Reset agno since we may have chosen a new ag in the + * loop above. + */ + agno = tagno; + *IO_agbp = NULL; + cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT), + XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = INT_GET(agi->agi_newino, ARCH_CONVERT); +#ifdef DEBUG + if (cur->bc_nlevels == 1) { + int freecount = 0; + + if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + do { + if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + freecount += rec.ir_freecount; + if ((error = xfs_inobt_increment(cur, 0, &i))) + goto error0; + } while (i == 1); + + ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) || + XFS_FORCED_SHUTDOWN(mp)); + } +#endif + /* + * If in the same a.g. as the parent, try to get near the parent. + */ + if (pagno == agno) { + if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) + goto error0; + if (i != 0 && + (error = xfs_inobt_get_rec(cur, &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, &j)) == 0 && + j == 1 && + rec.ir_freecount > 0) { + /* + * Found a free inode in the same chunk + * as parent, done. + */ + } + /* + * In the same a.g. as parent, but parent's chunk is full. 
+ */ + else { + int doneleft; /* done, to the left */ + int doneright; /* done, to the right */ + + if (error) + goto error0; + ASSERT(i == 1); + ASSERT(j == 1); + /* + * Duplicate the cursor, search left & right + * simultaneously. + */ + if ((error = xfs_btree_dup_cursor(cur, &tcur))) + goto error0; + /* + * Search left with tcur, back up 1 record. + */ + if ((error = xfs_inobt_decrement(tcur, 0, &i))) + goto error1; + doneleft = !i; + if (!doneleft) { + if ((error = xfs_inobt_get_rec(tcur, + &trec.ir_startino, + &trec.ir_freecount, + &trec.ir_free, &i))) + goto error1; + XFS_WANT_CORRUPTED_GOTO(i == 1, error1); + } + /* + * Search right with cur, go forward 1 record. + */ + if ((error = xfs_inobt_increment(cur, 0, &i))) + goto error1; + doneright = !i; + if (!doneright) { + if ((error = xfs_inobt_get_rec(cur, + &rec.ir_startino, + &rec.ir_freecount, + &rec.ir_free, &i))) + goto error1; + XFS_WANT_CORRUPTED_GOTO(i == 1, error1); + } + /* + * Loop until we find the closest inode chunk + * with a free one. + */ + while (!doneleft || !doneright) { + int useleft; /* using left inode + chunk this time */ + + /* + * Figure out which block is closer, + * if both are valid. + */ + if (!doneleft && !doneright) + useleft = + pagino - + (trec.ir_startino + + XFS_INODES_PER_CHUNK - 1) < + rec.ir_startino - pagino; + else + useleft = !doneleft; + /* + * If checking the left, does it have + * free inodes? + */ + if (useleft && trec.ir_freecount) { + /* + * Yes, set it up as the chunk to use. + */ + rec = trec; + xfs_btree_del_cursor(cur, + XFS_BTREE_NOERROR); + cur = tcur; + break; + } + /* + * If checking the right, does it have + * free inodes? + */ + if (!useleft && rec.ir_freecount) { + /* + * Yes, it's already set up. + */ + xfs_btree_del_cursor(tcur, + XFS_BTREE_NOERROR); + break; + } + /* + * If used the left, get another one + * further left. 
+ */ + if (useleft) { + if ((error = xfs_inobt_decrement(tcur, 0, + &i))) + goto error1; + doneleft = !i; + if (!doneleft) { + if ((error = xfs_inobt_get_rec( + tcur, + &trec.ir_startino, + &trec.ir_freecount, + &trec.ir_free, &i))) + goto error1; + XFS_WANT_CORRUPTED_GOTO(i == 1, + error1); + } + } + /* + * If used the right, get another one + * further right. + */ + else { + if ((error = xfs_inobt_increment(cur, 0, + &i))) + goto error1; + doneright = !i; + if (!doneright) { + if ((error = xfs_inobt_get_rec( + cur, + &rec.ir_startino, + &rec.ir_freecount, + &rec.ir_free, &i))) + goto error1; + XFS_WANT_CORRUPTED_GOTO(i == 1, + error1); + } + } + } + ASSERT(!doneleft || !doneright); + } + } + /* + * In a different a.g. from the parent. + * See if the most recently allocated block has any free. + */ + else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) { + if ((error = xfs_inobt_lookup_eq(cur, + INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i))) + goto error0; + if (i == 1 && + (error = xfs_inobt_get_rec(cur, &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, &j)) == 0 && + j == 1 && + rec.ir_freecount > 0) { + /* + * The last chunk allocated in the group still has + * a free inode. + */ + } + /* + * None left in the last group, search the whole a.g. 
+ */ + else { + if (error) + goto error0; + if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) + goto error0; + ASSERT(i == 1); + for (;;) { + if ((error = xfs_inobt_get_rec(cur, + &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, + &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (rec.ir_freecount > 0) + break; + if ((error = xfs_inobt_increment(cur, 0, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + } + offset = XFS_IALLOC_FIND_FREE(&rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + XFS_INOBT_CLR_FREE(&rec, offset); + rec.ir_freecount--; + if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, + rec.ir_free))) + goto error0; + INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + down_read(&mp->m_peraglock); + mp->m_perag[tagno].pagi_freecount--; + up_read(&mp->m_peraglock); +#ifdef DEBUG + if (cur->bc_nlevels == 1) { + int freecount = 0; + + if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) + goto error0; + do { + if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, &i))) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + freecount += rec.ir_freecount; + if ((error = xfs_inobt_increment(cur, 0, &i))) + goto error0; + } while (i == 1); + ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) || + XFS_FORCED_SHUTDOWN(mp)); + } +#endif + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + *inop = ino; + return 0; +error1: + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); +error0: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. 
+ * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( + xfs_trans_t *tp, /* transaction pointer */ + xfs_ino_t inode, /* inode to be freed */ + xfs_bmap_free_t *flist, /* extents to free */ + int *delete, /* set if inode cluster was deleted */ + xfs_ino_t *first_ino) /* first inode in deleted cluster */ +{ + /* REFERENCED */ + xfs_agblock_t agbno; /* block number containing inode */ + xfs_buf_t *agbp; /* buffer containing allocation group header */ + xfs_agino_t agino; /* inode number relative to allocation group */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_agi_t *agi; /* allocation group header */ + xfs_btree_cur_t *cur; /* inode btree cursor */ + int error; /* error return value */ + int i; /* result code */ + int ilen; /* inodes in an inode cluster */ + xfs_mount_t *mp; /* mount structure for filesystem */ + int off; /* offset of inode in inode chunk */ + xfs_inobt_rec_t rec; /* btree record */ + + mp = tp->t_mountp; + + /* + * Break up inode number into its components. + */ + agno = XFS_INO_TO_AGNO(mp, inode); + if (agno >= mp->m_sb.sb_agcount) { + cmn_err(CE_WARN, + "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", + agno, mp->m_sb.sb_agcount, mp->m_fsname); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agino = XFS_INO_TO_AGINO(mp, inode); + if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + cmn_err(CE_WARN, + "xfs_difree: inode != XFS_AGINO_TO_INO() (%d != %d) on %s. Returning EINVAL.", + inode, XFS_AGINO_TO_INO(mp, agno, agino), mp->m_fsname); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agbno >= mp->m_sb.sb_agblocks) { + cmn_err(CE_WARN, + "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", + agbno, mp->m_sb.sb_agblocks, mp->m_fsname); + ASSERT(0); + return XFS_ERROR(EINVAL); + } + /* + * Get the allocation group header. 
+ */ + down_read(&mp->m_peraglock); + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + up_read(&mp->m_peraglock); + if (error) { + cmn_err(CE_WARN, + "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", + error, mp->m_fsname); + return error; + } + agi = XFS_BUF_TO_AGI(agbp); + ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC); + ASSERT(agbno < INT_GET(agi->agi_length, ARCH_CONVERT)); + /* + * Initialize the cursor. + */ + cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, + (xfs_inode_t *)0, 0); +#ifdef DEBUG + if (cur->bc_nlevels == 1) { + int freecount = 0; + + if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) + goto error0; + do { + if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, + &rec.ir_freecount, &rec.ir_free, &i))) + goto error0; + if (i) { + freecount += rec.ir_freecount; + if ((error = xfs_inobt_increment(cur, 0, &i))) + goto error0; + } + } while (i == 1); + ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) || + XFS_FORCED_SHUTDOWN(mp)); + } +#endif + /* + * Look for the entry describing this inode. + */ + if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { + cmn_err(CE_WARN, + "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", + error, mp->m_fsname); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, + &rec.ir_free, &i))) { + cmn_err(CE_WARN, + "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", + error, mp->m_fsname); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Get the offset in the inode chunk. + */ + off = agino - rec.ir_startino; + ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); + ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); + /* + * Mark the inode free & increment the count. 
+ */ + XFS_INOBT_SET_FREE(&rec, off); + rec.ir_freecount++; + + /* + * When an inode cluster is free, it becomes elgible for removal + */ + if ((mp->m_flags & XFS_MOUNT_IDELETE) && + (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { + + *delete = 1; + *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); + + /* + * Remove the inode cluster from the AGI B+Tree, adjust the + * AGI and Superblock inode counts, and mark the disk space + * to be freed when the transaction is committed. + */ + ilen = XFS_IALLOC_INODES(mp); + INT_MOD(agi->agi_count, ARCH_CONVERT, -ilen); + INT_MOD(agi->agi_freecount, ARCH_CONVERT, -(ilen - 1)); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); + down_read(&mp->m_peraglock); + mp->m_perag[agno].pagi_freecount -= ilen - 1; + up_read(&mp->m_peraglock); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); + + if ((error = xfs_inobt_delete(cur, &i))) { + cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", + error, mp->m_fsname); + goto error0; + } + + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, + agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), + XFS_IALLOC_BLOCKS(mp), flist, mp); + } else { + *delete = 0; + + if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { + cmn_err(CE_WARN, + "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", + error, mp->m_fsname); + goto error0; + } + /* + * Change the inode free counts and |