diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_imap.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/jfs/jfs_imap.c')
-rw-r--r-- | fs/jfs/jfs_imap.c | 3270 |
1 files changed, 3270 insertions, 0 deletions
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c new file mode 100644 index 00000000000..78383130162 --- /dev/null +++ b/fs/jfs/jfs_imap.c @@ -0,0 +1,3270 @@ +/* + * Copyright (C) International Business Machines Corp., 2000-2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * jfs_imap.c: inode allocation map manager + * + * Serialization: + * Each AG has a simple lock which is used to control the serialization of + * the AG level lists. This lock should be taken first whenever an AG + * level list will be modified or accessed. + * + * Each IAG is locked by obtaining the buffer for the IAG page. + * + * There is also a inode lock for the inode map inode. A read lock needs to + * be taken whenever an IAG is read from the map or the global level + * information is read. A write lock needs to be taken whenever the global + * level information is modified or an atomic operation needs to be used. + * + * If more than one IAG is read at one time, the read lock may not + * be given up until all of the IAG's are read. Otherwise, a deadlock + * may occur when trying to obtain the read lock while another thread + * holding the read lock is waiting on the IAG already being held. + * + * The control page of the inode map is read into memory by diMount(). + * Thereafter it should only be modified in memory and then it will be + * written out when the filesystem is unmounted by diUnmount(). + */ + +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> + +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_dinode.h" +#include "jfs_dmap.h" +#include "jfs_imap.h" +#include "jfs_metapage.h" +#include "jfs_superblock.h" +#include "jfs_debug.h" + +/* + * imap locks + */ +/* iag free list lock */ +#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) +#define IAGFREE_LOCK(imap) down(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) + +/* per ag iag list locks */ +#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) + +/* + * external references + */ +extern struct address_space_operations jfs_aops; + +/* + * forward references + */ +static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); +static int diAllocAny(struct inomap *, int, boolean_t, struct inode *); +static int diAllocBit(struct inomap *, struct iag *, int); +static int diAllocExt(struct inomap *, int, struct inode *); +static int diAllocIno(struct inomap *, int, struct inode *); +static int diFindFree(u32, int); +static int diNewExt(struct inomap *, struct iag *, int); +static int diNewIAG(struct inomap *, int *, int, struct metapage **); +static void duplicateIXtree(struct super_block *, s64, int, s64 *); + +static int diIAGRead(struct inomap * imap, int, struct metapage **); +static int copy_from_dinode(struct dinode *, struct inode *); +static void copy_to_dinode(struct dinode *, struct inode *); + +/* + * debug code for double-checking inode map + */ +/* #define _JFS_DEBUG_IMAP 1 */ + +#ifdef _JFS_DEBUG_IMAP +#define DBG_DIINIT(imap) DBGdiInit(imap) +#define DBG_DIALLOC(imap, ino) DBGdiAlloc(imap, ino) +#define DBG_DIFREE(imap, ino) DBGdiFree(imap, ino) + +static void *DBGdiInit(struct inomap * imap); +static void DBGdiAlloc(struct inomap * imap, ino_t ino); +static void DBGdiFree(struct inomap * imap, ino_t ino); +#else +#define DBG_DIINIT(imap) +#define DBG_DIALLOC(imap, ino) +#define DBG_DIFREE(imap, ino) +#endif /* _JFS_DEBUG_IMAP */ + +/* + * NAME: diMount() + * + * FUNCTION: initialize the incore inode map control structures for + * a fileset or aggregate init time. + * + * the inode map's control structure (dinomap) is + * brought in from disk and placed in virtual memory. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. + */ +int diMount(struct inode *ipimap) +{ + struct inomap *imap; + struct metapage *mp; + int index; + struct dinomap_disk *dinom_le; + + /* + * allocate/initialize the in-memory inode map control structure + */ + /* allocate the in-memory inode map control structure. */ + imap = (struct inomap *) kmalloc(sizeof(struct inomap), GFP_KERNEL); + if (imap == NULL) { + jfs_err("diMount: kmalloc returned NULL!"); + return -ENOMEM; + } + + /* read the on-disk inode map control structure. */ + + mp = read_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + kfree(imap); + return -EIO; + } + + /* copy the on-disk version to the in-memory version. */ + dinom_le = (struct dinomap_disk *) mp->data; + imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); + imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); + atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); + atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); + imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); + imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + imap->im_agctl[index].inofree = + le32_to_cpu(dinom_le->in_agctl[index].inofree); + imap->im_agctl[index].extfree = + le32_to_cpu(dinom_le->in_agctl[index].extfree); + imap->im_agctl[index].numinos = + le32_to_cpu(dinom_le->in_agctl[index].numinos); + imap->im_agctl[index].numfree = + le32_to_cpu(dinom_le->in_agctl[index].numfree); + } + + /* release the buffer. */ + release_metapage(mp); + + /* + * allocate/initialize inode allocation map locks + */ + /* allocate and init iag free list lock */ + IAGFREE_LOCK_INIT(imap); + + /* allocate and init ag list locks */ + for (index = 0; index < MAXAG; index++) { + AG_LOCK_INIT(imap, index); + } + + /* bind the inode map inode and inode map control structure + * to each other. + */ + imap->im_ipimap = ipimap; + JFS_IP(ipimap)->i_imap = imap; + +// DBG_DIINIT(imap); + + return (0); +} + + +/* + * NAME: diUnmount() + * + * FUNCTION: write to disk the incore inode map control structures for + * a fileset or aggregate at unmount time. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. + */ +int diUnmount(struct inode *ipimap, int mounterror) +{ + struct inomap *imap = JFS_IP(ipimap)->i_imap; + + /* + * update the on-disk inode map control structure + */ + + if (!(mounterror || isReadOnly(ipimap))) + diSync(ipimap); + + /* + * Invalidate the page cache buffers + */ + truncate_inode_pages(ipimap->i_mapping, 0); + + /* + * free in-memory control structure + */ + kfree(imap); + + return (0); +} + + +/* + * diSync() + */ +int diSync(struct inode *ipimap) +{ + struct dinomap_disk *dinom_le; + struct inomap *imp = JFS_IP(ipimap)->i_imap; + struct metapage *mp; + int index; + + /* + * write imap global conrol page + */ + /* read the on-disk inode map control structure */ + mp = get_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + jfs_err("diSync: get_metapage failed!"); + return -EIO; + } + + /* copy the in-memory version to the on-disk version */ + dinom_le = (struct dinomap_disk *) mp->data; + dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); + dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); + dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); + dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); + dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); + dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + dinom_le->in_agctl[index].inofree = + cpu_to_le32(imp->im_agctl[index].inofree); + dinom_le->in_agctl[index].extfree = + cpu_to_le32(imp->im_agctl[index].extfree); + dinom_le->in_agctl[index].numinos = + cpu_to_le32(imp->im_agctl[index].numinos); + dinom_le->in_agctl[index].numfree = + cpu_to_le32(imp->im_agctl[index].numfree); + } + + /* write out the control structure */ + write_metapage(mp); + + /* + * write out dirty pages of imap + */ + filemap_fdatawrite(ipimap->i_mapping); + filemap_fdatawait(ipimap->i_mapping); + + diWriteSpecial(ipimap, 0); + + return (0); +} + + +/* + * NAME: diRead() + * + * FUNCTION: initialize an incore inode from disk. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). + * + * this routine handles incore inode initialization for + * both "special" and "regular" inodes. special inodes + * are those required early in the mount process and + * require special handling since much of the file system + * is not yet initialized. these "special" inodes are + * identified by a NULL inode map inode pointer and are + * actually initialized by a call to diReadSpecial(). + * + * for regular inodes, the iag describing the disk inode + * is read from disk to determine the inode extent address + * for the disk inode. with the inode extent address in + * hand, the page of the extent that contains the disk + * inode is read and the disk inode is copied to the + * incore inode. + * + * PARAMETERS: + * ip - pointer to incore inode to be initialized from disk. + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error. + * -ENOMEM - insufficient memory + * + */ +int diRead(struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + int iagno, ino, extno, rc; + struct inode *ipimap; + struct dinode *dp; + struct iag *iagp; + struct metapage *mp; + s64 blkno, agstart; + struct inomap *imap; + int block_offset; + int inodes_left; + uint pageno; + int rel_inode; + + jfs_info("diRead: ino = %ld", ip->i_ino); + + ipimap = sbi->ipimap; + JFS_IP(ip)->ipimap = ipimap; + + /* determine the iag number for this inode (number) */ + iagno = INOTOIAG(ip->i_ino); + + /* read the iag */ + imap = JFS_IP(ipimap)->i_imap; + IREAD_LOCK(ipimap); + rc = diIAGRead(imap, iagno, &mp); + IREAD_UNLOCK(ipimap); + if (rc) { + jfs_err("diRead: diIAGRead returned %d", rc); + return (rc); + } + + iagp = (struct iag *) mp->data; + + /* determine inode extent that holds the disk inode */ + ino = ip->i_ino & (INOSPERIAG - 1); + extno = ino >> L2INOSPEREXT; + + if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || + (addressPXD(&iagp->inoext[extno]) == 0)) { + release_metapage(mp); + return -ESTALE; + } + + /* get disk block number of the page within the inode extent + * that holds the disk inode. + */ + blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); + + /* get the ag for the iag */ + agstart = le64_to_cpu(iagp->agstart); + + release_metapage(mp); + + rel_inode = (ino & (INOSPERPAGE - 1)); + pageno = blkno >> sbi->l2nbperpage; + + if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { + /* + * OS/2 didn't always align inode extents on page boundaries + */ + inodes_left = + (sbi->nbperpage - block_offset) << sbi->l2niperblk; + + if (rel_inode < inodes_left) + rel_inode += block_offset << sbi->l2niperblk; + else { + pageno += 1; + rel_inode -= inodes_left; + } + } + + /* read the page of disk inode */ + mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); + if (mp == 0) { + jfs_err("diRead: read_metapage failed"); + return -EIO; + } + + /* locate the the disk inode requested */ + dp = (struct dinode *) mp->data; + dp += rel_inode; + + if (ip->i_ino != le32_to_cpu(dp->di_number)) { + jfs_error(ip->i_sb, "diRead: i_ino != di_number"); + rc = -EIO; + } else if (le32_to_cpu(dp->di_nlink) == 0) + rc = -ESTALE; + else + /* copy the disk inode to the in-memory inode */ + rc = copy_from_dinode(dp, ip); + + release_metapage(mp); + + /* set the ag for the inode */ + JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); + JFS_IP(ip)->active_ag = -1; + + return (rc); +} + + +/* + * NAME: diReadSpecial() + * + * FUNCTION: initialize a 'special' inode from disk. + * + * this routines handles aggregate level inodes. The + * inode cache cannot differentiate between the + * aggregate inodes and the filesystem inodes, so we + * handle these here. We don't actually use the aggregate + * inode map, since these inodes are at a fixed location + * and in some cases the aggregate inode map isn't initialized + * yet. + * + * PARAMETERS: + * sb - filesystem superblock + * inum - aggregate inode number + * secondary - 1 if secondary aggregate inode table + * + * RETURN VALUES: + * new inode - success + * NULL - i/o error. + */ +struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + uint address; + struct dinode *dp; + struct inode *ip; + struct metapage *mp; + + ip = new_inode(sb); + if (ip == NULL) { + jfs_err("diReadSpecial: new_inode returned NULL!"); + return ip; + } + + if (secondary) { + address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + JFS_IP(ip)->ipimap = sbi->ipaimap2; + } else { + address = AITBL_OFF >> L2PSIZE; + JFS_IP(ip)->ipimap = sbi->ipaimap; + } + + ASSERT(inum < INOSPEREXT); + + ip->i_ino = inum; + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + return (NULL); + } + + /* get the pointer to the disk inode of interest */ + dp = (struct dinode *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + if ((copy_from_dinode(dp, ip)) != 0) { + /* handle bad return by returning NULL for ip */ + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + /* release the page */ + release_metapage(mp); + return (NULL); + + } + + ip->i_mapping->a_ops = &jfs_aops; + mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); + + /* Allocations to metadata inodes should not affect quotas */ + ip->i_flags |= S_NOQUOTA; + + if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { + sbi->gengen = le32_to_cpu(dp->di_gengen); + sbi->inostamp = le32_to_cpu(dp->di_inostamp); + } + + /* release the page */ + release_metapage(mp); + + return (ip); +} + +/* + * NAME: diWriteSpecial() + * + * FUNCTION: Write the special inode to disk + * + * PARAMETERS: + * ip - special inode + * secondary - 1 if secondary aggregate inode table + * + * RETURN VALUES: none + */ + +void diWriteSpecial(struct inode *ip, int secondary) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uint address; + struct dinode *dp; + ino_t inum = ip->i_ino; + struct metapage *mp; + + ip->i_state &= ~I_DIRTY; + + if (secondary) + address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + else + address = AITBL_OFF >> L2PSIZE; + + ASSERT(inum < INOSPEREXT); + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + jfs_err("diWriteSpecial: failed to read aggregate inode " + "extent!"); + return; + } + + /* get the pointer to the disk inode of interest */ + dp = (struct dinode *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + copy_to_dinode(dp, ip); + memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); + + if (inum == FILESYSTEM_I) + dp->di_gengen = cpu_to_le32(sbi->gengen); + + /* write the page */ + write_metapage(mp); +} + +/* + * NAME: diFreeSpecial() + * + * FUNCTION: Free allocated space for special inode + */ +void diFreeSpecial(struct inode *ip) +{ + if (ip == NULL) { + jfs_err("diFreeSpecial called with NULL ip!"); + return; + } + filemap_fdatawrite(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + truncate_inode_pages(ip->i_mapping, 0); + iput(ip); +} + + + +/* + * NAME: diWrite() + * + * FUNCTION: write the on-disk inode portion of the in-memory inode + * to its corresponding on-disk inode. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). + * + * the inode contains the inode extent address for the disk + * inode. with the inode extent address in hand, the + * page of the extent that contains the disk inode is + * read and the disk inode portion of the incore inode + * is copied to the disk inode. + * + * PARAMETERS: + * tid - transacation id + * ip - pointer to incore inode to be written to the inode extent. + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error. + */ +int diWrite(tid_t tid, struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int rc = 0; + s32 ino; + struct dinode *dp; + s64 blkno; + int block_offset; + int inodes_left; + struct metapage *mp; + uint pageno; + int rel_inode; + int dioffset; + struct inode *ipimap; + uint type; + lid_t lid; + struct tlock *ditlck, *tlck; + struct linelock *dilinelock, *ilinelock; + struct lv *lv; + int n; + + ipimap = jfs_ip->ipimap; + + ino = ip->i_ino & (INOSPERIAG - 1); + + if (!addressPXD(&(jfs_ip->ixpxd)) || + (lengthPXD(&(jfs_ip->ixpxd)) != + JFS_IP(ipimap)->i_imap->im_nbperiext)) { + jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); + return -EIO; + } + + /* + * read the page of disk inode containing the specified inode: + */ + /* compute the block address of the page */ + blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); + + rel_inode = (ino & (INOSPERPAGE - 1)); + pageno = blkno >> sbi->l2nbperpage; + + if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { + /* + * OS/2 didn't always align inode extents on page boundaries + */ + inodes_left = + (sbi->nbperpage - block_offset) << sbi->l2niperblk; + + if (rel_inode < inodes_left) + rel_inode += block_offset << sbi->l2niperblk; + else { + pageno += 1; + rel_inode -= inodes_left; + } + } + /* read the page of disk inode */ + retry: + mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); + if (mp == 0) + return -EIO; + + /* get the pointer to the disk inode */ + dp = (struct dinode *) mp->data; + dp += rel_inode; + + dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; + + /* + * acquire transaction lock on the on-disk inode; + * N.B. tlock is acquired on ipimap not ip; + */ + if ((ditlck = + txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) + goto retry; + dilinelock = (struct linelock *) & ditlck->lock; + + /* + * copy btree root from in-memory inode to on-disk inode + * + * (tlock is taken from inline B+-tree root in in-memory + * inode when the B+-tree root is updated, which is pointed + * by jfs_ip->blid as well as being on tx tlock list) + * + * further processing of btree root is based on the copy + * in in-memory inode, where txLog() will log from, and, + * for xtree root, txUpdateMap() will update map and reset + * XAD_NEW bit; + */ + + if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { + /* + * This is the special xtree inside the directory for storing + * the directory table + */ + xtpage_t *p, *xp; + xad_t *xad; + + jfs_ip->xtlid = 0; + tlck = lid_to_tlock(lid); + assert(tlck->type & tlckXTREE); + tlck->type |= tlckBTROOT; + tlck->mp = mp; + ilinelock = (struct linelock *) & tlck->lock; + + /* + * copy xtree root from inode to dinode: + */ + p = &jfs_ip->i_xtroot; + xp = (xtpage_t *) &dp->di_dirtable; + lv = ilinelock->lv; + for (n = 0; n < ilinelock->index; n++, lv++) { + memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], + lv->length << L2XTSLOTSIZE); + } + + /* reset on-disk (metadata page) xtree XAD_NEW bit */ + xad = &xp->xad[XTENTRYSTART]; + for (n = XTENTRYSTART; + n < le16_to_cpu(xp->header.nextindex); n++, xad++) + if (xad->flag & (XAD_NEW | XAD_EXTENDED)) + xad->flag &= ~(XAD_NEW | XAD_EXTENDED); + } + + if ((lid = jfs_ip->blid) == 0) + goto inlineData; + jfs_ip->blid = 0; + + tlck = lid_to_tlock(lid); + type = tlck->type; + tlck->type |= tlckBTROOT; + tlck->mp = mp; + ilinelock = (struct linelock *) & tlck->lock; + + /* + * regular file: 16 byte (XAD slot) granularity + */ + if (type & tlckXTREE) { + xtpage_t *p, *xp; + xad_t *xad; + + /* + * copy xtree root from inode to dinode: + */ + p = &jfs_ip->i_xtroot; + xp = &dp->di_xtroot; + lv = ilinelock->lv; + for (n = 0; n < ilinelock->index; n++, lv++) { + memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], + lv->length << L2XTSLOTSIZE); + } + + /* reset on-disk (metadata page) xtree XAD_NEW bit */ + xad = &xp->xad[XTENTRYSTART]; + for (n = XTENTRYSTART; + n < le16_to_cpu(xp->header.nextindex); n++, xad++) + if (xad->flag & (XAD_NEW | XAD_EXTENDED)) + xad->flag &= ~(XAD_NEW | XAD_EXTENDED); + } + /* + * directory: 32 byte (directory entry slot) granularity + */ + else if (type & tlckDTREE) { + dtpage_t *p, *xp; + + /* + * copy dtree root from inode to dinode: + */ + p = (dtpage_t *) &jfs_ip->i_dtroot; + xp = (dtpage_t *) & dp->di_dtroot; + lv = ilinelock->lv; + for (n = 0; n < ilinelock->index; n++, lv++) { + memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], + lv->length << L2DTSLOTSIZE); + } + } else { + jfs_err("diWrite: UFO tlock"); + } + + inlineData: + /* + * copy inline symlink from in-memory inode to on-disk inode + */ + if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { + lv = & dilinelock->lv[dilinelock->index]; + lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; + lv->length = 2; + memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); + dilinelock->index++; + } + /* + * copy inline data from in-memory inode to on-disk inode: + * 128 byte slot granularity + */ + if (test_cflag(COMMIT_Inlineea, ip)) { + lv = & dilinelock->lv[dilinelock->index]; + lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; + lv->length = 1; + memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); + dilinelock->index++; + + clear_cflag(COMMIT_Inlineea, ip); + } + + /* + * lock/copy inode base: 128 byte slot granularity + */ +// baseDinode: + lv = & dilinelock->lv[dilinelock->index]; + lv->offset = dioffset >> L2INODESLOTSIZE; + copy_to_dinode(dp, ip); + if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { + lv->length = 2; + memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); + } else + lv->length = 1; + dilinelock->index++; + +#ifdef _JFS_FASTDASD + /* + * We aren't logging changes to the DASD used in directory inodes, + * but we need to write them to disk. If we don't unmount cleanly, + * mount will recalculate the DASD used. + */ + if (S_ISDIR(ip->i_mode) + && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) + memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); +#endif /* _JFS_FASTDASD */ + + /* release the buffer holding the updated on-disk inode. + * the buffer will be later written by commit processing. + */ + write_metapage(mp); + + return (rc); +} + + +/* + * NAME: diFree(ip) + * + * FUNCTION: free a specified inode from the inode working map + * for a fileset or aggregate. + * + * if the inode to be freed represents the first (only) + * free inode within the iag, the iag will be placed on + * the ag free inode list. + * + * freeing the inode will cause the inode extent to be + * freed if the inode is the only allocated inode within + * the extent. in this case all the disk resource backing + * up the inode extent will be freed. in addition, the iag + * will be placed on the ag extent free list if the extent + * is the first free extent in the iag. if freeing the + * extent also means that no free inodes will exist for + * the iag, the iag will also be removed from the ag free + * inode list. + * + * the iag describing the inode will be freed if the extent + * is to be freed and it is the only backed extent within + * the iag. in this case, the iag will be removed from the + * ag free extent list and ag free inode list and placed on + * the inode map's free iag list. + * + * a careful update approach is used to provide consistency + * in the face of updates to multiple buffers. under this + * approach, all required buffers are obtained before making + * any updates and are held until all updates are complete. + * + * PARAMETERS: + * ip - inode to be freed. + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error. + */ +int diFree(struct inode *ip) +{ + int rc; + ino_t inum = ip->i_ino; + struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; + struct metapage *mp, *amp, *bmp, *cmp, *dmp; + int iagno, ino, extno, bitno, sword, agno; + int back, fwd; + u32 bitmap, mask; + struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; + struct inomap *imap = JFS_IP(ipimap)->i_imap; + pxd_t freepxd; + tid_t tid; + struct inode *iplist[3]; + struct tlock *tlck; + struct pxd_lock *pxdlock; + + /* + * This is just to suppress compiler warnings. The same logic that + * references these variables is used to initialize them. + */ + aiagp = biagp = ciagp = diagp = NULL; + + /* get the iag number containing the inode. + */ + iagno = INOTOIAG(inum); + + /* make sure that the iag is contained within + * the map. + */ + if (iagno >= imap->im_nextiag) { + dump_mem("imap", imap, 32); + jfs_error(ip->i_sb, + "diFree: inum = %d, iagno = %d, nextiag = %d", + (uint) inum, iagno, imap->im_nextiag); + return -EIO; + } + + /* get the allocation group for this ino. + */ + agno = JFS_IP(ip)->agno; + + /* Lock the AG specific inode map information + */ + AG_LOCK(imap, agno); + + /* Obtain read lock in imap inode. Don't release it until we have + * read all of the IAG's that we are going to. + */ + IREAD_LOCK(ipimap); + + /* read the iag. + */ + if ((rc = diIAGRead(imap, iagno, &mp))) { + IREAD_UNLOCK(ipimap); + AG_UNLOCK(imap, agno); + return (rc); + } + iagp = (struct iag *) mp->data; + + /* get the inode number and extent number of the inode within + * the iag and the inode number within the extent. + */ + ino = inum & (INOSPERIAG - 1); + extno = ino >> L2INOSPEREXT; + bitno = ino & (INOSPEREXT - 1); + mask = HIGHORDER >> bitno; + + if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { + jfs_error(ip->i_sb, + "diFree: wmap shows inode already free"); + } + + if (!addressPXD(&iagp->inoext[extno])) { + release_metapage(mp); + IREAD_UNLOCK(ipimap); + AG_UNLOCK(imap, agno); + jfs_error(ip->i_sb, "diFree: invalid inoext"); + return -EIO; + } + + /* compute the bitmap for the extent reflecting the freed inode. + */ + bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; + + if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { + release_metapage(mp); + IREAD_UNLOCK(ipimap); + AG_UNLOCK(imap, agno); + jfs_error(ip->i_sb, "diFree: numfree > numinos"); + return -EIO; + } + /* + * inode extent still has some inodes or below low water mark: + * keep the inode extent; + */ + if (bitmap || + imap->im_agctl[agno].numfree < 96 || + (imap->im_agctl[agno].numfree < 288 && + (((imap->im_agctl[agno].numfree * 100) / + imap->im_agctl[agno].numinos) <= 25))) { + /* if the iag currently has no free inodes (i.e., + * the inode being freed is the first free inode of iag), + * insert the iag at head of the inode free list for the ag. + */ + if (iagp->nfreeinos == 0) { + /* check if there are any iags on the ag inode + * free list. if so, read the first one so that + * we can link the current iag onto the list at + * the head. + */ + if ((fwd = imap->im_agctl[agno].inofree) >= 0) { + /* read the iag that currently is the head + * of the list. + */ + if ((rc = diIAGRead(imap, fwd, &))) { + IREAD_UNLOCK(ipimap); + AG_UNLOCK(imap, agno); + release_metapage(mp); + return (rc); + } + aiagp = (struct iag *) amp->data; + + /* make current head point back to the iag. + */ + aiagp->inofreeback = cpu_to_le32(iagno); + + write_metapage(amp); + } + + /* iag points forward to current head and iag + * becomes the new head of the list. + */ + iagp->inofreefwd = + cpu_to_le32(imap->im_agctl[agno].inofree); + iagp->inofreeback = cpu_to_le32(-1); + imap->im_agctl[agno].inofree = iagno; + } + IREAD_UNLOCK(ipimap); + + /* update the free inode summary map for the extent if + * freeing the inode means the extent will now have free + * inodes (i.e., the inode being freed is the first free + * inode of extent), + */ + if (iagp->wmap[extno] == cpu_to_le32(ONES)) { + sword = extno >> L2EXTSPERSUM; + bitno = extno & (EXTSPERSUM - 1); + iagp->inosmap[sword] &= + cpu_to_le32(~(HIGHORDER >> bitno)); + } + + /* update the bitmap. + */ + iagp->wmap[extno] = cpu_to_le32(bitmap); + DBG_DIFREE(imap, inum); + + /* update the free inode counts at the iag, ag and + * map level. + */ + iagp->nfreeinos = + cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); + imap->im_agctl[agno].numfree += 1; + atomic_inc(&imap->im_numfree); + + /* release the AG inode map lock + */ + AG_UNLOCK(imap, agno); + + /* write the iag */ + write_metapage(mp); + + return (0); + } + + + /* + * inode extent has become free and above low water mark: + * free the inode extent; + */ + + /* + * prepare to update iag list(s) (careful update step 1) + */ + amp = bmp = cmp = dmp = NULL; + fwd = back = -1; + + /* check if the iag currently has no free extents. if so, + * it will be placed on the head of the ag extent free list. + */ + if (iagp->nfreeexts == 0) { + /* check if the ag extent free list has any iags. + * if so, read the iag at the head of the list now. + * this (head) iag will be updated later to reflect + * the addition of the current iag at the head of + * the list. + */ + if ((fwd = imap->im_agctl[agno].extfree) >= 0) { + if ((rc = diIAGRead(imap, fwd, &))) + goto error_out; + aiagp = (struct iag *) amp->data; + } + } else { + /* iag has free extents. check if the addition of a free + * extent will cause all extents to be free within this + * iag. if so, the iag will be removed from the ag extent + * free list and placed on the inode map's free iag list. + */ + if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { + /* in preparation for removing the iag from the + * ag extent free list, read the iags preceeding + * and following the iag on the ag extent free + * list. + */ + if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { + if ((rc = diIAGRead(imap, fwd, &))) + goto error_out; + aiagp = (struct iag *) amp->data; + } + + if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { + if ((rc = diIAGRead(imap, back, &bmp))) + goto error_out; + biagp = (struct iag *) bmp->data; + } + } + } + + /* remove the iag from the ag inode free list if freeing + * this extent cause the iag to have no free inodes. + */ + if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { + int inofreeback = le32_to_cpu(iagp->inofreeback); + int inofreefwd = le32_to_cpu(iagp->inofreefwd); + + /* in preparation for removing the iag from the + * ag inod |