aboutsummaryrefslogtreecommitdiff
path: root/fs/jfs/jfs_xtree.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_xtree.c
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/jfs/jfs_xtree.c')
-rw-r--r--fs/jfs/jfs_xtree.c4485
1 files changed, 4485 insertions, 0 deletions
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
new file mode 100644
index 00000000000..11c58c54b81
--- /dev/null
+++ b/fs/jfs/jfs_xtree.c
@@ -0,0 +1,4485 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2000-2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * jfs_xtree.c: extent allocation descriptor B+-tree manager
+ */
+
+#include <linux/fs.h>
+#include <linux/quotaops.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_dmap.h"
+#include "jfs_dinode.h"
+#include "jfs_superblock.h"
+#include "jfs_debug.h"
+
+/*
+ * xtree local flag
+ */
+#define XT_INSERT 0x00000001
+
+/*
+ * xtree key/entry comparison: extent offset
+ *
+ * return:
+ * -1: k < start of extent
+ * 0: start_of_extent <= k <= end_of_extent
+ * 1: k > end_of_extent
+ */
+#define XT_CMP(CMP, K, X, OFFSET64)\
+{\
+ OFFSET64 = offsetXAD(X);\
+ (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
+ ((K) < OFFSET64) ? -1 : 0;\
+}
+
+/* write a xad entry */
+#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
+{\
+ (XAD)->flag = (FLAG);\
+ XADoffset((XAD), (OFF));\
+ XADlength((XAD), (LEN));\
+ XADaddress((XAD), (ADDR));\
+}
+
+#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
+
+/* get page buffer for specified block address */
+/* ToDo: Replace this ugly macro with a function */
+#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
+{\
+ BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
+ if (!(RC))\
+ {\
+ if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
+ (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
+ (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
+ {\
+ jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
+ BT_PUTPAGE(MP);\
+ MP = NULL;\
+ RC = -EIO;\
+ }\
+ }\
+}
+
+/* for consistency */
+#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
+
+#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
+ BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
+/* xtree entry parameter descriptor */
+struct xtsplit {
+ struct metapage *mp;
+ s16 index;
+ u8 flag;
+ s64 off;
+ s64 addr;
+ int len;
+ struct pxdlist *pxdlist;
+};
+
+
+/*
+ * statistics
+ */
+#ifdef CONFIG_JFS_STATISTICS
+static struct {
+ uint search;
+ uint fastSearch;
+ uint split;
+} xtStat;
+#endif
+
+
+/*
+ * forward references
+ */
+static int xtSearch(struct inode *ip,
+ s64 xoff, int *cmpp, struct btstack * btstack, int flag);
+
+static int xtSplitUp(tid_t tid,
+ struct inode *ip,
+ struct xtsplit * split, struct btstack * btstack);
+
+static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
+ struct metapage ** rmpp, s64 * rbnp);
+
+static int xtSplitRoot(tid_t tid, struct inode *ip,
+ struct xtsplit * split, struct metapage ** rmpp);
+
+#ifdef _STILL_TO_PORT
+static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
+ xtpage_t * fp, struct btstack * btstack);
+
+static int xtSearchNode(struct inode *ip,
+ xad_t * xad,
+ int *cmpp, struct btstack * btstack, int flag);
+
+static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
+#endif /* _STILL_TO_PORT */
+
+/* External references */
+
+/*
+ * debug control
+ */
+/* #define _JFS_DEBUG_XTREE 1 */
+
+
+/*
+ * xtLookup()
+ *
+ * function: map a single page into a physical extent;
+ */
+int xtLookup(struct inode *ip, s64 lstart,
+ s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
+{
+ int rc = 0;
+ struct btstack btstack;
+ int cmp;
+ s64 bn;
+ struct metapage *mp;
+ xtpage_t *p;
+ int index;
+ xad_t *xad;
+ s64 size, xoff, xend;
+ int xlen;
+ s64 xaddr;
+
+ *plen = 0;
+
+ if (!no_check) {
+ /* is lookup offset beyond eof ? */
+ size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
+ JFS_SBI(ip->i_sb)->l2bsize;
+ if (lstart >= size) {
+ jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
+ (ulong) lstart, (ulong) size);
+ return 0;
+ }
+ }
+
+ /*
+ * search for the xad entry covering the logical extent
+ */
+//search:
+ if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
+ jfs_err("xtLookup: xtSearch returned %d", rc);
+ return rc;
+ }
+
+ /*
+ * compute the physical extent covering logical extent
+ *
+ * N.B. search may have failed (e.g., hole in sparse file),
+ * and returned the index of the next entry.
+ */
+ /* retrieve search result */
+ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
+
+ /* is xad found covering start of logical extent ?
+ * lstart is a page start address,
+ * i.e., lstart cannot start in a hole;
+ */
+ if (cmp)
+ goto out;
+
+ /*
+ * lxd covered by xad
+ */
+ xad = &p->xad[index];
+ xoff = offsetXAD(xad);
+ xlen = lengthXAD(xad);
+ xend = xoff + xlen;
+ xaddr = addressXAD(xad);
+
+ /* initialize new pxd */
+ *pflag = xad->flag;
+ *paddr = xaddr + (lstart - xoff);
+ /* a page must be fully covered by an xad */
+ *plen = min(xend - lstart, llen);
+
+ out:
+ XT_PUTPAGE(mp);
+
+ return rc;
+}
+
+
+/*
+ * xtLookupList()
+ *
+ * function: map a single logical extent into a list of physical extent;
+ *
+ * parameter:
+ * struct inode *ip,
+ * struct lxdlist *lxdlist, lxd list (in)
+ * struct xadlist *xadlist, xad list (in/out)
+ * int flag)
+ *
+ * coverage of lxd by xad under assumption of
+ * . lxd's are ordered and disjoint.
+ * . xad's are ordered and disjoint.
+ *
+ * return:
+ * 0: success
+ *
+ * note: a page being written (even a single byte) is backed fully,
+ * except the last page which is only backed with blocks
+ * required to cover the last byte;
+ * the extent backing a page is fully contained within an xad;
+ */
+int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
+ struct xadlist * xadlist, int flag)
+{
+ int rc = 0;
+ struct btstack btstack;
+ int cmp;
+ s64 bn;
+ struct metapage *mp;
+ xtpage_t *p;
+ int index;
+ lxd_t *lxd;
+ xad_t *xad, *pxd;
+ s64 size, lstart, lend, xstart, xend, pstart;
+ s64 llen, xlen, plen;
+ s64 xaddr, paddr;
+ int nlxd, npxd, maxnpxd;
+
+ npxd = xadlist->nxad = 0;
+ maxnpxd = xadlist->maxnxad;
+ pxd = xadlist->xad;
+
+ nlxd = lxdlist->nlxd;
+ lxd = lxdlist->lxd;
+
+ lstart = offsetLXD(lxd);
+ llen = lengthLXD(lxd);
+ lend = lstart + llen;
+
+ size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
+ JFS_SBI(ip->i_sb)->l2bsize;
+
+ /*
+ * search for the xad entry covering the logical extent
+ */
+ search:
+ if (lstart >= size)
+ return 0;
+
+ if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
+ return rc;
+
+ /*
+ * compute the physical extent covering logical extent
+ *
+ * N.B. search may have failed (e.g., hole in sparse file),
+ * and returned the index of the next entry.
+ */
+//map:
+ /* retrieve search result */
+ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
+
+ /* is xad on the next sibling page ? */
+ if (index == le16_to_cpu(p->header.nextindex)) {
+ if (p->header.flag & BT_ROOT)
+ goto mapend;
+
+ if ((bn = le64_to_cpu(p->header.next)) == 0)
+ goto mapend;
+
+ XT_PUTPAGE(mp);
+
+ /* get next sibling page */
+ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ index = XTENTRYSTART;
+ }
+
+ xad = &p->xad[index];
+
+ /*
+ * is lxd covered by xad ?
+ */
+ compare:
+ xstart = offsetXAD(xad);
+ xlen = lengthXAD(xad);
+ xend = xstart + xlen;
+ xaddr = addressXAD(xad);
+
+ compare1:
+ if (xstart < lstart)
+ goto compare2;
+
+ /* (lstart <= xstart) */
+
+ /* lxd is NOT covered by xad */
+ if (lend <= xstart) {
+ /*
+ * get next lxd
+ */
+ if (--nlxd == 0)
+ goto mapend;
+ lxd++;
+
+ lstart = offsetLXD(lxd);
+ llen = lengthLXD(lxd);
+ lend = lstart + llen;
+ if (lstart >= size)
+ goto mapend;
+
+ /* compare with the current xad */
+ goto compare1;
+ }
+ /* lxd is covered by xad */
+ else { /* (xstart < lend) */
+
+ /* initialize new pxd */
+ pstart = xstart;
+ plen = min(lend - xstart, xlen);
+ paddr = xaddr;
+
+ goto cover;
+ }
+
+ /* (xstart < lstart) */
+ compare2:
+ /* lxd is covered by xad */
+ if (lstart < xend) {
+ /* initialize new pxd */
+ pstart = lstart;
+ plen = min(xend - lstart, llen);
+ paddr = xaddr + (lstart - xstart);
+
+ goto cover;
+ }
+ /* lxd is NOT covered by xad */
+ else { /* (xend <= lstart) */
+
+ /*
+ * get next xad
+ *
+ * linear search next xad covering lxd on
+ * the current xad page, and then tree search
+ */
+ if (index == le16_to_cpu(p->header.nextindex) - 1) {
+ if (p->header.flag & BT_ROOT)
+ goto mapend;
+
+ XT_PUTPAGE(mp);
+ goto search;
+ } else {
+ index++;
+ xad++;
+
+ /* compare with new xad */
+ goto compare;
+ }
+ }
+
+ /*
+ * lxd is covered by xad and a new pxd has been initialized
+ * (lstart <= xstart < lend) or (xstart < lstart < xend)
+ */
+ cover:
+ /* finalize pxd corresponding to current xad */
+ XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
+
+ if (++npxd >= maxnpxd)
+ goto mapend;
+ pxd++;
+
+ /*
+ * lxd is fully covered by xad
+ */
+ if (lend <= xend) {
+ /*
+ * get next lxd
+ */
+ if (--nlxd == 0)
+ goto mapend;
+ lxd++;
+
+ lstart = offsetLXD(lxd);
+ llen = lengthLXD(lxd);
+ lend = lstart + llen;
+ if (lstart >= size)
+ goto mapend;
+
+ /*
+ * test for old xad covering new lxd
+ * (old xstart < new lstart)
+ */
+ goto compare2;
+ }
+ /*
+ * lxd is partially covered by xad
+ */
+ else { /* (xend < lend) */
+
+ /*
+ * get next xad
+ *
+ * linear search next xad covering lxd on
+ * the current xad page, and then next xad page search
+ */
+ if (index == le16_to_cpu(p->header.nextindex) - 1) {
+ if (p->header.flag & BT_ROOT)
+ goto mapend;
+
+ if ((bn = le64_to_cpu(p->header.next)) == 0)
+ goto mapend;
+
+ XT_PUTPAGE(mp);
+
+ /* get next sibling page */
+ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ index = XTENTRYSTART;
+ xad = &p->xad[index];
+ } else {
+ index++;
+ xad++;
+ }
+
+ /*
+ * test for new xad covering old lxd
+ * (old lstart < new xstart)
+ */
+ goto compare;
+ }
+
+ mapend:
+ xadlist->nxad = npxd;
+
+//out:
+ XT_PUTPAGE(mp);
+
+ return rc;
+}
+
+
+/*
+ * xtSearch()
+ *
+ * function: search for the xad entry covering specified offset.
+ *
+ * parameters:
+ * ip - file object;
+ * xoff - extent offset;
+ * cmpp - comparison result:
+ * btstack - traverse stack;
+ * flag - search process flag (XT_INSERT);
+ *
+ * returns:
+ * btstack contains (bn, index) of search path traversed to the entry.
+ * *cmpp is set to result of comparison with the entry returned.
+ * the page containing the entry is pinned at exit.
+ */
+static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
+ int *cmpp, struct btstack * btstack, int flag)
+{
+ struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+ int rc = 0;
+ int cmp = 1; /* init for empty page */
+ s64 bn; /* block number */
+ struct metapage *mp; /* page buffer */
+ xtpage_t *p; /* page */
+ xad_t *xad;
+ int base, index, lim, btindex;
+ struct btframe *btsp;
+ int nsplit = 0; /* number of pages to split */
+ s64 t64;
+
+ INCREMENT(xtStat.search);
+
+ BT_CLR(btstack);
+
+ btstack->nsplit = 0;
+
+ /*
+ * search down tree from root:
+ *
+ * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
+ * internal page, child page Pi contains entry with k, Ki <= K < Kj.
+ *
+ * if entry with search key K is not found
+ * internal page search find the entry with largest key Ki
+ * less than K which point to the child page to search;
+ * leaf page search find the entry with smallest key Kj
+ * greater than K so that the returned index is the position of
+ * the entry to be shifted right for insertion of new entry.
+ * for empty tree, search key is greater than any key of the tree.
+ *
+ * by convention, root bn = 0.
+ */
+ for (bn = 0;;) {
+ /* get/pin the page to search */
+ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
+ if (rc)
+ return rc;
+
+ /* try sequential access heuristics with the previous
+ * access entry in target leaf page:
+ * once search narrowed down into the target leaf,
+ * key must either match an entry in the leaf or
+ * key entry does not exist in the tree;
+ */
+//fastSearch:
+ if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
+ (p->header.flag & BT_LEAF) &&
+ (index = jfs_ip->btindex) <
+ le16_to_cpu(p->header.nextindex)) {
+ xad = &p->xad[index];
+ t64 = offsetXAD(xad);
+ if (xoff < t64 + lengthXAD(xad)) {
+ if (xoff >= t64) {
+ *cmpp = 0;
+ goto out;
+ }
+
+ /* stop sequential access heuristics */
+ goto binarySearch;
+ } else { /* (t64 + lengthXAD(xad)) <= xoff */
+
+ /* try next sequential entry */
+ index++;
+ if (index <
+ le16_to_cpu(p->header.nextindex)) {
+ xad++;
+ t64 = offsetXAD(xad);
+ if (xoff < t64 + lengthXAD(xad)) {
+ if (xoff >= t64) {
+ *cmpp = 0;
+ goto out;
+ }
+
+ /* miss: key falls between
+ * previous and this entry
+ */
+ *cmpp = 1;
+ goto out;
+ }
+
+ /* (xoff >= t64 + lengthXAD(xad));
+ * matching entry may be further out:
+ * stop heuristic search
+ */
+ /* stop sequential access heuristics */
+ goto binarySearch;
+ }
+
+ /* (index == p->header.nextindex);
+ * miss: key entry does not exist in
+ * the target leaf/tree
+ */
+ *cmpp = 1;
+ goto out;
+ }
+
+ /*
+ * if hit, return index of the entry found, and
+ * if miss, where new entry with search key is
+ * to be inserted;
+ */
+ out:
+ /* compute number of pages to split */
+ if (flag & XT_INSERT) {
+ if (p->header.nextindex == /* little-endian */
+ p->header.maxentry)
+ nsplit++;
+ else
+ nsplit = 0;
+ btstack->nsplit = nsplit;
+ }
+
+ /* save search result */
+ btsp = btstack->top;
+ btsp->bn = bn;
+ btsp->index = index;
+ btsp->mp = mp;
+
+ /* update sequential access heuristics */
+ jfs_ip->btindex = index;
+
+ INCREMENT(xtStat.fastSearch);
+ return 0;
+ }
+
+ /* well, ... full search now */
+ binarySearch:
+ lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
+
+ /*
+ * binary search with search key K on the current page
+ */
+ for (base = XTENTRYSTART; lim; lim >>= 1) {
+ index = base + (lim >> 1);
+
+ XT_CMP(cmp, xoff, &p->xad[index], t64);
+ if (cmp == 0) {
+ /*
+ * search hit
+ */
+ /* search hit - leaf page:
+ * return the entry found
+ */
+ if (p->header.flag & BT_LEAF) {
+ *cmpp = cmp;
+
+ /* compute number of pages to split */
+ if (flag & XT_INSERT) {
+ if (p->header.nextindex ==
+ p->header.maxentry)
+ nsplit++;
+ else
+ nsplit = 0;
+ btstack->nsplit = nsplit;
+ }
+
+ /* save search result */
+ btsp = btstack->top;
+ btsp->bn = bn;
+ btsp->index = index;
+ btsp->mp = mp;
+
+ /* init sequential access heuristics */
+ btindex = jfs_ip->btindex;
+ if (index == btindex ||
+ index == btindex + 1)
+ jfs_ip->btorder = BT_SEQUENTIAL;
+ else
+ jfs_ip->btorder = BT_RANDOM;
+ jfs_ip->btindex = index;
+
+ return 0;
+ }
+
+ /* search hit - internal page:
+ * descend/search its child page
+ */
+ goto next;
+ }
+
+ if (cmp > 0) {
+ base = index + 1;
+ --lim;
+ }
+ }
+
+ /*
+ * search miss
+ *
+ * base is the smallest index with key (Kj) greater than
+ * search key (K) and may be zero or maxentry index.
+ */
+ /*
+ * search miss - leaf page:
+ *
+ * return location of entry (base) where new entry with
+ * search key K is to be inserted.
+ */
+ if (p->header.flag & BT_LEAF) {
+ *cmpp = cmp;
+
+ /* compute number of pages to split */
+ if (flag & XT_INSERT) {
+ if (p->header.nextindex ==
+ p->header.maxentry)
+ nsplit++;
+ else
+ nsplit = 0;
+ btstack->nsplit = nsplit;
+ }
+
+ /* save search result */
+ btsp = btstack->top;
+ btsp->bn = bn;
+ btsp->index = base;
+ btsp->mp = mp;
+
+ /* init sequential access heuristics */
+ btindex = jfs_ip->btindex;
+ if (base == btindex || base == btindex + 1)
+ jfs_ip->btorder = BT_SEQUENTIAL;
+ else
+ jfs_ip->btorder = BT_RANDOM;
+ jfs_ip->btindex = base;
+
+ return 0;
+ }
+
+ /*
+ * search miss - non-leaf page:
+ *
+ * if base is non-zero, decrement base by one to get the parent
+ * entry of the child page to search.
+ */
+ index = base ? base - 1 : base;
+
+ /*
+ * go down to child page
+ */
+ next:
+ /* update number of pages to split */
+ if (p->header.nextindex == p->header.maxentry)
+ nsplit++;
+ else
+ nsplit = 0;
+
+ /* push (bn, index) of the parent page/entry */
+ BT_PUSH(btstack, bn, index);
+
+ /* get the child page block number */
+ bn = addressXAD(&p->xad[index]);
+
+ /* unpin the parent page */
+ XT_PUTPAGE(mp);
+ }
+}
+
+/*
+ * xtInsert()
+ *
+ * function:
+ *
+ * parameter:
+ * tid - transaction id;
+ * ip - file object;
+ * xflag - extent flag (XAD_NOTRECORDED):
+ * xoff - extent offset;
+ * xlen - extent length;
+ * xaddrp - extent address pointer (in/out):
+ * if (*xaddrp)
+ * caller allocated data extent at *xaddrp;
+ * else
+ * allocate data extent and return its xaddr;
+ * flag -
+ *
+ * return:
+ */
+int xtInsert(tid_t tid, /* transaction id */
+ struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
+ int flag)
+{
+ int rc = 0;
+ s64 xaddr, hint;
+ struct metapage *mp; /* meta-page buffer */
+ xtpage_t *p; /* base B+-tree index page */
+ s64 bn;
+ int index, nextindex;
+ struct btstack btstack; /* traverse stack */
+ struct xtsplit split; /* split information */
+ xad_t *xad;
+ int cmp;
+ struct tlock *tlck;
+ struct xtlock *xtlck;
+
+ jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
+
+ /*
+ * search for the entry location at which to insert:
+ *
+ * xtFastSearch() and xtSearch() both returns (leaf page
+ * pinned, index at which to insert).
+ * n.b. xtSearch() may return index of maxentry of
+ * the full page.
+ */
+ if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+ return rc;
+
+ /* retrieve search result */
+ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
+
+ /* This test must follow XT_GETSEARCH since mp must be valid if
+ * we branch to out: */
+ if (cmp == 0) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ /*
+ * allocate data extent requested
+ *
+ * allocation hint: last xad
+ */
+ if ((xaddr = *xaddrp) == 0) {
+ if (index > XTENTRYSTART) {
+ xad = &p->xad[index - 1];
+ hint = addressXAD(xad) + lengthXAD(xad) - 1;
+ } else
+ hint = 0;
+ if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
+ goto out;
+ if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
+ DQUOT_FREE_BLOCK(ip, xlen);
+ goto out;
+ }
+ }
+
+ /*
+ * insert entry for new extent
+ */
+ xflag |= XAD_NEW;
+
+ /*
+ * if the leaf page is full, split the page and
+ * propagate up the router entry for the new page from split
+ *
+ * The xtSplitUp() will insert the entry and unpin the leaf page.
+ */
+ nextindex = le16_to_cpu(p->header.nextindex);
+ if (nextindex == le16_to_cpu(p->header.maxentry)) {
+ split.mp = mp;
+ split.index = index;
+ split.flag = xflag;
+ split.off = xoff;
+ split.len = xlen;
+ split.addr = xaddr;
+ split.pxdlist = NULL;
+ if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
+ /* undo data extent allocation */
+ if (*xaddrp == 0) {
+ dbFree(ip, xaddr, (s64) xlen);
+ DQUOT_FREE_BLOCK(ip, xlen);
+ }
+ return rc;
+ }
+
+ *xaddrp = xaddr;
+ return 0;
+ }
+
+ /*
+ * insert the new entry into the leaf page
+ */
+ /*
+ * acquire a transaction lock on the leaf page;
+ *
+ * action: xad insertion/extension;
+ */
+ BT_MARK_DIRTY(mp, ip);
+
+ /* if insert into middle, shift right remaining entries. */
+ if (index < nextindex)
+ memmove(&p->xad[index + 1], &p->xad[index],
+ (nextindex - index) * sizeof(xad_t));
+
+ /* insert the new entry: mark the entry NEW */
+ xad = &p->xad[index];
+ XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
+
+ /* advance next available entry index */
+ p->header.nextindex =
+ cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
+
+ /* Don't log it if there are no links to the file */
+ if (!test_cflag(COMMIT_Nolink, ip)) {
+ tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
+ xtlck = (struct xtlock *) & tlck->lock;
+ xtlck->lwm.offset =
+ (xtlck->lwm.offset) ? min(index,
+ (int)xtlck->lwm.offset) : index;
+ xtlck->lwm.length =
+ le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
+ }
+
+ *xaddrp = xaddr;
+
+ out:
+ /* unpin the leaf page */
+ XT_PUTPAGE(mp);
+
+ return rc;
+}
+
+
+/*
+ * xtSplitUp()
+ *
+ * function:
+ * split full pages as propagating insertion up the tree
+ *
+ * parameter:
+ * tid - transaction id;
+ * ip - file object;
+ * split - entry parameter descriptor;
+ * btstack - traverse stack from xtSearch()
+ *
+ * return:
+ */
+static int
+xtSplitUp(tid_t tid,
+ struct inode *ip, struct xtsplit * split, struct btstack * btstack)
+{
+ int rc = 0;
+ struct metapage *smp;
+ xtpage_t *sp; /* split page */
+ struct metapage *rmp;
+ s64 rbn; /* new right page block number */
+ struct metapage *rcmp;
+ xtpage_t *rcp; /* right child page */
+ s64 rcbn; /* right child page block number */
+ int skip; /* index of entry of insertion */
+ int nextindex; /* next available entry index of p */
+ struct btframe *parent; /* parent page entry on traverse stack */
+ xad_t *xad;
+ s64 xaddr;
+ int xlen;
+ int nsplit; /* number of pages split */
+ struct pxdlist pxdlist;
+ pxd_t *pxd;
+ struct tlock *tlck;
+ struct xtlock *xtlck;
+
+ smp = split->mp;
+ sp = XT_PAGE(ip, smp);
+
+ /* is inode xtree root extension/inline EA area free ? */
+ if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
+ (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
+ (JFS_IP(ip)->mode2 & INLINEEA)) {
+ sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
+ JFS_IP(ip)->mode2 &= ~INLINEEA;
+
+ BT_MARK_DIRTY(smp, ip);
+ /*
+ * acquire a transaction lock on the leaf page;
+ *
+ * action: xad insertion/extension;
+ */
+
+ /* if insert into middle, shift right remaining entries. */
+ skip = split->index;
+ nextindex = le16_to_cpu(sp->header.nextindex);
+ if (skip < nextindex)
+ memmove(&sp->xad[skip + 1], &sp->xad[skip],
+ (nextindex - skip) * sizeof(xad_t));
+
+ /* insert the new entry: mark the entry NEW */
+ xad = &sp->xad[skip];
+ XT_PUTENTRY(xad, split->flag, split->off, split->len,
+ split->addr);
+
+ /* advance next available entry index */
+ sp->header.nextindex =
+ cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
+
+ /* Don't log it if there are no links to the file */
+ if (!test_cflag(COMMIT_Nolink, ip)) {
+ tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
+ xtlck = (struct xtlock *) & tlck->lock;
+ xtlck->lwm.offset = (xtlck->lwm.offset) ?
+ min(skip, (int)xtlck->lwm.offset) : skip;
+ xtlck->lwm.length =
+ le16_to_cpu(sp->header.nextindex) -
+ xtlck->lwm.offset;
+ }
+
+ return 0;
+ }
+
+ /*
+ * allocate new index blocks to cover index page split(s)
+ *
+ * allocation hint: ?
+ */
+ if (split->pxdlist == NULL) {
+ nsplit = btstack->nsplit;
+ split->pxdlist = &pxdlist;
+ pxdlist.maxnpxd = pxdlist.npxd = 0;
+ pxd = &pxdlist.pxd[0];
+ xlen = JFS_SBI(ip->i_sb)->nbperpage;
+ for (; nsplit > 0; nsplit--, pxd++) {
+ if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
+ == 0) {
+ PXDaddress(pxd, xaddr);
+ PXDlength(pxd, xlen);
+
+ pxdlist.maxnpxd++;
+
+ continue;
+ }
+
+ /* undo allocation */
+
+ XT_PUTPAGE(smp);
+ return rc;
+ }
+ }
+
+ /*
+ * Split leaf page <sp> into <sp> and a new right page <rp>.
+ *
+ * The split routines insert the new entry into the leaf page,
+ * and acquire txLock as appropriate.
+ * return <rp> pinned and its block number <rpbn>.
+ */
+ rc = (sp->header.flag & BT_ROOT) ?
+ xtSplitRoot(tid, ip, split, &rmp) :
+ xtSplitPage(tid, ip, split, &rmp, &rbn);
+
+ XT_PUTPAGE(smp);
+
+ if (rc)
+ return -EIO;
+ /*
+ * propagate up the router entry for the leaf page just split
+ *
+ * insert a router entry for the new page into the parent page,
+ * propagate the insert/split up the tree by walking back the stack
+ * of (bn of parent page, index of child page entry in parent page)
+ * that were traversed during the search for the page that split.
+ *
+ * the propagation of insert/split up the tree stops if the root
+ * splits or the page inserted into doesn't have to split to hold
+ * the new entry.
+ *
+ * the parent entry for the split page remains the same, and
+ * a new entry is inserted at its right with the first key and
+ * block number of the new right page.
+ *
+ * There are a maximum of 3 pages pinned at any time:
+ * right child, left parent and right parent (when the parent splits)
+ * to keep the child page pinned while working on the parent.
+ * make sure that all pins are released at exit.
+ */
+ while ((parent = BT_POP(btstack)) != NULL) {
+ /* parent page specified by stack frame <parent> */
+
+ /* keep current child pages <rcp> pinned */
+ rcmp = rmp;
+ rcbn = rbn;
+ rcp = XT_PAGE(ip, rcmp);
+
+ /*
+ * insert router entry in parent for new right child page <rp>
+ */
+ /* get/pin the parent page <sp> */
+ XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
+ if (rc) {
+ XT_PUTPAGE(rcmp);
+ return rc;
+ }
+
+ /*
+ * The new key entry goes ONE AFTER the index of parent entry,
+ * because the split was to the right.
+ */
+ skip = parent->index + 1;
+
+ /*
+ * split or shift right remaining entries of the parent page
+ */
+ nextindex = le16_to_cpu(sp->header.nextindex);
+ /*
+ * parent page is full - split the parent page
+ */
+ if (nextindex == le16_to_cpu(sp->header.maxentry)) {
+ /* init for parent page split */
+ split->mp = smp;
+ split->index = skip; /* index at insert */
+ split->flag = XAD_NEW;
+ split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
+ split->len = JFS_SBI(ip->i_sb)->nbperpage;
+ split->addr = rcbn;
+
+ /* unpin previous right child page */
+ XT_PUTPAGE(rcmp);
+
+ /* The split routines insert the new entry,
+ * and acquire txLock as appropriate.
+ * return <rp> pinned and its block number <rpbn>.
+ */
+ rc = (sp->header.flag & BT_ROOT) ?
+ xtSplitRoot(tid, ip, split, &rmp) :
+ xtSplitPage(tid, ip, split, &rmp, &rbn);
+ if (rc) {
+ XT_PUTPAGE(smp);
+ return rc;
+ }
+
+ XT_PUTPAGE(smp);
+ /* keep new child page <rp> pinned */
+ }
+ /*
+ * parent page is not full - insert in parent page
+ */
+ else {
+ /*
+ * insert router entry in parent for the right child
+ * page from the first entry of the right child page:
+ */
+ /*
+ * acquire a transaction lock on the parent page;
+ *
+ * action: router xad insertion;
+ */
+ BT_MARK_DIRTY(smp, ip);
+
+ /*
+ * if insert into middle, shift right remaining entries
+ */
+ if (skip < nextindex)
+ memmove(&sp->xad[skip + 1], &sp->xad[skip],
+ (nextindex -
+ skip) << L2XTSLOTSIZE);
+
+ /* insert the router entry */
+ xad = &sp->xad[skip];
+ XT_PUTENTRY(xad, XAD_NEW,
+ offsetXAD(&rcp->xad[XTENTRYSTART]),
+ JFS_SBI(ip->i_sb)->nbperpage, rcbn);
+
+ /* advance next available entry index. */
+ sp->header.nextindex =
+ cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
+ 1);
+
+ /* Don't log it if there are no links to the file */
+ if (!test_cflag(COMMIT_Nolink, ip)) {
+ tlck = txLock(tid, ip, smp,
+ tlckXTREE | tlckGROW);
+ xtlck = (struct xtlock *) & tlck->lock;
+ xtlck->lwm.offset = (xtlck->lwm.offset) ?
+ min(skip, (int)xtlck->lwm.offset) : skip;
+ xtlck->lwm.length =
+ le16_to_cpu(sp->header.nextindex) -
+ xtlck->lwm.offset;
+ }
+
+ /* unpin parent page */
+ XT_PUTPAGE(smp);
+
+ /* exit propagate up */
+ break;
+ }
+ }
+
+ /* unpin current right page */
+ XT_PUTPAGE(rmp);
+
+ return 0;
+}
+
+
+/*
+ * xtSplitPage()
+ *
+ * function:
+ * split a full non-root page into
+ * original/split/left page and new right page
+ * i.e., the original/split page remains as left page.
+ *
+ * parameter:
+ * int tid,
+ * struct inode *ip,
+ * struct xtsplit *split,
+ * struct metapage **rmpp,
+ * u64 *rbnp,
+ *
+ * return:
+ * Pointer to page in which to insert or NULL on error.
+ */
+static int
+xtSplitPage(tid_t tid, struct inode *ip,
+ struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
+{
+ int rc = 0;
+ struct metapage *smp;
+ xtpage_t *sp;
+ struct metapage *rmp;
+ xtpage_t *rp; /* new right page allocated */
+ s64 rbn; /* new right page block number */
+ struct metapage *mp;
+ xtpage_t *p;
+ s64 nextbn;
+ int skip, maxentry, middle, righthalf, n;
+ xad_t *xad;
+ struct pxdlist *pxdlist;
+ pxd_t *pxd;
+ struct tlock *tlck;
+ struct xtlock *sxtlck = NULL, *rxtlck = NULL;
+ int quota_allocation = 0;
+
+ smp = split->mp;
+ sp = XT_PAGE(ip, smp);
+
+ INCREMENT(xtStat.split);
+
+ pxdlist = split->pxdlist;
+ pxd = &pxdlist->pxd[pxdlist->npxd];
+ pxdlist->npxd++;
+ rbn = addressPXD(pxd);
+
+ /* Allocate blocks to quota. */
+ if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
+ rc =