diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_txnmgr.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/jfs/jfs_txnmgr.c')
-rw-r--r-- | fs/jfs/jfs_txnmgr.c | 3131 |
1 files changed, 3131 insertions, 0 deletions
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c new file mode 100644 index 00000000000..f40301d93f7 --- /dev/null +++ b/fs/jfs/jfs_txnmgr.c @@ -0,0 +1,3131 @@ +/* + * Copyright (C) International Business Machines Corp., 2000-2005 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * jfs_txnmgr.c: transaction manager + * + * notes: + * transaction starts with txBegin() and ends with txCommit() + * or txAbort(). + * + * tlock is acquired at the time of update; + * (obviate scan at commit time for xtree and dtree) + * tlock and mp points to each other; + * (no hashlist for mp -> tlock). + * + * special cases: + * tlock on in-memory inode: + * in-place tlock in the in-memory inode itself; + * converted to page lock by iWrite() at commit time. + * + * tlock during write()/mmap() under anonymous transaction (tid = 0): + * transferred (?) to transaction at commit time. + * + * use the page itself to update allocation maps + * (obviate intermediate replication of allocation/deallocation data) + * hold on to mp+lock thru update of maps + */ + + +#include <linux/fs.h> +#include <linux/vmalloc.h> +#include <linux/smp_lock.h> +#include <linux/completion.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_metapage.h" +#include "jfs_dinode.h" +#include "jfs_imap.h" +#include "jfs_dmap.h" +#include "jfs_superblock.h" +#include "jfs_debug.h" + +/* + * transaction management structures + */ +static struct { + int freetid; /* index of a free tid structure */ + int freelock; /* index first free lock word */ + wait_queue_head_t freewait; /* eventlist of free tblock */ + wait_queue_head_t freelockwait; /* eventlist of free tlock */ + wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ + int tlocksInUse; /* Number of tlocks in use */ + spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ +/* struct tblock *sync_queue; * Transactions waiting for data sync */ + struct list_head unlock_queue; /* Txns waiting to be released */ + struct list_head anon_list; /* inodes having anonymous txns */ + struct list_head anon_list2; /* inodes having anonymous txns + that couldn't be sync'ed */ +} TxAnchor; + +int jfs_tlocks_low; /* Indicates low number of available tlocks */ + +#ifdef CONFIG_JFS_STATISTICS +static struct { + uint txBegin; + uint txBegin_barrier; + uint txBegin_lockslow; + uint txBegin_freetid; + uint txBeginAnon; + uint txBeginAnon_barrier; + uint txBeginAnon_lockslow; + uint txLockAlloc; + uint txLockAlloc_freelock; +} TxStat; +#endif + +static int nTxBlock = -1; /* number of transaction blocks */ +module_param(nTxBlock, int, 0); +MODULE_PARM_DESC(nTxBlock, + "Number of transaction blocks (max:65536)"); + +static int nTxLock = -1; /* number of transaction locks */ +module_param(nTxLock, int, 0); +MODULE_PARM_DESC(nTxLock, + "Number of transaction locks (max:65536)"); + +struct tblock *TxBlock; /* transaction block table */ +static int TxLockLWM; /* Low water mark for number of txLocks used */ +static int TxLockHWM; /* High water mark for number of txLocks used */ +static int TxLockVHWM; /* Very High water mark */ +struct tlock *TxLock; /* transaction lock table */ + + +/* + * transaction management lock + */ +static DEFINE_SPINLOCK(jfsTxnLock); + +#define TXN_LOCK() spin_lock(&jfsTxnLock) +#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) + +#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); +#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) +#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) + +DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); +DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); +static int jfs_commit_thread_waking; + +/* + * Retry logic exist outside these macros to protect from spurrious wakeups. + */ +static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(event, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + TXN_UNLOCK(); + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(event, &wait); +} + +#define TXN_SLEEP(event)\ +{\ + TXN_SLEEP_DROP_LOCK(event);\ + TXN_LOCK();\ +} + +#define TXN_WAKEUP(event) wake_up_all(event) + + +/* + * statistics + */ +static struct { + tid_t maxtid; /* 4: biggest tid ever used */ + lid_t maxlid; /* 4: biggest lid ever used */ + int ntid; /* 4: # of transactions performed */ + int nlid; /* 4: # of tlocks acquired */ + int waitlock; /* 4: # of tlock wait */ +} stattx; + + +/* + * external references + */ +extern int lmGroupCommit(struct jfs_log *, struct tblock *); +extern int jfs_commit_inode(struct inode *, int); +extern int jfs_stop_threads; + +extern struct completion jfsIOwait; + +/* + * forward references + */ +static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck, struct commit * cd); +static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck); +static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck); +static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck); +static void txAllocPMap(struct inode *ip, struct maplock * maplock, + struct tblock * tblk); +static void txForce(struct tblock * tblk); +static int txLog(struct jfs_log * log, struct tblock * tblk, + struct commit * cd); +static void txUpdateMap(struct tblock * tblk); +static void txRelease(struct tblock * tblk); +static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck); +static void LogSyncRelease(struct metapage * mp); + +/* + * transaction block/lock management + * --------------------------------- + */ + +/* + * Get a transaction lock from the free list. If the number in use is + * greater than the high water mark, wake up the sync daemon. This should + * free some anonymous transaction locks. (TXN_LOCK must be held.) + */ +static lid_t txLockAlloc(void) +{ + lid_t lid; + + INCREMENT(TxStat.txLockAlloc); + if (!TxAnchor.freelock) { + INCREMENT(TxStat.txLockAlloc_freelock); + } + + while (!(lid = TxAnchor.freelock)) + TXN_SLEEP(&TxAnchor.freelockwait); + TxAnchor.freelock = TxLock[lid].next; + HIGHWATERMARK(stattx.maxlid, lid); + if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { + jfs_info("txLockAlloc tlocks low"); + jfs_tlocks_low = 1; + wake_up(&jfs_sync_thread_wait); + } + + return lid; +} + +static void txLockFree(lid_t lid) +{ + TxLock[lid].next = TxAnchor.freelock; + TxAnchor.freelock = lid; + TxAnchor.tlocksInUse--; + if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { + jfs_info("txLockFree jfs_tlocks_low no more"); + jfs_tlocks_low = 0; + TXN_WAKEUP(&TxAnchor.lowlockwait); + } + TXN_WAKEUP(&TxAnchor.freelockwait); +} + +/* + * NAME: txInit() + * + * FUNCTION: initialize transaction management structures + * + * RETURN: + * + * serialization: single thread at jfs_init() + */ +int txInit(void) +{ + int k, size; + struct sysinfo si; + + /* Set defaults for nTxLock and nTxBlock if unset */ + + if (nTxLock == -1) { + if (nTxBlock == -1) { + /* Base default on memory size */ + si_meminfo(&si); + if (si.totalram > (256 * 1024)) /* 1 GB */ + nTxLock = 64 * 1024; + else + nTxLock = si.totalram >> 2; + } else if (nTxBlock > (8 * 1024)) + nTxLock = 64 * 1024; + else + nTxLock = nTxBlock << 3; + } + if (nTxBlock == -1) + nTxBlock = nTxLock >> 3; + + /* Verify tunable parameters */ + if (nTxBlock < 16) + nTxBlock = 16; /* No one should set it this low */ + if (nTxBlock > 65536) + nTxBlock = 65536; + if (nTxLock < 256) + nTxLock = 256; /* No one should set it this low */ + if (nTxLock > 65536) + nTxLock = 65536; + + printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", + nTxBlock, nTxLock); + /* + * initialize transaction block (tblock) table + * + * transaction id (tid) = tblock index + * tid = 0 is reserved. + */ + TxLockLWM = (nTxLock * 4) / 10; + TxLockHWM = (nTxLock * 7) / 10; + TxLockVHWM = (nTxLock * 8) / 10; + + size = sizeof(struct tblock) * nTxBlock; + TxBlock = (struct tblock *) vmalloc(size); + if (TxBlock == NULL) + return -ENOMEM; + + for (k = 1; k < nTxBlock - 1; k++) { + TxBlock[k].next = k + 1; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + } + TxBlock[k].next = 0; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + + TxAnchor.freetid = 1; + init_waitqueue_head(&TxAnchor.freewait); + + stattx.maxtid = 1; /* statistics */ + + /* + * initialize transaction lock (tlock) table + * + * transaction lock id = tlock index + * tlock id = 0 is reserved. + */ + size = sizeof(struct tlock) * nTxLock; + TxLock = (struct tlock *) vmalloc(size); + if (TxLock == NULL) { + vfree(TxBlock); + return -ENOMEM; + } + + /* initialize tlock table */ + for (k = 1; k < nTxLock - 1; k++) + TxLock[k].next = k + 1; + TxLock[k].next = 0; + init_waitqueue_head(&TxAnchor.freelockwait); + init_waitqueue_head(&TxAnchor.lowlockwait); + + TxAnchor.freelock = 1; + TxAnchor.tlocksInUse = 0; + INIT_LIST_HEAD(&TxAnchor.anon_list); + INIT_LIST_HEAD(&TxAnchor.anon_list2); + + LAZY_LOCK_INIT(); + INIT_LIST_HEAD(&TxAnchor.unlock_queue); + + stattx.maxlid = 1; /* statistics */ + + return 0; +} + +/* + * NAME: txExit() + * + * FUNCTION: clean up when module is unloaded + */ +void txExit(void) +{ + vfree(TxLock); + TxLock = NULL; + vfree(TxBlock); + TxBlock = NULL; +} + + +/* + * NAME: txBegin() + * + * FUNCTION: start a transaction. + * + * PARAMETER: sb - superblock + * flag - force for nested tx; + * + * RETURN: tid - transaction id + * + * note: flag force allows to start tx for nested tx + * to prevent deadlock on logsync barrier; + */ +tid_t txBegin(struct super_block *sb, int flag) +{ + tid_t t; + struct tblock *tblk; + struct jfs_log *log; + + jfs_info("txBegin: flag = 0x%x", flag); + log = JFS_SBI(sb)->log; + + TXN_LOCK(); + + INCREMENT(TxStat.txBegin); + + retry: + if (!(flag & COMMIT_FORCE)) { + /* + * synchronize with logsync barrier + */ + if (test_bit(log_SYNCBARRIER, &log->flag) || + test_bit(log_QUIESCE, &log->flag)) { + INCREMENT(TxStat.txBegin_barrier); + TXN_SLEEP(&log->syncwait); + goto retry; + } + } + if (flag == 0) { + /* + * Don't begin transaction if we're getting starved for tlocks + * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately + * free tlocks) + */ + if (TxAnchor.tlocksInUse > TxLockVHWM) { + INCREMENT(TxStat.txBegin_lockslow); + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + } + + /* + * allocate transaction id/block + */ + if ((t = TxAnchor.freetid) == 0) { + jfs_info("txBegin: waiting for free tid"); + INCREMENT(TxStat.txBegin_freetid); + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + + tblk = tid_to_tblock(t); + + if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { + /* Don't let a non-forced transaction take the last tblk */ + jfs_info("txBegin: waiting for free tid"); + INCREMENT(TxStat.txBegin_freetid); + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + + TxAnchor.freetid = tblk->next; + + /* + * initialize transaction + */ + + /* + * We can't zero the whole thing or we screw up another thread being + * awakened after sleeping on tblk->waitor + * + * memset(tblk, 0, sizeof(struct tblock)); + */ + tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; + + tblk->sb = sb; + ++log->logtid; + tblk->logtid = log->logtid; + + ++log->active; + + HIGHWATERMARK(stattx.maxtid, t); /* statistics */ + INCREMENT(stattx.ntid); /* statistics */ + + TXN_UNLOCK(); + + jfs_info("txBegin: returning tid = %d", t); + + return t; +} + + +/* + * NAME: txBeginAnon() + * + * FUNCTION: start an anonymous transaction. + * Blocks if logsync or available tlocks are low to prevent + * anonymous tlocks from depleting supply. + * + * PARAMETER: sb - superblock + * + * RETURN: none + */ +void txBeginAnon(struct super_block *sb) +{ + struct jfs_log *log; + + log = JFS_SBI(sb)->log; + + TXN_LOCK(); + INCREMENT(TxStat.txBeginAnon); + + retry: + /* + * synchronize with logsync barrier + */ + if (test_bit(log_SYNCBARRIER, &log->flag) || + test_bit(log_QUIESCE, &log->flag)) { + INCREMENT(TxStat.txBeginAnon_barrier); + TXN_SLEEP(&log->syncwait); + goto retry; + } + + /* + * Don't begin transaction if we're getting starved for tlocks + */ + if (TxAnchor.tlocksInUse > TxLockVHWM) { + INCREMENT(TxStat.txBeginAnon_lockslow); + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + TXN_UNLOCK(); +} + + +/* + * txEnd() + * + * function: free specified transaction block. + * + * logsync barrier processing: + * + * serialization: + */ +void txEnd(tid_t tid) +{ + struct tblock *tblk = tid_to_tblock(tid); + struct jfs_log *log; + + jfs_info("txEnd: tid = %d", tid); + TXN_LOCK(); + + /* + * wakeup transactions waiting on the page locked + * by the current transaction + */ + TXN_WAKEUP(&tblk->waitor); + + log = JFS_SBI(tblk->sb)->log; + + /* + * Lazy commit thread can't free this guy until we mark it UNLOCKED, + * otherwise, we would be left with a transaction that may have been + * reused. + * + * Lazy commit thread will turn off tblkGC_LAZY before calling this + * routine. + */ + if (tblk->flag & tblkGC_LAZY) { + jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); + TXN_UNLOCK(); + + spin_lock_irq(&log->gclock); // LOGGC_LOCK + tblk->flag |= tblkGC_UNLOCKED; + spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK + return; + } + + jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); + + assert(tblk->next == 0); + + /* + * insert tblock back on freelist + */ + tblk->next = TxAnchor.freetid; + TxAnchor.freetid = tid; + + /* + * mark the tblock not active + */ + if (--log->active == 0) { + clear_bit(log_FLUSH, &log->flag); + + /* + * synchronize with logsync barrier + */ + if (test_bit(log_SYNCBARRIER, &log->flag)) { + /* forward log syncpt */ + /* lmSync(log); */ + + jfs_info("log barrier off: 0x%x", log->lsn); + + /* enable new transactions start */ + clear_bit(log_SYNCBARRIER, &log->flag); + + /* wakeup all waitors for logsync barrier */ + TXN_WAKEUP(&log->syncwait); + } + } + + /* + * wakeup all waitors for a free tblock + */ + TXN_WAKEUP(&TxAnchor.freewait); + + TXN_UNLOCK(); +} + + +/* + * txLock() + * + * function: acquire a transaction lock on the specified <mp> + * + * parameter: + * + * return: transaction lock id + * + * serialization: + */ +struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, + int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int dir_xtree = 0; + lid_t lid; + tid_t xtid; + struct tlock *tlck; + struct xtlock *xtlck; + struct linelock *linelock; + xtpage_t *p; + struct tblock *tblk; + + TXN_LOCK(); + + if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && + !(mp->xflag & COMMIT_PAGE)) { + /* + * Directory inode is special. It can have both an xtree tlock + * and a dtree tlock associated with it. + */ + dir_xtree = 1; + lid = jfs_ip->xtlid; + } else + lid = mp->lid; + + /* is page not locked by a transaction ? */ + if (lid == 0) + goto allocateLock; + + jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); + + /* is page locked by the requester transaction ? */ + tlck = lid_to_tlock(lid); + if ((xtid = tlck->tid) == tid) + goto grantLock; + + /* + * is page locked by anonymous transaction/lock ? + * + * (page update without transaction (i.e., file write) is + * locked under anonymous transaction tid = 0: + * anonymous tlocks maintained on anonymous tlock list of + * the inode of the page and available to all anonymous + * transactions until txCommit() time at which point + * they are transferred to the transaction tlock list of + * the commiting transaction of the inode) + */ + if (xtid == 0) { + tlck->tid = tid; + tblk = tid_to_tblock(tid); + /* + * The order of the tlocks in the transaction is important + * (during truncate, child xtree pages must be freed before + * parent's tlocks change the working map). + * Take tlock off anonymous list and add to tail of + * transaction list + * + * Note: We really need to get rid of the tid & lid and + * use list_head's. This code is getting UGLY! + */ + if (jfs_ip->atlhead == lid) { + if (jfs_ip->atltail == lid) { + /* only anonymous txn. + * Remove from anon_list + */ + list_del_init(&jfs_ip->anon_inode_list); + } + jfs_ip->atlhead = tlck->next; + } else { + lid_t last; + for (last = jfs_ip->atlhead; + lid_to_tlock(last)->next != lid; + last = lid_to_tlock(last)->next) { + assert(last); + } + lid_to_tlock(last)->next = tlck->next; + if (jfs_ip->atltail == lid) + jfs_ip->atltail = last; + } + + /* insert the tlock at tail of transaction tlock list */ + + if (tblk->next) + lid_to_tlock(tblk->last)->next = lid; + else + tblk->next = lid; + tlck->next = 0; + tblk->last = lid; + + goto grantLock; + } + + goto waitLock; + + /* + * allocate a tlock + */ + allocateLock: + lid = txLockAlloc(); + tlck = lid_to_tlock(lid); + + /* + * initialize tlock + */ + tlck->tid = tid; + + /* mark tlock for meta-data page */ + if (mp->xflag & COMMIT_PAGE) { + + tlck->flag = tlckPAGELOCK; + + /* mark the page dirty and nohomeok */ + mark_metapage_dirty(mp); + atomic_inc(&mp->nohomeok); + + jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", + mp, atomic_read(&mp->nohomeok), tid, tlck); + + /* if anonymous transaction, and buffer is on the group + * commit synclist, mark inode to show this. This will + * prevent the buffer from being marked nohomeok for too + * long a time. + */ + if ((tid == 0) && mp->lsn) + set_cflag(COMMIT_Synclist, ip); + } + /* mark tlock for in-memory inode */ + else + tlck->flag = tlckINODELOCK; + + tlck->type = 0; + + /* bind the tlock and the page */ + tlck->ip = ip; + tlck->mp = mp; + if (dir_xtree) + jfs_ip->xtlid = lid; + else + mp->lid = lid; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at tail of transaction tlock list */ + if (tid) { + tblk = tid_to_tblock(tid); + if (tblk->next) + lid_to_tlock(tblk->last)->next = lid; + else + tblk->next = lid; + tlck->next = 0; + tblk->last = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + list_add_tail(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list); + } + } + + /* initialize type dependent area for linelock */ + linelock = (struct linelock *) & tlck->lock; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKSHORT; + linelock->index = 0; + + switch (type & tlckTYPE) { + case tlckDTREE: + linelock->l2linesize = L2DTSLOTSIZE; + break; + + case tlckXTREE: + linelock->l2linesize = L2XTSLOTSIZE; + + xtlck = (struct xtlock *) linelock; + xtlck->header.offset = 0; + xtlck->header.length = 2; + + if (type & tlckNEW) { + xtlck->lwm.offset = XTENTRYSTART; + } else { + if (mp->xflag & COMMIT_PAGE) + p = (xtpage_t *) mp->data; + else + p = &jfs_ip->i_xtroot; + xtlck->lwm.offset = + le16_to_cpu(p->header.nextindex); + } + xtlck->lwm.length = 0; /* ! */ + xtlck->twm.offset = 0; + xtlck->hwm.offset = 0; + + xtlck->index = 2; + break; + + case tlckINODE: + linelock->l2linesize = L2INODESLOTSIZE; + break; + + case tlckDATA: + linelock->l2linesize = L2DATASLOTSIZE; + break; + + default: + jfs_err("UFO tlock:0x%p", tlck); + } + + /* + * update tlock vector + */ + grantLock: + tlck->type |= type; + + TXN_UNLOCK(); + + return tlck; + + /* + * page is being locked by another transaction: + */ + waitLock: + /* Only locks on ipimap or ipaimap should reach here */ + /* assert(jfs_ip->fileset == AGGREGATE_I); */ + if (jfs_ip->fileset != AGGREGATE_I) { + jfs_err("txLock: trying to lock locked page!"); + dump_mem("ip", ip, sizeof(struct inode)); + dump_mem("mp", mp, sizeof(struct metapage)); + dump_mem("Locker's tblk", tid_to_tblock(tid), + sizeof(struct tblock)); + dump_mem("Tlock", tlck, sizeof(struct tlock)); + BUG(); + } + INCREMENT(stattx.waitlock); /* statistics */ + release_metapage(mp); + + jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", + tid, xtid, lid); + TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); + + return NULL; +} + + +/* + * NAME: txRelease() + * + * FUNCTION: Release buffers associated with transaction locks, but don't + * mark homeok yet. The allows other transactions to modify + * buffers, but won't let them go to disk until commit record + * actually gets written. + * + * PARAMETER: + * tblk - + * + * RETURN: Errors from subroutines. + */ +static void txRelease(struct tblock * tblk) +{ + struct metapage *mp; + lid_t lid; + struct tlock *tlck; + + TXN_LOCK(); + + for (lid = tblk->next; lid; lid = tlck->next) { + tlck = lid_to_tlock(lid); + if ((mp = tlck->mp) != NULL && + (tlck->type & tlckBTROOT) == 0) { + assert(mp->xflag & COMMIT_PAGE); + mp->lid = 0; + } + } + + /* + * wakeup transactions waiting on a page locked + * by the current transaction + */ + TXN_WAKEUP(&tblk->waitor); + + TXN_UNLOCK(); +} + + +/* + * NAME: txUnlock() + * + * FUNCTION: Initiates pageout of pages modified by tid in journalled + * objects and frees their lockwords. + */ +static void txUnlock(struct tblock * tblk) +{ + struct tlock *tlck; + struct linelock *linelock; + lid_t lid, next, llid, k; + struct metapage *mp; + struct jfs_log *log; + int difft, diffp; + + jfs_info("txUnlock: tblk = 0x%p", tblk); + log = JFS_SBI(tblk->sb)->log; + + /* + * mark page under tlock homeok (its log has been written): + */ + for (lid = tblk->next; lid; lid = next) { + tlck = lid_to_tlock(lid); + next = tlck->next; + + jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); + + /* unbind page from tlock */ + if ((mp = tlck->mp) != NULL && + (tlck->type & tlckBTROOT) == 0) { + assert(mp->xflag & COMMIT_PAGE); + + /* hold buffer + * + * It's possible that someone else has the metapage. + * The only things were changing are nohomeok, which + * is handled atomically, and clsn which is protected + * by the LOGSYNC_LOCK. + */ + hold_metapage(mp, 1); + + assert(atomic_read(&mp->nohomeok) > 0); + atomic_dec(&mp->nohomeok); + + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + if (mp->clsn) { + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + } else + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + + assert(!(tlck->flag & tlckFREEPAGE)); + + if (tlck->flag & tlckWRITEPAGE) { + write_metapage(mp); + } else { + /* release page which has been forced */ + release_metapage(mp); + } + } + + /* insert tlock, and linelock(s) of the tlock if any, + * at head of freelist + */ + TXN_LOCK(); + + llid = ((struct linelock *) & tlck->lock)->next; + while (llid) { + linelock = (struct linelock *) lid_to_tlock(llid); + k = linelock->next; + txLockFree(llid); + llid = k; + } + txLockFree(lid); + + TXN_UNLOCK(); + } + tblk->next = tblk->last = 0; + + /* + * remove tblock from logsynclist + * (allocation map pages inherited lsn of tblk and + * has been inserted in logsync list at txUpdateMap()) + */ + if (tblk->lsn) { + LOGSYNC_LOCK(log); + log->count--; + list_del(&tblk->synclist); + LOGSYNC_UNLOCK(log); + } +} + + +/* + * txMaplock() + * + * function: allocate a transaction lock for freed page/entry; + * for freed page, maplock is used as xtlock/dtlock type; + */ +struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + lid_t lid; + struct tblock *tblk; + struct tlock *tlck; + struct maplock *maplock; + + TXN_LOCK(); + + /* + * allocate a tlock + */ + lid = txLockAlloc(); + tlck = lid_to_tlock(lid); + + /* + * initialize tlock + */ + tlck->tid = tid; + + /* bind the tlock and the object */ + tlck->flag = tlckINODELOCK; + tlck->ip = ip; + tlck->mp = NULL; + + tlck->type = type; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at tail of transaction tlock list */ + if (tid) { + tblk = tid_to_tblock(tid); + if (tblk->next) + lid_to_tlock(tblk->last)->next = lid; + else + tblk->next = lid; + tlck->next = 0; + tblk->last = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + list_add_tail(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list); + } + } + + TXN_UNLOCK(); + + /* initialize type dependent area for maplock */ + maplock = (struct maplock *) & tlck->lock; + maplock->next = 0; + maplock->maxcnt = 0; + maplock->index = 0; + + return tlck; +} + + +/* + * txLinelock() + * + * function: allocate a transaction lock for log vector list + */ +struct linelock *txLinelock(struct linelock * tlock) +{ + lid_t lid; + struct tlock *tlck; + struct linelock *linelock; + + TXN_LOCK(); + + /* allocate a TxLock structure */ + lid = txLockAlloc(); + tlck = lid_to_tlock(lid); + + TXN_UNLOCK(); + + /* initialize linelock */ + linelock = (struct linelock *) tlck; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKLONG; + linelock->index = 0; + + /* append linelock after tlock */ + linelock->next = tlock->next; + tlock->next = lid; + + return linelock; +} + + + +/* + * transaction commit management + * ----------------------------- + */ + +/* + * NAME: txCommit() + * + * FUNCTION: commit the changes to the objects specified in + * clist. For journalled segments only the + * changes of the caller are committed, ie by tid. + * for non-journalled segments the data are flushed to + * disk and then the change to the disk inode and indirect + * blocks committed (so blocks newly allocated to the + * segment will be made a part of the segment atomically). + * + * all of the segments specified in clist must be in + * one file system. no more than 6 segments are needed + * to handle all unix svcs. + * + * if the i_nlink field (i.e. disk inode link count) + * is zero, and the type of inode is a regular file or + * directory, or symbolic link , the inode is truncated + * to zero length. the truncation is committed but the + * VM resources are unaffected until it is closed (see + * iput and iclose). + * + * PARAMETER: + * + * RETURN: + * + * serialization: + * on entry the inode lock on each segment is assumed + * to be held. + * + * i/o error: + */ +int txCommit(tid_t tid, /* transaction identifier */ + int nip, /* number of inodes to commit */ + struct inode **iplist, /* list of inode to commit */ + int flag) +{ + int rc = 0; + struct commit cd; + struct jfs_log *log; + struct tblock *tblk; + struct lrd *lrd; + int lsn; + struct inode *ip; + struct jfs_inode_info *jfs_ip; + int k, n; + ino_t top; + struct super_block *sb; + + jfs_info("txCommit, tid = %d, flag = %d", tid, flag); + /* is read-only file system ? */ + if (isReadOnly(iplist[0])) { + rc = -EROFS; + goto TheEnd; + } + + sb = cd.sb = iplist[0]->i_sb; + cd.tid = tid; + + if (tid == 0) + tid = txBegin(sb, 0); + tblk = tid_to_tblock(tid); + + /* + * initialize commit structure + */ + log = JFS_SBI(sb)->log; + cd.log = log; + + /* initialize log record descriptor in commit */ + lrd = &cd.lrd; + lrd->logtid = cpu_to_le32(tblk->logtid); + lrd->backchain = 0; + + tblk->xflag |= flag; + + if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) + tblk->xflag |= COMMIT_LAZY; + /* + * prepare non-journaled objects for commit + * + * flush data pages of non-journaled file + * to prevent the file getting non-initialized disk blocks + * in case of crash. + * (new blocks - ) + */ + cd.iplist = iplist; + cd.nip = nip; + + /* + * acquire transaction lock on (on-disk) inodes + * + * update on-disk inode from in-memory inode + * acquiring transaction locks for AFTER records + * on the on-disk inode of file object + * + * sort the inodes array by inode number in descending order + * to prevent deadlock when acquiring transaction lock + * of on-disk inodes on multiple on-disk inode pages by + * multiple concurrent transactions + */ + for (k = 0; k < cd.nip; k++) { + top = (cd.iplist[k])->i_ino; + for (n = k + 1; n < cd.nip; n++) { + ip = cd.iplist[n]; + if (ip->i_ino > top) { + top = ip->i_ino; + cd.iplist[n] = cd.iplist[k]; + cd.iplist[k] = ip; + } + } + + ip = cd.iplist[k]; + jfs_ip = JFS_IP(ip); + + /* + * BUGBUG - This code has temporarily been removed. The + * intent is to ensure that any file data is written before + * the metadata is committed to the journal. This prevents + * uninitialized data from appearing in a file after the + * journal has been replayed. (The uninitialized data + * could be sensitive data removed by another user.) + * + * The problem now is that we are |