aboutsummaryrefslogtreecommitdiff
path: root/fs/jfs/jfs_logmgr.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_logmgr.c
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/jfs/jfs_logmgr.c')
-rw-r--r--fs/jfs/jfs_logmgr.c2524
1 files changed, 2524 insertions, 0 deletions
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
new file mode 100644
index 00000000000..b6a6869ebb4
--- /dev/null
+++ b/fs/jfs/jfs_logmgr.c
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2000-2004
+ * Portions Copyright (C) Christoph Hellwig, 2001-2002
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * jfs_logmgr.c: log manager
+ *
+ * for related information, see transaction manager (jfs_txnmgr.c), and
+ * recovery manager (jfs_logredo.c).
+ *
+ * note: for detail, RTFS.
+ *
+ * log buffer manager:
+ * special purpose buffer manager supporting log i/o requirements.
+ * per log serial pageout of logpage
+ * queuing i/o requests and redrive i/o at iodone
+ * maintain current logpage buffer
+ * no caching since append only
+ * appropriate jfs buffer cache buffers as needed
+ *
+ * group commit:
+ * transactions which wrote COMMIT records in the same in-memory
+ * log page during the pageout of previous/current log page(s) are
+ * committed together by the pageout of the page.
+ *
+ * TBD lazy commit:
+ * transactions are committed asynchronously when the log page
+ * containing their COMMIT records is paged out after it becomes full;
+ *
+ * serialization:
+ * . a per log lock serializes log write.
+ * . a per log lock serializes group commit.
+ * . a per log lock serializes log open/close;
+ *
+ * TBD log integrity:
+ * careful-write (ping-pong) of last logpage to recover from crash
+ * in overwrite.
+ * detection of split (out-of-order) write of physical sectors
+ * of last logpage via timestamp at end of each sector
+ * (with its mirror data array at trailer).
+ *
+ * alternatives:
+ * lsn - 64-bit monotonically increasing integer vs
+ * 32-bit lspn and page eor.
+ */
+
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h> /* for sync_blockdev() */
+#include <linux/bio.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_txnmgr.h"
+#include "jfs_debug.h"
+
+
+/*
+ * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
+ */
+static struct lbuf *log_redrive_list;
+static DEFINE_SPINLOCK(log_redrive_lock);
+DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
+
+
+/*
+ * log read/write serialization (per log)
+ *
+ * built on the per-log loglock semaphore (init_MUTEX/down/up);
+ * lmLog() takes it around the whole record write, and lmLogOpen()
+ * takes it while linking a file system onto the log's sb_list.
+ */
+#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
+#define LOG_LOCK(log) down(&((log)->loglock))
+#define LOG_UNLOCK(log) up(&((log)->loglock))
+
+
+/*
+ * log group commit serialization (per log)
+ *
+ * gclock is a spinlock; LOGGC_LOCK/LOGGC_UNLOCK use the
+ * spin_lock_irq/spin_unlock_irq variants.
+ * lmPostGC() does not use these macros: it takes the same gclock
+ * with spin_lock_irqsave/spin_unlock_irqrestore.
+ * LOGGC_WAKEUP wakes all waiters on the tblock's gcwait queue.
+ */
+
+#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
+#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
+#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
+#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
+
+/*
+ * log sync serialization (per log)
+ *
+ * LOGSYNC_DELTA: the smaller of 1/8 of the log size and 128 log pages;
+ * lmLogSync() uses it to bound the next syncpt trigger.
+ * LOGSYNC_BARRIER: 1/4 of the log size; lmLogSync() sets the
+ * log_SYNCBARRIER bit once more than this has been written.
+ * The commented-out pair below uses values twice as large.
+ */
+#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
+#define LOGSYNC_BARRIER(logsize) ((logsize)/4)
+/*
+#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
+#define LOGSYNC_BARRIER(logsize) ((logsize)/2)
+*/
+
+
+/*
+ * log buffer cache synchronization
+ *
+ * jfsLCacheLock is one file-scope spinlock (not per log);
+ * LCACHE_LOCK/LCACHE_UNLOCK save and restore the interrupt state
+ * in the caller-supplied flags variable.
+ */
+static DEFINE_SPINLOCK(jfsLCacheLock);
+
+#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
+#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
+
+/*
+ * See __SLEEP_COND in jfs_locks.h
+ *
+ * cond is tested once up front; __SLEEP_COND is entered only when
+ * it does not hold yet, with LCACHE_LOCK/LCACHE_UNLOCK passed as
+ * the lock and unlock commands.
+ */
+#define LCACHE_SLEEP_COND(wq, cond, flags) \
+do { \
+ if (cond) \
+ break; \
+ __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
+} while (0)
+
+#define LCACHE_WAKEUP(event) wake_up(event)
+
+
+/*
+ * lbuf buffer cache (lCache) control
+ */
+/* log buffer manager pageout control (cumulative, inclusive)
+ *
+ * the values are distinct bits and are OR-ed together in the flag
+ * argument of lbmWrite(), e.g. lbmWRITE | lbmRELEASE | lbmFREE.
+ */
+#define lbmREAD 0x0001
+#define lbmWRITE 0x0002 /* enqueue at tail of write queue;
+ * init pageout if at head of queue;
+ */
+#define lbmRELEASE 0x0004 /* remove from write queue
+ * at completion of pageout;
+ * do not free/recycle it yet:
+ * caller will free it;
+ */
+#define lbmSYNC 0x0008 /* do not return to freelist
+ * when removed from write queue;
+ */
+#define lbmFREE 0x0010 /* return to freelist
+ * at completion of pageout;
+ * the buffer may be recycled;
+ */
+#define lbmDONE 0x0020
+#define lbmERROR 0x0040 /* lmPostGC() tests this in l_flag and marks the tblocks tblkGC_ERROR */
+#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
+ * of log page
+ */
+#define lbmDIRECT 0x0100
+
+/*
+ * Global list of active external journals
+ *
+ * lmLogOpen() holds jfs_log_sem while it searches jfs_external_logs
+ * for the journal device and while it adds a newly opened log.
+ */
+static LIST_HEAD(jfs_external_logs);
+static struct jfs_log *dummy_log = NULL;
+static DECLARE_MUTEX(jfs_log_sem);
+
+/*
+ * external references
+ */
+extern void txLazyUnlock(struct tblock * tblk);
+extern int jfs_stop_threads;
+extern struct completion jfsIOwait;
+extern int jfs_tlocks_low;
+
+/*
+ * forward references
+ */
+static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
+ struct lrd * lrd, struct tlock * tlck);
+
+static int lmNextPage(struct jfs_log * log);
+static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
+ int activate);
+
+static int open_inline_log(struct super_block *sb);
+static int open_dummy_log(struct super_block *sb);
+static int lbmLogInit(struct jfs_log * log);
+static void lbmLogShutdown(struct jfs_log * log);
+static struct lbuf *lbmAllocate(struct jfs_log * log, int);
+static void lbmFree(struct lbuf * bp);
+static void lbmfree(struct lbuf * bp);
+static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
+static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
+static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
+static int lbmIOWait(struct lbuf * bp, int flag);
+static bio_end_io_t lbmIODone;
+static void lbmStartIO(struct lbuf * bp);
+static void lmGCwrite(struct jfs_log * log, int cant_block); /* the definition names this parameter cant_write */
+static int lmLogSync(struct jfs_log * log, int nosyncwait);
+
+
+
+/*
+ * statistics
+ *
+ * counters are bumped through INCREMENT(): commit in lmWriteRecord(),
+ * full_page and partial_page in lmGCwrite().
+ */
+#ifdef CONFIG_JFS_STATISTICS
+static struct lmStat {
+ uint commit; /* # of commit */
+ uint pagedone; /* # of page written */
+ uint submitted; /* # of pages submitted */
+ uint full_page; /* # of full pages submitted */
+ uint partial_page; /* # of partial pages submitted */
+} lmStat;
+#endif
+
+
+/*
+ * NAME:	lmLog()
+ *
+ * FUNCTION:	write one log record while holding LOG_LOCK.
+ *		When the record is logged from a metapage on behalf of a
+ *		transaction, the page and the tblock are first given a
+ *		recovery lsn and positioned on the log's synclist.
+ *
+ * PARAMETER:	log	- log to append to
+ *		tblk	- transaction block; NULL when JFS logs outside
+ *			  of a transaction
+ *		lrd	- log record descriptor to write
+ *		tlck	- transaction lock of the logged object; may be NULL
+ *
+ * RETURN:	lsn	- offset to the next log record to write (end-of-log),
+ *			  as returned by lmWriteRecord() or lmLogSync()
+ *
+ * note: todo: log error handler
+ */
+int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+	  struct tlock * tlck)
+{
+	int lsn;
+	int dist, tdist;
+	struct metapage *mp = NULL;
+
+	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
+		 log, tblk, lrd, tlck);
+
+	LOG_LOCK(log);
+
+	/*
+	 * recovery lsn bookkeeping is done only for a record that is
+	 * logged inside a transaction, from a lock that is not a
+	 * B+-tree root and that has a metapage attached
+	 */
+	if (tblk != NULL && tlck != NULL &&
+	    !(tlck->type & tlckBTROOT) && (mp = tlck->mp) != NULL) {
+		lsn = log->lsn;
+
+		LOGSYNC_LOCK(log);
+
+		/* first log write of the page: stamp it with the lsn */
+		if (mp->lsn == 0) {
+			mp->log = log;
+			mp->lsn = lsn;
+			log->count++;
+
+			/* page goes to the tail of the logsynclist */
+			list_add_tail(&mp->synclist, &log->synclist);
+		}
+
+		/*
+		 * the transaction inherits the oldest lsn of the pages
+		 * associated with allocation/deallocation of resources
+		 * (their log records are used to reconstruct the
+		 * allocation maps at recovery time); allocation map
+		 * pages inherit the transaction lsn at commit time, so
+		 * that the syncpt is forwarded past these records only
+		 * after the persistent maps have been updated and
+		 * propagated to home.
+		 */
+		if (tblk->lsn != 0) {
+			/* keep the oldest/smallest lsn seen so far */
+			logdiff(dist, mp->lsn, log);
+			logdiff(tdist, tblk->lsn, log);
+			if (dist < tdist) {
+				tblk->lsn = mp->lsn;
+
+				/* re-queue tblock right after the page */
+				list_move(&tblk->synclist, &mp->synclist);
+			}
+		} else {
+			/* first page logged by this transaction */
+			tblk->lsn = mp->lsn;
+			log->count++;
+
+			/* queue tblock right after the page */
+			list_add(&tblk->synclist, &mp->synclist);
+		}
+
+		LOGSYNC_UNLOCK(log);
+	}
+
+	/* move the record into the log */
+	lsn = lmWriteRecord(log, tblk, lrd, tlck);
+
+	/* forward the syncpt once the log reached the next trigger */
+	logdiff(dist, lsn, log);
+	if (dist >= log->nextsync)
+		lsn = lmLogSync(log, 0);
+
+	/* remember the new end-of-log */
+	log->lsn = lsn;
+
+	LOG_UNLOCK(log);
+
+	return lsn;
+}
+
+
+/*
+ * NAME: lmWriteRecord()
+ *
+ * FUNCTION: move the log record to current log page
+ * the data described by the tlock's linelock chain (if any) is
+ * copied first, each log vector followed by its 4-byte descriptor,
+ * then the log record descriptor itself; lmNextPage() is called
+ * whenever the end-of-log offset reaches LOGPSIZE - LOGPTLRSIZE.
+ * for a LOG_COMMIT record the tblock is additionally queued on
+ * the log's commit queue under LOGGC_LOCK.
+ *
+ * PARAMETER: log - log to write to
+ * tblk - transaction block (dereferenced only for LOG_COMMIT records)
+ * lrd - log record descriptor; lrd->length is filled in here
+ * tlck - transaction lock describing the data, NULL if no data
+ *
+ * RETURN: end-of-log address
+ * 0 if tlck carries none of the recognized lock flags
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int
+lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+ struct tlock * tlck)
+{
+ int lsn = 0; /* end-of-log address */
+ struct lbuf *bp; /* dst log page buffer */
+ struct logpage *lp; /* dst log page */
+ caddr_t dst; /* destination address in log page */
+ int dstoffset; /* end-of-log offset in log page */
+ int freespace; /* free space in log page */
+ caddr_t p; /* src meta-data page */
+ caddr_t src;
+ int srclen;
+ int nbytes; /* number of bytes to move */
+ int i;
+ int len; /* bytes of data + descriptors, stored in lrd->length */
+ struct linelock *linelock;
+ struct lv *lv;
+ struct lvd *lvd;
+ int l2linesize;
+
+ len = 0;
+
+ /* retrieve destination log page to write */
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = log->eor;
+
+ /* any log data to write ? */
+ if (tlck == NULL)
+ goto moveLrd;
+
+ /*
+ * move log record data
+ */
+ /* retrieve source meta-data page to log */
+ if (tlck->flag & tlckPAGELOCK) {
+ p = (caddr_t) (tlck->mp->data);
+ linelock = (struct linelock *) & tlck->lock;
+ }
+ /* retrieve source in-memory inode to log */
+ else if (tlck->flag & tlckINODELOCK) {
+ if (tlck->type & tlckDTREE)
+ p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
+ else
+ p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
+ linelock = (struct linelock *) & tlck->lock;
+ }
+#ifdef _JFS_WIP
+ else if (tlck->flag & tlckINLINELOCK) {
+
+ inlinelock = (struct inlinelock *) & tlck;
+ p = (caddr_t) & inlinelock->pxd;
+ linelock = (struct linelock *) & tlck;
+ }
+#endif /* _JFS_WIP */
+ else {
+ jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
+ return 0; /* Probably should trap */
+ }
+ l2linesize = linelock->l2linesize;
+
+ moveData: /* re-entered once per linelock on the chain */
+ ASSERT(linelock->index <= linelock->maxcnt);
+
+ lv = linelock->lv;
+ for (i = 0; i < linelock->index; i++, lv++) {
+ if (lv->length == 0)
+ continue;
+
+ /* is page full ? */
+ if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
+ /* page become full: move on to next page */
+ lmNextPage(log);
+
+ bp = log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+ }
+
+ /*
+ * move log vector data
+ *
+ * lv->offset and lv->length are in units of
+ * (1 << l2linesize) bytes
+ */
+ src = (u8 *) p + (lv->offset << l2linesize);
+ srclen = lv->length << l2linesize;
+ len += srclen;
+ while (srclen > 0) {
+ freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+ nbytes = min(freespace, srclen);
+ dst = (caddr_t) lp + dstoffset;
+ memcpy(dst, src, nbytes);
+ dstoffset += nbytes;
+
+ /* is page not full ? */
+ if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+ break;
+
+ /* page become full: move on to next page */
+ lmNextPage(log);
+
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+
+ srclen -= nbytes;
+ src += nbytes;
+ }
+
+ /*
+ * move log vector descriptor
+ */
+ len += 4; /* the descriptor advances the end-of-log by 4 bytes */
+ lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
+ lvd->offset = cpu_to_le16(lv->offset);
+ lvd->length = cpu_to_le16(lv->length);
+ dstoffset += 4;
+ jfs_info("lmWriteRecord: lv offset:%d length:%d",
+ lv->offset, lv->length);
+ }
+
+ if ((i = linelock->next)) { /* follow the linelock chain by lock id */
+ linelock = (struct linelock *) lid_to_tlock(i);
+ goto moveData;
+ }
+
+ /*
+ * move log record descriptor
+ */
+ moveLrd:
+ lrd->length = cpu_to_le16(len);
+
+ src = (caddr_t) lrd;
+ srclen = LOGRDSIZE;
+
+ while (srclen > 0) {
+ freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+ nbytes = min(freespace, srclen);
+ dst = (caddr_t) lp + dstoffset;
+ memcpy(dst, src, nbytes);
+
+ dstoffset += nbytes;
+ srclen -= nbytes;
+
+ /* are there more to move than freespace of page ? */
+ if (srclen)
+ goto pageFull;
+
+ /*
+ * end of log record descriptor
+ */
+
+ /* update last log record eor */
+ log->eor = dstoffset;
+ bp->l_eor = dstoffset;
+ lsn = (log->page << L2LOGPSIZE) + dstoffset;
+
+ if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
+ tblk->clsn = lsn; /* NOTE(review): tblk is not NULL-checked here; assumes COMMIT records always carry a tblock */
+ jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
+ bp->l_eor);
+
+ INCREMENT(lmStat.commit); /* # of commit */
+
+ /*
+ * enqueue tblock for group commit:
+ *
+ * enqueue tblock of non-trivial/synchronous COMMIT
+ * at tail of group commit queue
+ * (trivial/asynchronous COMMITs are ignored by
+ * group commit.)
+ */
+ LOGGC_LOCK(log);
+
+ /* init tblock gc state */
+ tblk->flag = tblkGC_QUEUE;
+ tblk->bp = log->bp;
+ tblk->pn = log->page;
+ tblk->eor = log->eor;
+
+ /* enqueue transaction to commit queue */
+ list_add_tail(&tblk->cqueue, &log->cqueue);
+
+ LOGGC_UNLOCK(log);
+ }
+
+ jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
+ le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
+
+ /* page not full ? */
+ if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+ return lsn;
+
+ pageFull: /* reached by goto (descriptor split) or by fall-through (page exactly full) */
+ /* page become full: move on to next page */
+ lmNextPage(log);
+
+ bp = (struct lbuf *) log->bp;
+ lp = (struct logpage *) bp->l_ldata;
+ dstoffset = LOGPHDRSIZE;
+ src += nbytes;
+ }
+
+ return lsn;
+}
+
+
+/*
+ * NAME:	lmNextPage()
+ *
+ * FUNCTION:	hand the current (full) log page to the write queue or to
+ *		group commit, then set up the following page as the
+ *		current log page.
+ *
+ * PARAMETER:	log	- log whose current page is full
+ *
+ * RETURN:	0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmNextPage(struct jfs_log * log)
+{
+	struct lbuf *curbp = log->bp;
+	struct logpage *page = (struct logpage *) curbp->l_ldata;
+	int curpn = log->page;			/* current page number */
+	int seq = le32_to_cpu(page->h.page);	/* log sequence page number */
+	struct tblock *tail = NULL;
+	struct lbuf *newbp;
+
+	LOGGC_LOCK(log);
+
+	/*
+	 * every tblk with a COMMIT record on the current page that has
+	 * not been committed yet is on the commit queue: it was queued
+	 * when its COMMIT record was written, before the page became
+	 * full.  Look at the tail of that queue.
+	 */
+	if (!list_empty(&log->cqueue))
+		tail = list_entry(log->cqueue.prev, struct tblock, cqueue);
+
+	if (tail == NULL || tail->pn != curpn) {
+		/*
+		 * page is not bound with an outstanding tblk:
+		 * finalize it and init write or mark it to be
+		 * redriven (lbmWRITE)
+		 */
+		curbp->l_ceor = curbp->l_eor;
+		page->h.eor = page->t.eor = cpu_to_le16(curbp->l_ceor);
+		lbmWrite(log, curbp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
+	} else {
+		/* page is bound with the tail tblk: mark end-of-page */
+		tail->flag |= tblkGC_EOP;
+
+		if (!(log->cflag & logGC_PAGEOUT)) {
+			/* no current GC leader, initiate group commit */
+			log->cflag |= logGC_PAGEOUT;
+			lmGCwrite(log, 0);
+		} else if (curbp->l_wqnext == NULL) {
+			/*
+			 * page is not on the write queue yet: just
+			 * enqueue it (no lbmWRITE, to prevent redrive)
+			 * so the pages keep their serial order while
+			 * more log pages are being added
+			 */
+			lbmWrite(log, curbp, 0, 0);
+		}
+	}
+
+	LOGGC_UNLOCK(log);
+
+	/*
+	 * advance to the next page; when the log wraps, the first data
+	 * page is 2 (0 is never used, 1 is the superblock)
+	 */
+	if (curpn == log->size - 1)
+		log->page = 2;
+	else
+		log->page = curpn + 1;
+	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
+
+	/* get a buffer for the new page and make it current */
+	newbp = lbmAllocate(log, log->page);
+	newbp->l_eor = log->eor;
+	log->bp = newbp;
+
+	/* stamp header and trailer of the new page */
+	page = (struct logpage *) newbp->l_ldata;
+	page->h.page = page->t.page = cpu_to_le32(seq + 1);
+	page->h.eor = page->t.eor = cpu_to_le16(LOGPHDRSIZE);
+
+	return 0;
+}
+
+
+/*
+ * NAME:	lmGroupCommit()
+ *
+ * FUNCTION:	group commit
+ *	initiate pageout of the pages with COMMIT in the order of
+ *	page number - redrive pageout of the page at the head of
+ *	pageout queue until full page has been written.
+ *
+ * PARAMETER:	log	- log the transaction committed to
+ *		tblk	- transaction block to be group committed
+ *
+ * RETURN:	0	- committed, or a lazy transaction that does not wait
+ *		-EIO	- the tblock was marked tblkGC_ERROR
+ *
+ * NOTE:
+ *	LOGGC_LOCK serializes log group commit queue, and
+ *	transaction blocks on the commit queue.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
+{
+	int rc = 0;
+
+	LOGGC_LOCK(log);
+
+	/* group committed already ? */
+	if (tblk->flag & tblkGC_COMMITTED)
+		goto committed;
+
+	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
+
+	if (tblk->xflag & COMMIT_LAZY)
+		tblk->flag |= tblkGC_LAZY;
+
+	/*
+	 * with no pageout in progress and a non-empty commit queue,
+	 * become group leader - unless this is a lazy transaction and
+	 * neither a log flush nor a tlock shortage forces the pageout
+	 */
+	if (!(log->cflag & logGC_PAGEOUT) && !list_empty(&log->cqueue)) {
+		if (!(tblk->xflag & COMMIT_LAZY) ||
+		    test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low) {
+			log->cflag |= logGC_PAGEOUT;
+
+			lmGCwrite(log, 0);
+		}
+	}
+
+	/* lazy transactions can leave now */
+	if (tblk->xflag & COMMIT_LAZY)
+		goto unlock;
+
+	/* lmGCwrite gives up LOGGC_LOCK, check again */
+	if (tblk->flag & tblkGC_COMMITTED)
+		goto committed;
+
+	/* upcount transaction waiting for completion */
+	log->gcrtc++;
+	tblk->flag |= tblkGC_READY;
+
+	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
+		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
+
+	/* removed from commit queue */
+      committed:
+	if (tblk->flag & tblkGC_ERROR)
+		rc = -EIO;
+
+      unlock:
+	LOGGC_UNLOCK(log);
+	return rc;
+}
+
+/*
+ * NAME:	lmGCwrite()
+ *
+ * FUNCTION:	group commit write
+ *	initiate write of log page, building a group of all transactions
+ *	with commit records on that page.
+ *
+ * PARAMETER:	log		- log with a non-empty commit queue
+ *		cant_write	- handed through unchanged as the last
+ *				  argument of lbmWrite()
+ *
+ * RETURN:	None
+ *
+ * NOTE:
+ *	LOGGC_LOCK must be held by caller.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+static void lmGCwrite(struct jfs_log * log, int cant_write)
+{
+	struct tblock *cur;
+	struct tblock *last = NULL;	/* last tblk bound to the page */
+	struct lbuf *bp;
+	struct logpage *lp;
+	int gcpn;			/* group commit page number */
+	int wflag;			/* lbmWrite() flags */
+	int page_full;
+
+	/*
+	 * the commit group is made of the leading run of tblks on the
+	 * commit queue whose COMMIT records sit on the same log page
+	 * as that of the head tblk
+	 */
+	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
+
+	list_for_each_entry(cur, &log->cqueue, cqueue) {
+		if (cur->pn != gcpn)
+			break;
+
+		/* state transition: (QUEUE, READY) -> COMMIT */
+		cur->flag |= tblkGC_COMMIT;
+		last = cur;
+	}
+
+	/*
+	 * pageout to commit transactions on the log page.
+	 */
+	bp = (struct lbuf *) last->bp;
+	lp = (struct logpage *) bp->l_ldata;
+
+	page_full = last->flag & tblkGC_EOP;
+	if (page_full) {
+		/* mark page to free at end of group commit of the page */
+		last->flag &= ~tblkGC_EOP;
+		last->flag |= tblkGC_FREE;
+		bp->l_ceor = bp->l_eor;
+		wflag = lbmWRITE | lbmRELEASE | lbmGC;
+	} else {
+		/* page is not yet full */
+		bp->l_ceor = last->eor;	/* ? bp->l_ceor = bp->l_eor; */
+		wflag = lbmWRITE | lbmGC;
+	}
+
+	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+	lbmWrite(log, bp, wflag, cant_write);
+
+	if (page_full) {
+		INCREMENT(lmStat.full_page);
+	} else {
+		INCREMENT(lmStat.partial_page);
+	}
+}
+
+/*
+ * NAME: lmPostGC()
+ *
+ * FUNCTION: group commit post-processing
+ * Processes transactions after their commit records have been written
+ * to disk, redriving log I/O if necessary.
+ *
+ * PARAMETER: bp - log page buffer whose pageout completed
+ *
+ * RETURN: None
+ *
+ * NOTE:
+ * This routine is called at interrupt time by lbmIODone
+ * (hence gclock is taken with irqsave/irqrestore rather than
+ * through LOGGC_LOCK/LOGGC_UNLOCK)
+ */
+static void lmPostGC(struct lbuf * bp)
+{
+ unsigned long flags;
+ struct jfs_log *log = bp->l_log;
+ struct logpage *lp;
+ struct tblock *tblk, *temp;
+
+ //LOGGC_LOCK(log);
+ spin_lock_irqsave(&log->gclock, flags);
+ /*
+ * current pageout of group commit completed.
+ *
+ * remove/wakeup transactions from commit queue who were
+ * group committed with the current log page
+ */
+ list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
+ if (!(tblk->flag & tblkGC_COMMIT))
+ break;
+ /* if transaction was marked GC_COMMIT then
+ * it has been shipped in the current pageout
+ * and made it to disk - it is committed.
+ */
+
+ if (bp->l_flag & lbmERROR)
+ tblk->flag |= tblkGC_ERROR;
+
+ /* remove it from the commit queue */
+ list_del(&tblk->cqueue);
+ tblk->flag &= ~tblkGC_QUEUE;
+
+ if (tblk == log->flush_tblk) {
+ /* we can stop flushing the log now */
+ clear_bit(log_FLUSH, &log->flag);
+ log->flush_tblk = NULL;
+ }
+
+ jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
+ tblk->flag);
+
+ if (!(tblk->xflag & COMMIT_FORCE))
+ /*
+ * Hand tblk over to lazy commit thread
+ */
+ txLazyUnlock(tblk);
+ else {
+ /* state transition: COMMIT -> COMMITTED */
+ tblk->flag |= tblkGC_COMMITTED;
+
+ if (tblk->flag & tblkGC_READY)
+ log->gcrtc--;
+
+ LOGGC_WAKEUP(tblk);
+ }
+
+ /* was page full before pageout ?
+ * (and this is the last tblk bound with the page)
+ */
+ if (tblk->flag & tblkGC_FREE)
+ lbmFree(bp);
+ /* did page become full after pageout ?
+ * (and this is the last tblk bound with the page)
+ */
+ else if (tblk->flag & tblkGC_EOP) {
+ /* finalize the page */
+ lp = (struct logpage *) bp->l_ldata;
+ bp->l_ceor = bp->l_eor;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+ jfs_info("lmPostGC: calling lbmWrite");
+ lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
+ 1);
+ }
+
+ }
+
+ /* are there any transactions who have entered lmGroupCommit()
+ * (whose COMMITs are after that of the last log page written.
+ * They are waiting for new group commit (in the __SLEEP_COND
+ * of lmGroupCommit())
+ * or lazy transactions are on a full (queued) log page,
+ * select the latest ready transaction as new group leader and
+ * wake her up to lead her group.
+ *
+ * tblk is dereferenced below only when the commit queue is not
+ * empty; every entry the loop walked past was deleted, so in
+ * that case the loop stopped via break and tblk is the entry
+ * now at the head of the queue.
+ */
+ if ((!list_empty(&log->cqueue)) &&
+ ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
+ test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
+ /*
+ * Call lmGCwrite with new group leader
+ */
+ lmGCwrite(log, 1);
+
+ /* no transaction are ready yet (transactions are only just
+ * queued (GC_QUEUE) and not entered for group commit yet).
+ * the first transaction entering group commit
+ * will elect herself as new group leader.
+ */
+ else
+ log->cflag &= ~logGC_PAGEOUT;
+
+ //LOGGC_UNLOCK(log);
+ spin_unlock_irqrestore(&log->gclock, flags);
+ return;
+}
+
+/*
+ * NAME:	lmLogSync()
+ *
+ * FUNCTION:	write log SYNCPT record for specified log
+ *	if new sync address is available
+ *	(normally the case if sync() is executed by back-ground
+ *	process).
+ *	if not, pick up a new sync address from the head of the
+ *	log's synclist (or the end-of-log if that list is empty).
+ *	calculate new value of nextsync which determines when
+ *	this code is called again.
+ *
+ *	called from lmLog().
+ *
+ * PARAMETER:	log		- log to write the SYNCPT record to
+ *		nosyncwait	- non-zero: return right after the next
+ *				  syncpt trigger has been computed,
+ *				  without considering the sync barrier
+ *
+ * RETURN:	lsn	- end-of-log address after the SYNCPT record, or
+ *			  log->lsn when no record was written
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmLogSync(struct jfs_log * log, int nosyncwait)
+{
+	struct jfs_sb_info *sbi;
+	struct logsyncblk *oldest;
+	struct lrd lrd;
+	int lsn;
+	int logsize;
+	int written;		/* written since last syncpt */
+	int free;		/* free space left available */
+	int delta;		/* additional delta to write normally */
+	int more;		/* additional write granted */
+
+	/*
+	 * forward syncpt:
+	 * if last sync is same as last syncpt, look for a newer
+	 * sync address
+	 */
+	if (log->sync == log->syncpt) {
+		LOGSYNC_LOCK(log);
+		/* ToDo: push dirty metapages out to disk (bmLogSync(log)) */
+
+		if (!list_empty(&log->synclist)) {
+			oldest = list_entry(log->synclist.next,
+					    struct logsyncblk, synclist);
+			log->sync = oldest->lsn;
+		} else
+			log->sync = log->lsn;
+		LOGSYNC_UNLOCK(log);
+	}
+
+	if (log->sync == log->syncpt) {
+		/* nothing new to record */
+		lsn = log->lsn;
+	} else {
+		/*
+		 * sync moved: make sure all of the "written" metapages
+		 * actually make it to disk - start the writes for every
+		 * file system on this log, then wait for them
+		 */
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			if (sbi->flag & JFS_NOINTEGRITY)
+				continue;
+			filemap_fdatawrite(sbi->ipbmap->i_mapping);
+			filemap_fdatawrite(sbi->ipimap->i_mapping);
+			filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
+		}
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			if (sbi->flag & JFS_NOINTEGRITY)
+				continue;
+			filemap_fdatawait(sbi->ipbmap->i_mapping);
+			filemap_fdatawait(sbi->ipimap->i_mapping);
+			filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
+		}
+
+		/* write a SYNCPT record with syncpt = sync */
+		lrd.logtid = 0;
+		lrd.backchain = 0;
+		lrd.type = cpu_to_le16(LOG_SYNCPT);
+		lrd.length = 0;
+		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
+		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+
+		/* reset syncpt = sync */
+		log->syncpt = log->sync;
+	}
+
+	/*
+	 * setup next syncpt trigger (SWAG)
+	 */
+	logsize = log->logsize;
+
+	logdiff(written, lsn, log);
+	free = logsize - written;
+	delta = LOGSYNC_DELTA(logsize);
+	more = min(free / 2, delta);
+	if (more >= 2 * LOGPSIZE) {
+		/* next syncpt trigger = written + more */
+		log->nextsync = written + more;
+	} else {
+		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
+		/*
+		 * log wrapping
+		 *
+		 * option 1 - panic ? No.!
+		 * option 2 - shutdown file systems
+		 *	      associated with log ?
+		 * option 3 - extend log ?
+		 * option 4 - second chance
+		 *
+		 * mark log wrapped, and continue.
+		 * when all active transactions are completed,
+		 * mark log valid for recovery.
+		 * if crashed during invalid state, log state
+		 * implies invalid log, forcing fsck().
+		 */
+		/* mark log state log wrap in log superblock */
+		/* log->state = LOGWRAP; */
+
+		/* reset sync point computation */
+		log->syncpt = log->sync = lsn;
+		log->nextsync = delta;
+	}
+
+	/*
+	 * unless called from outside of a transaction (e.g., sync()):
+	 * if number of bytes written from last sync point is more
+	 * than 1/4 of the log size, stop new transactions from
+	 * starting until all current transactions are completed
+	 * by setting syncbarrier flag.
+	 */
+	if (!nosyncwait &&
+	    written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
+		set_bit(log_SYNCBARRIER, &log->flag);
+		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
+			 log->syncpt);
+		/*
+		 * We may have to initiate group commit
+		 */
+		jfs_flush_journal(log, 0);
+	}
+
+	return lsn;
+}
+
+
+/*
+ * NAME:	lmLogOpen()
+ *
+ * FUNCTION:	open the log on first open;
+ *		insert filesystem in the active list of the log.
+ *
+ *		JFS_NOINTEGRITY mounts are passed to open_dummy_log() and
+ *		JFS_INLINELOG mounts to open_inline_log(); everything
+ *		else uses an external journal, which is shared when the
+ *		device is already on jfs_external_logs.
+ *
+ * PARAMETER:	sb	- super block of the file system being mounted
+ *
+ * RETURN:	0	- success (external journal: sbi->log is set)
+ *		-EINVAL	- journal device already open with another uuid
+ *		-ENOMEM	- no memory for the log descriptor
+ *		other	- error passed up from open_by_devnum(),
+ *			  bd_claim(), lmLogInit() or lmLogFileSystem()
+ *
+ * serialization: jfs_log_sem is held from the journal lookup until
+ *		the file system is linked to the log (or the open
+ *		has failed); LOG_LOCK() is taken around the sb_list
+ *		insertion.
+ */
+int lmLogOpen(struct super_block *sb)
+{
+	int rc;
+	struct block_device *bdev;
+	struct jfs_log *log;
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+	if (sbi->flag & JFS_NOINTEGRITY)
+		return open_dummy_log(sb);
+
+	if (sbi->mntflag & JFS_INLINELOG)
+		return open_inline_log(sb);
+
+	down(&jfs_log_sem);
+	list_for_each_entry(log, &jfs_external_logs, journal_list) {
+		if (log->bdev->bd_dev == sbi->logdev) {
+			if (memcmp(log->uuid, sbi->loguuid,
+				   sizeof(log->uuid))) {
+				jfs_warn("wrong uuid on JFS journal\n");
+				up(&jfs_log_sem);
+				return -EINVAL;
+			}
+			/*
+			 * add file system to log active file system list
+			 */
+			if ((rc = lmLogFileSystem(log, sbi, 1))) {
+				up(&jfs_log_sem);
+				return rc;
+			}
+			goto journal_found;
+		}
+	}
+
+	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
+		up(&jfs_log_sem);
+		return -ENOMEM;
+	}
+	memset(log, 0, sizeof(struct jfs_log));
+	INIT_LIST_HEAD(&log->sb_list);
+	init_waitqueue_head(&log->syncwait);
+
+	/*
+	 *	external log as separate logical volume
+	 *
+	 * file systems to log may have n-to-1 relationship;
+	 */
+
+	bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE);
+	if (IS_ERR(bdev)) {
+		/*
+		 * PTR_ERR() already yields the negative errno; negating
+		 * it would hand a positive value back to the caller
+		 */
+		rc = PTR_ERR(bdev);
+		goto free;
+	}
+
+	if ((rc = bd_claim(bdev, log))) {
+		goto close;
+	}
+
+	log->bdev = bdev;
+	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
+
+	/*
+	 * initialize log:
+	 */
+	if ((rc = lmLogInit(log)))
+		goto unclaim;
+
+	list_add(&log->journal_list, &jfs_external_logs);
+
+	/*
+	 * add file system to log active file system list
+	 */
+	if ((rc = lmLogFileSystem(log, sbi, 1)))
+		goto shutdown;
+
+journal_found:
+	LOG_LOCK(log);
+	list_add(&sbi->log_list, &log->sb_list);
+	sbi->log = log;
+	LOG_UNLOCK(log);
+
+	up(&jfs_log_sem);
+	return 0;
+
+	/*
+	 *	unwind on error
+	 */
+      shutdown:		/* unwind lbmLogInit() */
+	list_del(&log->journal_list);
+	lbmLogShutdown(log);
+
+      unclaim:
+	bd_release(bdev);
+
+      close:		/* close external log device */
+	blkdev_put(bdev);
+
+      free:		/* free log descriptor */
+	up(&jfs_log_sem);
+	kfree(log);
+
+	jfs_warn("lmLogOpen: exit(%d)", rc);
+	return rc;
+}
+
+static int open_inline_log(struct super_block *sb)
+{
+ struct jfs_log *log;
+ int rc;
+
+ if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
+ return -ENOMEM;
+ memset(log, 0, sizeof(struct jfs_log));
+ INIT_LIST_HEAD(&log->sb_list);
+ init_waitqueue_head(&am