author		Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit		1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree		0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_logmgr.c
tag		v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/jfs/jfs_logmgr.c')
-rw-r--r--	fs/jfs/jfs_logmgr.c	2524
1 files changed, 2524 insertions, 0 deletions
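
The diff below repeatedly works with log sequence numbers: lmWriteRecord() encodes one as (log->page << L2LOGPSIZE) + dstoffset, and lmLog()/lmLogSync() compare them through logdiff(), which measures the distance from the sync point modulo the log size. A minimal sketch of that arithmetic, assuming JFS's 4 KiB log page (LOGPSIZE = 4096, L2LOGPSIZE = 12) and a logdiff() modeled on the macro in jfs_logmgr.h; neither definition appears in this diff:

/* Illustrative sketch only, not part of the commit. */
#include <assert.h>
#include <stdio.h>

#define LOGPSIZE	4096	/* assumed JFS log page size */
#define L2LOGPSIZE	12	/* log2(LOGPSIZE) */

/* an lsn packs (log page number, end-of-record offset) into one int */
static int make_lsn(int page, int eor)
{
	return (page << L2LOGPSIZE) + eor;
}

/* bytes written past the sync point, wrapping around the log end */
static int logdiff(int lsn, int syncpt, int logsize)
{
	int diff = lsn - syncpt;

	if (diff < 0)
		diff += logsize;	/* lsn wrapped back toward page 2 */
	return diff;
}

int main(void)
{
	int logsize = 1024 * LOGPSIZE;		/* hypothetical 4 MiB log */
	int syncpt = make_lsn(1000, 512);	/* last SYNCPT written */
	int lsn = make_lsn(3, 128);		/* current record, after wrap */

	printf("page 3, eor 0x80 -> lsn 0x%x\n", lsn);
	printf("written since syncpt: %d bytes\n",
	       logdiff(lsn, syncpt, logsize));
	assert(logdiff(lsn, syncpt, logsize) >= 0);
	return 0;
}

The wraparound branch is why a 32-bit lsn suffices here; the header comment in the file weighs this against the alternative of a 64-bit monotonically increasing counter.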
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
new file mode 100644
index 00000000000..b6a6869ebb4
--- /dev/null
+++ b/fs/jfs/jfs_logmgr.c
@@ -0,0 +1,2524 @@
+/*
+ *   Copyright (C) International Business Machines Corp., 2000-2004
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ *	jfs_logmgr.c: log manager
+ *
+ * for related information, see transaction manager (jfs_txnmgr.c), and
+ * recovery manager (jfs_logredo.c).
+ *
+ * note: for detail, RTFS.
+ *
+ * log buffer manager:
+ * special purpose buffer manager supporting log i/o requirements.
+ * per log serial pageout of logpage
+ * queuing i/o requests and redrive i/o at iodone
+ * maintain current logpage buffer
+ * no caching since append only
+ * appropriate jfs buffer cache buffers as needed
+ *
+ * group commit:
+ * transactions which wrote COMMIT records in the same in-memory
+ * log page during the pageout of previous/current log page(s) are
+ * committed together by the pageout of the page.
+ *
+ * TBD lazy commit:
+ * transactions are committed asynchronously when the log page
+ * containing its COMMIT is paged out when it becomes full;
+ *
+ * serialization:
+ * . a per log lock serializes log writes.
+ * . a per log lock serializes group commit.
+ * . a per log lock serializes log open/close;
+ *
+ * TBD log integrity:
+ * careful-write (ping-pong) of last logpage to recover from crash
+ * in overwrite.
+ * detection of split (out-of-order) write of physical sectors
+ * of last logpage via timestamp at end of each sector
+ * (with its mirror data array at trailer).
+ *
+ * alternatives:
+ * lsn - 64-bit monotonically increasing integer vs
+ * 32-bit lspn and page eor.
+ */
+
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>		/* for sync_blockdev() */
+#include <linux/bio.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_txnmgr.h"
+#include "jfs_debug.h"
+
+
+/*
+ * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
+ */
+static struct lbuf *log_redrive_list;
+static DEFINE_SPINLOCK(log_redrive_lock);
+DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
+
+
+/*
+ *	log read/write serialization (per log)
+ */
+#define LOG_LOCK_INIT(log)	init_MUTEX(&(log)->loglock)
+#define LOG_LOCK(log)		down(&((log)->loglock))
+#define LOG_UNLOCK(log)		up(&((log)->loglock))
+
+
+/*
+ *	log group commit serialization (per log)
+ */
+
+#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
+#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
+#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
+#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
+
+/*
+ *	log sync serialization (per log)
+ */
+#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
+#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
+/*
+#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
+#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
+*/
+
+
+/*
+ *	log buffer cache synchronization
+ */
+static DEFINE_SPINLOCK(jfsLCacheLock);
+
+#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
+#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
+
+/*
+ * See __SLEEP_COND in jfs_locks.h
+ */
+#define LCACHE_SLEEP_COND(wq, cond, flags)	\
+do {						\
+	if (cond)				\
+		break;				\
+	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
+} while (0)
+
+#define	LCACHE_WAKEUP(event)	wake_up(event)
+
+
+/*
+ *	lbuf buffer cache (lCache) control
+ */
+/* log buffer manager pageout control (cumulative, inclusive) */
+#define	lbmREAD		0x0001
+#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
+				 * init pageout if at head of queue;
+				 */
+#define	lbmRELEASE	0x0004	/* remove from write queue
+				 * at completion of pageout;
+				 * do not free/recycle it yet:
+				 * caller will free it;
+				 */
+#define	lbmSYNC		0x0008	/* do not return to freelist
+				 * when removed from write queue;
+				 */
+#define lbmFREE		0x0010	/* return to freelist
+				 * at completion of pageout;
+				 * the buffer may be recycled;
+				 */
+#define	lbmDONE		0x0020
+#define	lbmERROR	0x0040
+#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
+				 * of log page
+				 */
+#define lbmDIRECT	0x0100
+
+/*
+ * Global list of active external journals
+ */
+static LIST_HEAD(jfs_external_logs);
+static struct jfs_log *dummy_log = NULL;
+static DECLARE_MUTEX(jfs_log_sem);
+
+/*
+ * external references
+ */
+extern void txLazyUnlock(struct tblock * tblk);
+extern int jfs_stop_threads;
+extern struct completion jfsIOwait;
+extern int jfs_tlocks_low;
+
+/*
+ * forward references
+ */
+static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
+			 struct lrd * lrd, struct tlock * tlck);
+
+static int lmNextPage(struct jfs_log * log);
+static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
+			   int activate);
+
+static int open_inline_log(struct super_block *sb);
+static int open_dummy_log(struct super_block *sb);
+static int lbmLogInit(struct jfs_log * log);
+static void lbmLogShutdown(struct jfs_log * log);
+static struct lbuf *lbmAllocate(struct jfs_log * log, int);
+static void lbmFree(struct lbuf * bp);
+static void lbmfree(struct lbuf * bp);
+static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
+static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
+		     int cant_block);
+static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
+static int lbmIOWait(struct lbuf * bp, int flag);
+static bio_end_io_t lbmIODone;
+static void lbmStartIO(struct lbuf * bp);
+static void lmGCwrite(struct jfs_log * log, int cant_block);
+static int lmLogSync(struct jfs_log * log, int nosyncwait);
+
+
+
+/*
+ *	statistics
+ */
+#ifdef CONFIG_JFS_STATISTICS
+static struct lmStat {
+	uint commit;		/* # of commit */
+	uint pagedone;		/* # of page written */
+	uint submitted;		/* # of pages submitted */
+	uint full_page;		/* # of full pages submitted */
+	uint partial_page;	/* # of partial pages submitted */
+} lmStat;
+#endif
+
+
+/*
+ * NAME:	lmLog()
+ *
+ * FUNCTION:	write a log record;
+ *
+ * PARAMETER:
+ *
+ * RETURN:	lsn - offset to the next log record to write (end-of-log);
+ *		-1  - error;
+ *
+ * note: todo: log error handler
+ */
+int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+	  struct tlock * tlck)
+{
+	int lsn;
+	int diffp, difft;
+	struct metapage *mp = NULL;
+
+	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
+		 log, tblk, lrd, tlck);
+
+	LOG_LOCK(log);
+
+	/* log by (out-of-transaction) JFS ? */
+	if (tblk == NULL)
+		goto writeRecord;
+
+	/* log from page ? */
+	if (tlck == NULL ||
+	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
+		goto writeRecord;
+
+	/*
+	 *	initialize/update page/transaction recovery lsn
+	 */
+	lsn = log->lsn;
+
+	LOGSYNC_LOCK(log);
+
+	/*
+	 * initialize page lsn if first log write of the page
+	 */
+	if (mp->lsn == 0) {
+		mp->log = log;
+		mp->lsn = lsn;
+		log->count++;
+
+		/* insert page at tail of logsynclist */
+		list_add_tail(&mp->synclist, &log->synclist);
+	}
+
+	/*
+	 *	initialize/update lsn of tblock of the page
+	 *
+	 * transaction inherits oldest lsn of pages associated
+	 * with allocation/deallocation of resources (their
+	 * log records are used to reconstruct allocation map
+	 * at recovery time: inode for inode allocation map,
+	 * B+-tree index of extent descriptors for block
+	 * allocation map);
+	 * allocation map pages inherit transaction lsn at
+	 * commit time to allow forwarding log syncpt past log
+	 * records associated with allocation/deallocation of
+	 * resources only after persistent map of these map pages
+	 * have been updated and propagated to home.
+	 */
+	/*
+	 * initialize transaction lsn:
+	 */
+	if (tblk->lsn == 0) {
+		/* inherit lsn of its first page logged */
+		tblk->lsn = mp->lsn;
+		log->count++;
+
+		/* insert tblock after the page on logsynclist */
+		list_add(&tblk->synclist, &mp->synclist);
+	}
+	/*
+	 * update transaction lsn:
+	 */
+	else {
+		/* inherit oldest/smallest lsn of page */
+		logdiff(diffp, mp->lsn, log);
+		logdiff(difft, tblk->lsn, log);
+		if (diffp < difft) {
+			/* update tblock lsn with page lsn */
+			tblk->lsn = mp->lsn;
+
+			/* move tblock after page on logsynclist */
+			list_move(&tblk->synclist, &mp->synclist);
+		}
+	}
+
+	LOGSYNC_UNLOCK(log);
+
+	/*
+	 *	write the log record
+	 */
+      writeRecord:
+	lsn = lmWriteRecord(log, tblk, lrd, tlck);
+
+	/*
+	 * forward log syncpt if log reached next syncpt trigger
+	 */
+	logdiff(diffp, lsn, log);
+	if (diffp >= log->nextsync)
+		lsn = lmLogSync(log, 0);
+
+	/* update end-of-log lsn */
+	log->lsn = lsn;
+
+	LOG_UNLOCK(log);
+
+	/* return end-of-log address */
+	return lsn;
+}
+
+
+/*
+ * NAME:	lmWriteRecord()
+ *
+ * FUNCTION:	move the log record to current log page
+ *
+ * PARAMETER:	cd	- commit descriptor
+ *
+ * RETURN:	end-of-log address
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int
+lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+	      struct tlock * tlck)
+{
+	int lsn = 0;		/* end-of-log address */
+	struct lbuf *bp;	/* dst log page buffer */
+	struct logpage *lp;	/* dst log page */
+	caddr_t dst;		/* destination address in log page */
+	int dstoffset;		/* end-of-log offset in log page */
+	int freespace;		/* free space in log page */
+	caddr_t p;		/* src meta-data page */
+	caddr_t src;
+	int srclen;
+	int nbytes;		/* number of bytes to move */
+	int i;
+	int len;
+	struct linelock *linelock;
+	struct lv *lv;
+	struct lvd *lvd;
+	int l2linesize;
+
+	len = 0;
+
+	/* retrieve destination log page to write */
+	bp = (struct lbuf *) log->bp;
+	lp = (struct logpage *) bp->l_ldata;
+	dstoffset = log->eor;
+
+	/* any log data to write ? */
+	if (tlck == NULL)
+		goto moveLrd;
+
+	/*
+	 *	move log record data
+	 */
+	/* retrieve source meta-data page to log */
+	if (tlck->flag & tlckPAGELOCK) {
+		p = (caddr_t) (tlck->mp->data);
+		linelock = (struct linelock *) & tlck->lock;
+	}
+	/* retrieve source in-memory inode to log */
+	else if (tlck->flag & tlckINODELOCK) {
+		if (tlck->type & tlckDTREE)
+			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
+		else
+			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
+		linelock = (struct linelock *) & tlck->lock;
+	}
+#ifdef	_JFS_WIP
+	else if (tlck->flag & tlckINLINELOCK) {
+
+		inlinelock = (struct inlinelock *) & tlck;
+		p = (caddr_t) & inlinelock->pxd;
+		linelock = (struct linelock *) & tlck;
+	}
+#endif	/* _JFS_WIP */
+	else {
+		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
+		return 0;	/* Probably should trap */
+	}
+	l2linesize = linelock->l2linesize;
+
+      moveData:
+	ASSERT(linelock->index <= linelock->maxcnt);
+
+	lv = linelock->lv;
+	for (i = 0; i < linelock->index; i++, lv++) {
+		if (lv->length == 0)
+			continue;
+
+		/* is page full ? */
+		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
+			/* page become full: move on to next page */
+			lmNextPage(log);
+
+			bp = log->bp;
+			lp = (struct logpage *) bp->l_ldata;
+			dstoffset = LOGPHDRSIZE;
+		}
+
+		/*
+		 * move log vector data
+		 */
+		src = (u8 *) p + (lv->offset << l2linesize);
+		srclen = lv->length << l2linesize;
+		len += srclen;
+		while (srclen > 0) {
+			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+			nbytes = min(freespace, srclen);
+			dst = (caddr_t) lp + dstoffset;
+			memcpy(dst, src, nbytes);
+			dstoffset += nbytes;
+
+			/* is page not full ? */
+			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+				break;
+
+			/* page become full: move on to next page */
+			lmNextPage(log);
+
+			bp = (struct lbuf *) log->bp;
+			lp = (struct logpage *) bp->l_ldata;
+			dstoffset = LOGPHDRSIZE;
+
+			srclen -= nbytes;
+			src += nbytes;
+		}
+
+		/*
+		 * move log vector descriptor
+		 */
+		len += 4;
+		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
+		lvd->offset = cpu_to_le16(lv->offset);
+		lvd->length = cpu_to_le16(lv->length);
+		dstoffset += 4;
+		jfs_info("lmWriteRecord: lv offset:%d length:%d",
+			 lv->offset, lv->length);
+	}
+
+	if ((i = linelock->next)) {
+		linelock = (struct linelock *) lid_to_tlock(i);
+		goto moveData;
+	}
+
+	/*
+	 *	move log record descriptor
+	 */
+      moveLrd:
+	lrd->length = cpu_to_le16(len);
+
+	src = (caddr_t) lrd;
+	srclen = LOGRDSIZE;
+
+	while (srclen > 0) {
+		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
+		nbytes = min(freespace, srclen);
+		dst = (caddr_t) lp + dstoffset;
+		memcpy(dst, src, nbytes);
+
+		dstoffset += nbytes;
+		srclen -= nbytes;
+
+		/* are there more to move than freespace of page ? */
+		if (srclen)
+			goto pageFull;
+
+		/*
+		 * end of log record descriptor
+		 */
+
+		/* update last log record eor */
+		log->eor = dstoffset;
+		bp->l_eor = dstoffset;
+		lsn = (log->page << L2LOGPSIZE) + dstoffset;
+
+		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
+			tblk->clsn = lsn;
+			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
+				 bp->l_eor);
+
+			INCREMENT(lmStat.commit);	/* # of commit */
+
+			/*
+			 * enqueue tblock for group commit:
+			 *
+			 * enqueue tblock of non-trivial/synchronous COMMIT
+			 * at tail of group commit queue
+			 * (trivial/asynchronous COMMITs are ignored by
+			 * group commit.)
+			 */
+			LOGGC_LOCK(log);
+
+			/* init tblock gc state */
+			tblk->flag = tblkGC_QUEUE;
+			tblk->bp = log->bp;
+			tblk->pn = log->page;
+			tblk->eor = log->eor;
+
+			/* enqueue transaction to commit queue */
+			list_add_tail(&tblk->cqueue, &log->cqueue);
+
+			LOGGC_UNLOCK(log);
+		}
+
+		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
+			 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
+
+		/* page not full ? */
+		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
+			return lsn;
+
+	      pageFull:
+		/* page become full: move on to next page */
+		lmNextPage(log);
+
+		bp = (struct lbuf *) log->bp;
+		lp = (struct logpage *) bp->l_ldata;
+		dstoffset = LOGPHDRSIZE;
+		src += nbytes;
+	}
+
+	return lsn;
+}
+
+
+/*
+ * NAME:	lmNextPage()
+ *
+ * FUNCTION:	write current page and allocate next page.
+ *
+ * PARAMETER:	log
+ *
+ * RETURN:	0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmNextPage(struct jfs_log * log)
+{
+	struct logpage *lp;
+	int lspn;		/* log sequence page number */
+	int pn;			/* current page number */
+	struct lbuf *bp;
+	struct lbuf *nextbp;
+	struct tblock *tblk;
+
+	/* get current log page number and log sequence page number */
+	pn = log->page;
+	bp = log->bp;
+	lp = (struct logpage *) bp->l_ldata;
+	lspn = le32_to_cpu(lp->h.page);
+
+	LOGGC_LOCK(log);
+
+	/*
+	 *	write or queue the full page at the tail of write queue
+	 */
+	/* get the tail tblk on commit queue */
+	if (list_empty(&log->cqueue))
+		tblk = NULL;
+	else
+		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
+
+	/* every tblk who has COMMIT record on the current page,
+	 * and has not been committed, must be on commit queue
+	 * since tblk is queued at commit queue at the time
+	 * of writing its COMMIT record on the page before
+	 * page becomes full (even though the tblk thread
+	 * who wrote COMMIT record may have been suspended
+	 * currently);
+	 */
+
+	/* is page bound with outstanding tail tblk ? */
+	if (tblk && tblk->pn == pn) {
+		/* mark tblk for end-of-page */
+		tblk->flag |= tblkGC_EOP;
+
+		if (log->cflag & logGC_PAGEOUT) {
+			/* if page is not already on write queue,
+			 * just enqueue (no lbmWRITE to prevent redrive)
+			 * buffer to wqueue to ensure correct serial order
+			 * of the pages since log pages will be added
+			 * continuously
+			 */
+			if (bp->l_wqnext == NULL)
+				lbmWrite(log, bp, 0, 0);
+		} else {
+			/*
+			 * No current GC leader, initiate group commit
+			 */
+			log->cflag |= logGC_PAGEOUT;
+			lmGCwrite(log, 0);
+		}
+	}
+	/* page is not bound with outstanding tblk:
+	 * init write or mark it to be redriven (lbmWRITE)
+	 */
+	else {
+		/* finalize the page */
+		bp->l_ceor = bp->l_eor;
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
+	}
+	LOGGC_UNLOCK(log);
+
+	/*
+	 *	allocate/initialize next page
+	 */
+	/* if log wraps, the first data page of log is 2
+	 * (0 never used, 1 is superblock).
+	 */
+	log->page = (pn == log->size - 1) ? 2 : pn + 1;
+	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
+
+	/* allocate/initialize next log page buffer */
+	nextbp = lbmAllocate(log, log->page);
+	nextbp->l_eor = log->eor;
+	log->bp = nextbp;
+
+	/* initialize next log page */
+	lp = (struct logpage *) nextbp->l_ldata;
+	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
+	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
+
+	return 0;
+}
+
+
+/*
+ * NAME:	lmGroupCommit()
+ *
+ * FUNCTION:	group commit
+ *	initiate pageout of the pages with COMMIT in the order of
+ *	page number - redrive pageout of the page at the head of
+ *	pageout queue until full page has been written.
+ *
+ * RETURN:
+ *
+ * NOTE:
+ *	LOGGC_LOCK serializes log group commit queue, and
+ *	transaction blocks on the commit queue.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
+{
+	int rc = 0;
+
+	LOGGC_LOCK(log);
+
+	/* group committed already ? */
+	if (tblk->flag & tblkGC_COMMITTED) {
+		if (tblk->flag & tblkGC_ERROR)
+			rc = -EIO;
+
+		LOGGC_UNLOCK(log);
+		return rc;
+	}
+	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
+
+	if (tblk->xflag & COMMIT_LAZY)
+		tblk->flag |= tblkGC_LAZY;
+
+	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
+	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
+	     || jfs_tlocks_low)) {
+		/*
+		 * No pageout in progress
+		 *
+		 * start group commit as its group leader.
+		 */
+		log->cflag |= logGC_PAGEOUT;
+
+		lmGCwrite(log, 0);
+	}
+
+	if (tblk->xflag & COMMIT_LAZY) {
+		/*
+		 * Lazy transactions can leave now
+		 */
+		LOGGC_UNLOCK(log);
+		return 0;
+	}
+
+	/* lmGCwrite gives up LOGGC_LOCK, check again */
+
+	if (tblk->flag & tblkGC_COMMITTED) {
+		if (tblk->flag & tblkGC_ERROR)
+			rc = -EIO;
+
+		LOGGC_UNLOCK(log);
+		return rc;
+	}
+
+	/* upcount transaction waiting for completion
+	 */
+	log->gcrtc++;
+	tblk->flag |= tblkGC_READY;
+
+	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
+		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
+
+	/* removed from commit queue */
+	if (tblk->flag & tblkGC_ERROR)
+		rc = -EIO;
+
+	LOGGC_UNLOCK(log);
+	return rc;
+}
+
+/*
+ * NAME:	lmGCwrite()
+ *
+ * FUNCTION:	group commit write
+ *	initiate write of log page, building a group of all transactions
+ *	with commit records on that page.
+ *
+ * RETURN:	None
+ *
+ * NOTE:
+ *	LOGGC_LOCK must be held by caller.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+static void lmGCwrite(struct jfs_log * log, int cant_write)
+{
+	struct lbuf *bp;
+	struct logpage *lp;
+	int gcpn;		/* group commit page number */
+	struct tblock *tblk;
+	struct tblock *xtblk = NULL;
+
+	/*
+	 * build the commit group of a log page
+	 *
+	 * scan commit queue and make a commit group of all
+	 * transactions with COMMIT records on the same log page.
+	 */
+	/* get the head tblk on the commit queue */
+	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
+
+	list_for_each_entry(tblk, &log->cqueue, cqueue) {
+		if (tblk->pn != gcpn)
+			break;
+
+		xtblk = tblk;
+
+		/* state transition: (QUEUE, READY) -> COMMIT */
+		tblk->flag |= tblkGC_COMMIT;
+	}
+	tblk = xtblk;		/* last tblk of the page */
+
+	/*
+	 * pageout to commit transactions on the log page.
+	 */
+	bp = (struct lbuf *) tblk->bp;
+	lp = (struct logpage *) bp->l_ldata;
+	/* is page already full ? */
+	if (tblk->flag & tblkGC_EOP) {
+		/* mark page to free at end of group commit of the page */
+		tblk->flag &= ~tblkGC_EOP;
+		tblk->flag |= tblkGC_FREE;
+		bp->l_ceor = bp->l_eor;
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
+			 cant_write);
+		INCREMENT(lmStat.full_page);
+	}
+	/* page is not yet full */
+	else {
+		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
+		INCREMENT(lmStat.partial_page);
+	}
+}
+
+/*
+ * NAME:	lmPostGC()
+ *
+ * FUNCTION:	group commit post-processing
+ *	Processes transactions after their commit records have been written
+ *	to disk, redriving log I/O if necessary.
+ *
+ * RETURN:	None
+ *
+ * NOTE:
+ *	This routine is called at interrupt time by lbmIODone
+ */
+static void lmPostGC(struct lbuf * bp)
+{
+	unsigned long flags;
+	struct jfs_log *log = bp->l_log;
+	struct logpage *lp;
+	struct tblock *tblk, *temp;
+
+	//LOGGC_LOCK(log);
+	spin_lock_irqsave(&log->gclock, flags);
+	/*
+	 * current pageout of group commit completed.
+	 *
+	 * remove/wakeup transactions from commit queue who were
+	 * group committed with the current log page
+	 */
+	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
+		if (!(tblk->flag & tblkGC_COMMIT))
+			break;
+		/* if transaction was marked GC_COMMIT then
+		 * it has been shipped in the current pageout
+		 * and made it to disk - it is committed.
+		 */
+
+		if (bp->l_flag & lbmERROR)
+			tblk->flag |= tblkGC_ERROR;
+
+		/* remove it from the commit queue */
+		list_del(&tblk->cqueue);
+		tblk->flag &= ~tblkGC_QUEUE;
+
+		if (tblk == log->flush_tblk) {
+			/* we can stop flushing the log now */
+			clear_bit(log_FLUSH, &log->flag);
+			log->flush_tblk = NULL;
+		}
+
+		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
+			 tblk->flag);
+
+		if (!(tblk->xflag & COMMIT_FORCE))
+			/*
+			 * Hand tblk over to lazy commit thread
+			 */
+			txLazyUnlock(tblk);
+		else {
+			/* state transition: COMMIT -> COMMITTED */
+			tblk->flag |= tblkGC_COMMITTED;
+
+			if (tblk->flag & tblkGC_READY)
+				log->gcrtc--;
+
+			LOGGC_WAKEUP(tblk);
+		}
+
+		/* was page full before pageout ?
+		 * (and this is the last tblk bound with the page)
+		 */
+		if (tblk->flag & tblkGC_FREE)
+			lbmFree(bp);
+		/* did page become full after pageout ?
+		 * (and this is the last tblk bound with the page)
+		 */
+		else if (tblk->flag & tblkGC_EOP) {
+			/* finalize the page */
+			lp = (struct logpage *) bp->l_ldata;
+			bp->l_ceor = bp->l_eor;
+			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+			jfs_info("lmPostGC: calling lbmWrite");
+			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
+				 1);
+		}
+
+	}
+
+	/* are there any transactions who have entered lmGroupCommit()
+	 * (whose COMMITs are after that of the last log page written).
+	 * They are waiting for new group commit (above at (SLEEP 1))
+	 * or lazy transactions are on a full (queued) log page,
+	 * select the latest ready transaction as new group leader and
+	 * wake her up to lead her group.
+	 */
+	if ((!list_empty(&log->cqueue)) &&
+	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
+	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
+		/*
+		 * Call lmGCwrite with new group leader
+		 */
+		lmGCwrite(log, 1);
+
+	/* no transactions are ready yet (transactions are only just
+	 * queued (GC_QUEUE) and not entered for group commit yet).
+	 * the first transaction entering group commit
+	 * will elect herself as new group leader.
+	 */
+	else
+		log->cflag &= ~logGC_PAGEOUT;
+
+	//LOGGC_UNLOCK(log);
+	spin_unlock_irqrestore(&log->gclock, flags);
+	return;
+}
+
+/*
+ * NAME:	lmLogSync()
+ *
+ * FUNCTION:	write log SYNCPT record for specified log
+ *	if new sync address is available
+ *	(normally the case if sync() is executed by background
+ *	process).
+ *	if not, explicitly run jfs_blogsync() to initiate
+ *	getting of new sync address.
+ *	calculate new value of i_nextsync which determines when
+ *	this code is called again.
+ *
+ *	this is called only from lmLog().
+ *
+ * PARAMETER:	ip	- pointer to log's inode.
+ *
+ * RETURN:	0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmLogSync(struct jfs_log * log, int nosyncwait)
+{
+	int logsize;
+	int written;		/* written since last syncpt */
+	int free;		/* free space left available */
+	int delta;		/* additional delta to write normally */
+	int more;		/* additional write granted */
+	struct lrd lrd;
+	int lsn;
+	struct logsyncblk *lp;
+
+	/*
+	 *	forward syncpt
+	 */
+	/* if last sync is same as last syncpt,
+	 * invoke sync point forward processing to update sync.
+	 */
+
+	if (log->sync == log->syncpt) {
+		LOGSYNC_LOCK(log);
+		/* ToDo: push dirty metapages out to disk */
+//		bmLogSync(log);
+
+		if (list_empty(&log->synclist))
+			log->sync = log->lsn;
+		else {
+			lp = list_entry(log->synclist.next,
+					struct logsyncblk, synclist);
+			log->sync = lp->lsn;
+		}
+		LOGSYNC_UNLOCK(log);
+
+	}
+
+	/* if sync is different from last syncpt,
+	 * write a SYNCPT record with syncpt = sync.
+	 * reset syncpt = sync
+	 */
+	if (log->sync != log->syncpt) {
+		struct jfs_sb_info *sbi;
+
+		/*
+		 * We need to make sure all of the "written" metapages
+		 * actually make it to disk
+		 */
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			if (sbi->flag & JFS_NOINTEGRITY)
+				continue;
+			filemap_fdatawrite(sbi->ipbmap->i_mapping);
+			filemap_fdatawrite(sbi->ipimap->i_mapping);
+			filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
+		}
+		list_for_each_entry(sbi, &log->sb_list, log_list) {
+			if (sbi->flag & JFS_NOINTEGRITY)
+				continue;
+			filemap_fdatawait(sbi->ipbmap->i_mapping);
+			filemap_fdatawait(sbi->ipimap->i_mapping);
+			filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
+		}
+
+		lrd.logtid = 0;
+		lrd.backchain = 0;
+		lrd.type = cpu_to_le16(LOG_SYNCPT);
+		lrd.length = 0;
+		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
+		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+
+		log->syncpt = log->sync;
+	} else
+		lsn = log->lsn;
+
+	/*
+	 *	setup next syncpt trigger (SWAG)
+	 */
+	logsize = log->logsize;
+
+	logdiff(written, lsn, log);
+	free = logsize - written;
+	delta = LOGSYNC_DELTA(logsize);
+	more = min(free / 2, delta);
+	if (more < 2 * LOGPSIZE) {
+		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
+		/*
+		 *	log wrapping
+		 *
+		 * option 1 - panic ? No!
+		 * option 2 - shutdown file systems
+		 *	      associated with log ?
+		 * option 3 - extend log ?
+		 */
+		/*
+		 * option 4 - second chance
+		 *
+		 * mark log wrapped, and continue.
+		 * when all active transactions are completed,
+		 * mark log valid for recovery.
+		 * if crashed during invalid state, log state
+		 * implies invalid log, forcing fsck().
+		 */
+		/* mark log state log wrap in log superblock */
+		/* log->state = LOGWRAP; */
+
+		/* reset sync point computation */
+		log->syncpt = log->sync = lsn;
+		log->nextsync = delta;
+	} else
+		/* next syncpt trigger = written + more */
+		log->nextsync = written + more;
+
+	/* return if lmLogSync() from outside of transaction, e.g., sync() */
+	if (nosyncwait)
+		return lsn;
+
+	/* if number of bytes written from last sync point is more
+	 * than 1/4 of the log size, stop new transactions from
+	 * starting until all current transactions are completed
+	 * by setting syncbarrier flag.
+	 */
+	if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
+		set_bit(log_SYNCBARRIER, &log->flag);
+		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
+			 log->syncpt);
+		/*
+		 * We may have to initiate group commit
+		 */
+		jfs_flush_journal(log, 0);
+	}
+
+	return lsn;
+}
+
+
+/*
+ * NAME:	lmLogOpen()
+ *
+ * FUNCTION:	open the log on first open;
+ *	insert filesystem in the active list of the log.
+ *
+ * PARAMETER:	ipmnt	- file system mount inode
+ *		iplog	- log inode (out)
+ *
+ * RETURN:
+ *
+ * serialization:
+ */
+int lmLogOpen(struct super_block *sb)
+{
+	int rc;
+	struct block_device *bdev;
+	struct jfs_log *log;
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+	if (sbi->flag & JFS_NOINTEGRITY)
+		return open_dummy_log(sb);
+
+	if (sbi->mntflag & JFS_INLINELOG)
+		return open_inline_log(sb);
+
+	down(&jfs_log_sem);
+	list_for_each_entry(log, &jfs_external_logs, journal_list) {
+		if (log->bdev->bd_dev == sbi->logdev) {
+			if (memcmp(log->uuid, sbi->loguuid,
+				   sizeof(log->uuid))) {
+				jfs_warn("wrong uuid on JFS journal\n");
+				up(&jfs_log_sem);
+				return -EINVAL;
+			}
+			/*
+			 * add file system to log active file system list
+			 */
+			if ((rc = lmLogFileSystem(log, sbi, 1))) {
+				up(&jfs_log_sem);
+				return rc;
+			}
+			goto journal_found;
+		}
+	}
+
+	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
+		up(&jfs_log_sem);
+		return -ENOMEM;
+	}
+	memset(log, 0, sizeof(struct jfs_log));
+	INIT_LIST_HEAD(&log->sb_list);
+	init_waitqueue_head(&log->syncwait);
+
+	/*
+	 *	external log as separate logical volume
+	 *
+	 * file systems to log may have n-to-1 relationship;
+	 */
+
+	bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE);
+	if (IS_ERR(bdev)) {
+		rc = PTR_ERR(bdev);
+		goto free;
+	}
+
+	if ((rc = bd_claim(bdev, log))) {
+		goto close;
+	}
+
+	log->bdev = bdev;
+	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
+
+	/*
+	 * initialize log:
+	 */
+	if ((rc = lmLogInit(log)))
+		goto unclaim;
+
+	list_add(&log->journal_list, &jfs_external_logs);
+
+	/*
+	 * add file system to log active file system list
+	 */
+	if ((rc = lmLogFileSystem(log, sbi, 1)))
+		goto shutdown;
+
+journal_found:
+	LOG_LOCK(log);
+	list_add(&sbi->log_list, &log->sb_list);
+	sbi->log = log;
+	LOG_UNLOCK(log);
+
+	up(&jfs_log_sem);
+	return 0;
+
+	/*
+	 *	unwind on error
+	 */
+      shutdown:		/* unwind lbmLogInit() */
+	list_del(&log->journal_list);
+	lbmLogShutdown(log);
+
+      unclaim:
+	bd_release(bdev);
+
+      close:		/* close external log device */
+	blkdev_put(bdev);
+
+      free:		/* free log descriptor */
+	up(&jfs_log_sem);
+	kfree(log);
+
+	jfs_warn("lmLogOpen: exit(%d)", rc);
+	return rc;
+}
+
+static int open_inline_log(struct super_block *sb)
+{
+	struct jfs_log *log;
+	int rc;
+
+	if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
+		return -ENOMEM;
+	memset(log, 0, sizeof(struct jfs_log));
+	INIT_LIST_HEAD(&log->sb_list);
+	init_waitqueue_head(&log->syncwait);
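
The trigger arithmetic in lmLogSync() above condenses to a few lines. A hedged sketch of that policy, using the same assumed 4 KiB page size as the earlier sketch; LOGSYNC_DELTA and LOGSYNC_BARRIER are taken verbatim from the macros defined near the top of this file:

/* Sketch of lmLogSync()'s next-syncpt policy, not kernel code. */
#include <stdio.h>

#define LOGPSIZE		4096	/* assumed JFS log page size */
#define MIN(a, b)		((a) < (b) ? (a) : (b))
#define LOGSYNC_DELTA(sz)	MIN((sz) / 8, 128 * LOGPSIZE)
#define LOGSYNC_BARRIER(sz)	((sz) / 4)

int main(void)
{
	int logsize = 1024 * LOGPSIZE;	/* hypothetical 4 MiB log */
	int written = 3 * logsize / 4;	/* bytes logged since last syncpt */
	int free = logsize - written;
	int delta = LOGSYNC_DELTA(logsize);
	int more = MIN(free / 2, delta);

	if (more < 2 * LOGPSIZE)
		/* cannot grant even two pages: the "log wrap" branch */
		printf("log wrap: syncpt reset, nextsync = %d\n", delta);
	else
		printf("nextsync = written + more = %d\n", written + more);

	/* past 1/4 of the log since the last syncpt: raise the barrier */
	if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE)
		printf("log_SYNCBARRIER set: hold new transactions\n");
	return 0;
}

With these numbers the barrier fires (written is 3/4 of the log), which is the point where lmLogSync() also calls jfs_flush_journal() to push the group commit along.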