diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 4098 |
1 files changed, 4098 insertions, 0 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c new file mode 100644 index 00000000000..9824b5bf0ec --- /dev/null +++ b/fs/xfs/xfs_log_recover.c @@ -0,0 +1,4098 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_sb.h" +#include "xfs_trans.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_error.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_imap.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" +#include "xfs_ialloc_btree.h" +#include "xfs_ialloc.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" +#include "xfs_alloc_btree.h" +#include "xfs_log_recover.h" +#include "xfs_extfree_item.h" +#include "xfs_trans_priv.h" +#include "xfs_bit.h" +#include "xfs_quota.h" +#include "xfs_rw.h" + +STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); +STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); +STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, + xlog_recover_item_t *item); +#if defined(DEBUG) +STATIC void xlog_recover_check_summary(xlog_t *); +STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); +#else +#define xlog_recover_check_summary(log) +#define xlog_recover_check_ail(mp, lip, gen) +#endif + + +/* + * Sector aligned buffer routines for buffer create/read/write/access + */ + +#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ + ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? 
\ + ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) +#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) + +xfs_buf_t * +xlog_get_bp( + xlog_t *log, + int num_bblks) +{ + ASSERT(num_bblks > 0); + + if (log->l_sectbb_log) { + if (num_bblks > 1) + num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); + } + return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); +} + +void +xlog_put_bp( + xfs_buf_t *bp) +{ + xfs_buf_free(bp); +} + + +/* + * nbblks should be uint, but oh well. Just want to catch that 32-bit length. + */ +int +xlog_bread( + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, + xfs_buf_t *bp) +{ + int error; + + if (log->l_sectbb_log) { + blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); + nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); + } + + ASSERT(nbblks > 0); + ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); + ASSERT(bp); + + XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); + XFS_BUF_READ(bp); + XFS_BUF_BUSY(bp); + XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); + XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); + + xfsbdstrat(log->l_mp, bp); + if ((error = xfs_iowait(bp))) + xfs_ioerror_alert("xlog_bread", log->l_mp, + bp, XFS_BUF_ADDR(bp)); + return error; +} + +/* + * Write out the buffer at the given block for the given number of blocks. + * The buffer is kept locked across the write and is returned locked. + * This can only be used for synchronous log writes. 
 */
int
xlog_bwrite(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp)
{
	int		error;

	/* Widen the request to whole log sectors when sectors exceed one BB. */
	if (log->l_sectbb_log) {
		blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
		nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
	}

	ASSERT(nbblks > 0);
	ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));

	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
	XFS_BUF_ZEROFLAGS(bp);
	XFS_BUF_BUSY(bp);
	XFS_BUF_HOLD(bp);
	XFS_BUF_PSEMA(bp, PRIBIO);
	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);

	/* A non-zero result is reported here and handed back to the caller. */
	if ((error = xfs_bwrite(log->l_mp, bp)))
		xfs_ioerror_alert("xlog_bwrite", log->l_mp,
				  bp, XFS_BUF_ADDR(bp));
	return error;
}

/*
 * Return a pointer to the data for log block blk_no inside bp.
 *
 * xlog_bread() rounds the start block down to a sector boundary, so when
 * the log has a sector size larger than a basic block the requested block
 * sits (blk_no & l_sectbb_mask) basic blocks into the buffer.  Without
 * sector alignment the start of the buffer is returned.
 */
xfs_caddr_t
xlog_align(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp)
{
	xfs_caddr_t	ptr;

	if (!log->l_sectbb_log)
		return XFS_BUF_PTR(bp);

	ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
	/* The offset plus the requested length must still fit in the buffer. */
	ASSERT(XFS_BUF_SIZE(bp) >=
		BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
	return ptr;
}

#ifdef DEBUG
/*
 * dump debug superblock and log record information
 *
 * Prints the superblock uuid and the expected format (XLOG_FMT), followed
 * by the uuid and format found in the given log record header.
 */
STATIC void
xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	int			b;

	printk("%s:  SB : uuid = ", __FUNCTION__);
	for (b = 0; b < 16; b++)
		printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]);
	printk(", fmt = %d\n", XLOG_FMT);
	printk("    log : uuid = ");
	for (b = 0; b < 16; b++)
		printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]);
	printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
}
#else
#define xlog_header_check_dump(mp, head)
#endif

/*
 * check log record header for recovery
 *
 * Returns 0 when the header's format and uuid match this filesystem, or
 * EFSCORRUPTED (after warning and dumping both headers) when either the
 * format or the uuid differs.
 */
STATIC int
xlog_header_check_recover(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);

	/*
	 * IRIX doesn't write the h_fmt field and leaves it zeroed
	 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
	 * a dirty log created in IRIX.
	 */
	if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) {
		xlog_warn(
	"XFS: dirty log written in incompatible format - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(1)",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xlog_warn(
	"XFS: dirty log entry has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(2)",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

/*
 * check a log record header at mount time
 *
 * No I/O is done here; the caller supplies the header.  A nil uuid is
 * accepted (with a warning) as an IRIX style log.  Any other uuid that
 * does not match the superblock yields EFSCORRUPTED.  Unlike
 * xlog_header_check_recover(), h_fmt is not examined.
 */
STATIC int
xlog_header_check_mount(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);

	if (uuid_is_nil(&head->h_fs_uuid)) {
		/*
		 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
		 * h_fs_uuid is nil, we assume this log was last mounted
		 * by IRIX and continue.
		 */
		xlog_warn("XFS: nil uuid in log - IRIX style log");
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xlog_warn("XFS: log has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_mount",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

/*
 * I/O completion handler for buffers written during recovery.
 *
 * The buffer's private pointer is expected to hold the xfs_mount_t.  On
 * an I/O error the error is reported and the filesystem is shut down.
 * In all cases the private pointer and iodone function are cleared before
 * the buffer is passed to xfs_biodone().
 */
STATIC void
xlog_recover_iodone(
	struct xfs_buf	*bp)
{
	xfs_mount_t	*mp;

	ASSERT(XFS_BUF_FSPRIVATE(bp, void *));

	if (XFS_BUF_GETERROR(bp)) {
		/*
		 * We're not going to bother about retrying
		 * this during recovery. One strike!
		 */
		mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
		xfs_ioerror_alert("xlog_recover_iodone",
				  mp, bp, XFS_BUF_ADDR(bp));
		xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
	}
	XFS_BUF_SET_FSPRIVATE(bp, NULL);
	XFS_BUF_CLR_IODONE_FUNC(bp);
	xfs_biodone(bp);
}

/*
 * This routine finds (to an approximation) the first block in the physical
 * log which contains the given cycle.  It uses a binary search algorithm.
 * Note that the algorithm can not be perfect because the disk will not
 * necessarily be perfect.
 *
 * On success *last_blk has been narrowed towards the first block carrying
 * 'cycle' and 0 is returned.  A read error from xlog_bread() is returned
 * unchanged.
 */
int
xlog_find_cycle_start(
	xlog_t		*log,
	xfs_buf_t	*bp,
	xfs_daddr_t	first_blk,
	xfs_daddr_t	*last_blk,
	uint		cycle)
{
	xfs_caddr_t	offset;
	xfs_daddr_t	mid_blk;
	uint		mid_cycle;
	int		error;

	mid_blk = BLK_AVG(first_blk, *last_blk);
	/* Stop once the midpoint coincides with either end of the window. */
	while (mid_blk != first_blk && mid_blk != *last_blk) {
		if ((error = xlog_bread(log, mid_blk, 1, bp)))
			return error;
		offset = xlog_align(log, mid_blk, 1, bp);
		mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
		if (mid_cycle == cycle) {
			*last_blk = mid_blk;
			/* last_half_cycle == mid_cycle */
		} else {
			first_blk = mid_blk;
			/* first_half_cycle == mid_cycle */
		}
		mid_blk = BLK_AVG(first_blk, *last_blk);
	}
	ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
	       (mid_blk == *last_blk && mid_blk-1 == first_blk));

	return 0;
}

/*
 * Check that the range of blocks does not contain the cycle number
 * given.  The scan needs to occur from front to back and the ptr into the
 * region must be updated since a later routine will need to perform another
 * test.  If the region is completely good, we end up returning the same
 * last block number.
 *
 * Set *new_blk to -1 if no block in the range carries the given cycle
 * number.  This is an invalid block number
 * since we don't ever expect logs to get this large.
+ */ +STATIC int +xlog_find_verify_cycle( + xlog_t *log, + xfs_daddr_t start_blk, + int nbblks, + uint stop_on_cycle_no, + xfs_daddr_t *new_blk) +{ + xfs_daddr_t i, j; + uint cycle; + xfs_buf_t *bp; + xfs_daddr_t bufblks; + xfs_caddr_t buf = NULL; + int error = 0; + + bufblks = 1 << ffs(nbblks); + + while (!(bp = xlog_get_bp(log, bufblks))) { + /* can't get enough memory to do everything in one big buffer */ + bufblks >>= 1; + if (bufblks <= log->l_sectbb_log) + return ENOMEM; + } + + for (i = start_blk; i < start_blk + nbblks; i += bufblks) { + int bcount; + + bcount = min(bufblks, (start_blk + nbblks - i)); + + if ((error = xlog_bread(log, i, bcount, bp))) + goto out; + + buf = xlog_align(log, i, bcount, bp); + for (j = 0; j < bcount; j++) { + cycle = GET_CYCLE(buf, ARCH_CONVERT); + if (cycle == stop_on_cycle_no) { + *new_blk = i+j; + goto out; + } + + buf += BBSIZE; + } + } + + *new_blk = -1; + +out: + xlog_put_bp(bp); + return error; +} + +/* + * Potentially backup over partial log record write. + * + * In the typical case, last_blk is the number of the block directly after + * a good log record. Therefore, we subtract one to get the block number + * of the last block in the given buffer. extra_bblks contains the number + * of blocks we would have read on a previous read. This happens when the + * last log record is split over the end of the physical log. + * + * extra_bblks is the number of blocks potentially verified on a previous + * call to this routine. 
+ */ +STATIC int +xlog_find_verify_log_record( + xlog_t *log, + xfs_daddr_t start_blk, + xfs_daddr_t *last_blk, + int extra_bblks) +{ + xfs_daddr_t i; + xfs_buf_t *bp; + xfs_caddr_t offset = NULL; + xlog_rec_header_t *head = NULL; + int error = 0; + int smallmem = 0; + int num_blks = *last_blk - start_blk; + int xhdrs; + + ASSERT(start_blk != 0 || *last_blk != start_blk); + + if (!(bp = xlog_get_bp(log, num_blks))) { + if (!(bp = xlog_get_bp(log, 1))) + return ENOMEM; + smallmem = 1; + } else { + if ((error = xlog_bread(log, start_blk, num_blks, bp))) + goto out; + offset = xlog_align(log, start_blk, num_blks, bp); + offset += ((num_blks - 1) << BBSHIFT); + } + + for (i = (*last_blk) - 1; i >= 0; i--) { + if (i < start_blk) { + /* valid log record not found */ + xlog_warn( + "XFS: Log inconsistent (didn't find previous header)"); + ASSERT(0); + error = XFS_ERROR(EIO); + goto out; + } + + if (smallmem) { + if ((error = xlog_bread(log, i, 1, bp))) + goto out; + offset = xlog_align(log, i, 1, bp); + } + + head = (xlog_rec_header_t *)offset; + + if (XLOG_HEADER_MAGIC_NUM == + INT_GET(head->h_magicno, ARCH_CONVERT)) + break; + + if (!smallmem) + offset -= BBSIZE; + } + + /* + * We hit the beginning of the physical log & still no header. Return + * to caller. If caller can handle a return of -1, then this routine + * will be called again for the end of the physical log. + */ + if (i == -1) { + error = -1; + goto out; + } + + /* + * We have the final block of the good log (the first block + * of the log record _before_ the head. So we check the uuid. + */ + if ((error = xlog_header_check_mount(log->l_mp, head))) + goto out; + + /* + * We may have found a log record header before we expected one. + * last_blk will be the 1st block # with a given cycle #. We may end + * up reading an entire log record. In this case, we don't want to + * reset last_blk. Only when last_blk points in the middle of a log + * record do we update last_blk. 
+ */ + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { + uint h_size = INT_GET(head->h_size, ARCH_CONVERT); + + xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + xhdrs++; + } else { + xhdrs = 1; + } + + if (*last_blk - i + extra_bblks + != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) + *last_blk = i; + +out: + xlog_put_bp(bp); + return error; +} + +/* + * Head is defined to be the point of the log where the next log write + * write could go. This means that incomplete LR writes at the end are + * eliminated when calculating the head. We aren't guaranteed that previous + * LR have complete transactions. We only know that a cycle number of + * current cycle number -1 won't be present in the log if we start writing + * from our current block number. + * + * last_blk contains the block number of the first block with a given + * cycle number. + * + * Return: zero if normal, non-zero if error. + */ +int +xlog_find_head( + xlog_t *log, + xfs_daddr_t *return_head_blk) +{ + xfs_buf_t *bp; + xfs_caddr_t offset; + xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; + int num_scan_bblks; + uint first_half_cycle, last_half_cycle; + uint stop_on_cycle; + int error, log_bbnum = log->l_logBBsize; + + /* Is the end of the log device zeroed? */ + if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { + *return_head_blk = first_blk; + + /* Is the whole lot zeroed? 
*/ + if (!first_blk) { + /* Linux XFS shouldn't generate totally zeroed logs - + * mkfs etc write a dummy unmount record to a fresh + * log so we can store the uuid in there + */ + xlog_warn("XFS: totally zeroed log"); + } + + return 0; + } else if (error) { + xlog_warn("XFS: empty log check failed"); + return error; + } + + first_blk = 0; /* get cycle # of 1st block */ + bp = xlog_get_bp(log, 1); + if (!bp) + return ENOMEM; + if ((error = xlog_bread(log, 0, 1, bp))) + goto bp_err; + offset = xlog_align(log, 0, 1, bp); + first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + + last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ + if ((error = xlog_bread(log, last_blk, 1, bp))) + goto bp_err; + offset = xlog_align(log, last_blk, 1, bp); + last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + ASSERT(last_half_cycle != 0); + + /* + * If the 1st half cycle number is equal to the last half cycle number, + * then the entire log is stamped with the same cycle number. In this + * case, head_blk can't be set to zero (which makes sense). The below + * math doesn't work out properly with head_blk equal to zero. Instead, + * we set it to log_bbnum which is an invalid block number, but this + * value makes the math correct. If head_blk doesn't changed through + * all the tests below, *head_blk is set to zero at the very end rather + * than log_bbnum. In a sense, log_bbnum and zero are the same block + * in a circular file. + */ + if (first_half_cycle == last_half_cycle) { + /* + * In this case we believe that the entire log should have + * cycle number last_half_cycle. We need to scan backwards + * from the end verifying that there are no holes still + * containing last_half_cycle - 1. If we find such a hole, + * then the start of that hole will be the new head. The + * simple case looks like + * x | x ... | x - 1 | x + * Another case that fits this picture would be + * x | x + 1 | x ... 
| x + * In this case the head really is somwhere at the end of the + * log, as one of the latest writes at the beginning was + * incomplete. + * One more case is + * x | x + 1 | x ... | x - 1 | x + * This is really the combination of the above two cases, and + * the head has to end up at the start of the x-1 hole at the + * end of the log. + * + * In the 256k log case, we will read from the beginning to the + * end of the log and search for cycle numbers equal to x-1. + * We don't worry about the x+1 blocks that we encounter, + * because we know that they cannot be the head since the log + * started with x. + */ + head_blk = log_bbnum; + stop_on_cycle = last_half_cycle - 1; + } else { + /* + * In this case we want to find the first block with cycle + * number matching last_half_cycle. We expect the log to be + * some variation on + * x + 1 ... | x ... + * The first block with cycle number x (last_half_cycle) will + * be where the new head belongs. First we do a binary search + * for the first occurrence of last_half_cycle. The binary + * search may not be totally accurate, so then we scan back + * from there looking for occurrences of last_half_cycle before + * us. If that backwards scan wraps around the beginning of + * the log, then we look for occurrences of last_half_cycle - 1 + * at the end of the log. The cases we're looking for look + * like + * x + 1 ... | x | x + 1 | x ... + * ^ binary search stopped here + * or + * x + 1 ... | x ... | x - 1 | x + * <---------> less than scan distance + */ + stop_on_cycle = last_half_cycle; + if ((error = xlog_find_cycle_start(log, bp, first_blk, + &head_blk, last_half_cycle))) + goto bp_err; + } + + /* + * Now validate the answer. Scan back some number of maximum possible + * blocks and make sure each one has the expected cycle number. The + * maximum is determined by the total possible amount of buffering + * in the in-core log. 
The following number can be made tighter if + * we actually look at the block size of the filesystem. + */ + num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); + if (head_blk >= num_scan_bblks) { + /* + * We are guaranteed that the entire check can be performed + * in one buffer. + */ + start_blk = head_blk - num_scan_bblks; + if ((error = xlog_find_verify_cycle(log, + start_blk, num_scan_bblks, + stop_on_cycle, &new_blk))) + goto bp_err; + if (new_blk != -1) + head_blk = new_blk; + } else { /* need to read 2 parts of log */ + /* + * We are going to scan backwards in the log in two parts. + * First we scan the physical end of the log. In this part + * of the log, we are looking for blocks with cycle number + * last_half_cycle - 1. + * If we find one, then we know that the log starts there, as + * we've found a hole that didn't get written in going around + * the end of the physical log. The simple case for this is + * x + 1 ... | x ... | x - 1 | x + * <---------> less than scan distance + * If all of the blocks at the end of the log have cycle number + * last_half_cycle, then we check the blocks at the start of + * the log looking for occurrences of last_half_cycle. If we + * find one, then our current estimate for the location of the + * first occurrence of last_half_cycle is wrong and we move + * back to the hole we've found. This case looks like + * x + 1 ... | x | x + 1 | x ... + * ^ binary search stopped here + * Another case we need to handle that only occurs in 256k + * logs is + * x + 1 ... | x ... | x+1 | x ... + * ^ binary search stops here + * In a 256k log, the scan at the end of the log will see the + * x + 1 blocks. We need to skip past those since that is + * certainly not the head of the log. By searching for + * last_half_cycle-1 we accomplish that. 
+ */ + start_blk = log_bbnum - num_scan_bblks + head_blk; + ASSERT(head_blk <= INT_MAX && + (xfs_daddr_t) num_scan_bblks - head_blk >= 0); + if ((error = xlog_find_verify_cycle(log, start_blk, + num_scan_bblks - (int)head_blk, + (stop_on_cycle - 1), &new_blk))) + goto bp_err; + if (new_blk != -1) { + head_blk = new_blk; + goto bad_blk; + } + + /* + * Scan beginning of log now. The last part of the physical + * log is good. This scan needs to verify that it doesn't find + * the last_half_cycle. + */ + start_blk = 0; + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_cycle(log, + start_blk, (int)head_blk, + stop_on_cycle, &new_blk))) + goto bp_err; + if (new_blk != -1) + head_blk = new_blk; + } + + bad_blk: + /* + * Now we need to make sure head_blk is not pointing to a block in + * the middle of a log record. + */ + num_scan_bblks = XLOG_REC_SHIFT(log); + if (head_blk >= num_scan_bblks) { + start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ + + /* start ptr at last block ptr before head_blk */ + if ((error = xlog_find_verify_log_record(log, start_blk, + &head_blk, 0)) == -1) { + error = XFS_ERROR(EIO); + goto bp_err; + } else if (error) + goto bp_err; + } else { + start_blk = 0; + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_log_record(log, start_blk, + &head_blk, 0)) == -1) { + /* We hit the beginning of the log during our search */ + start_blk = log_bbnum - num_scan_bblks + head_blk; + new_blk = log_bbnum; + ASSERT(start_blk <= INT_MAX && + (xfs_daddr_t) log_bbnum-start_blk >= 0); + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_log_record(log, + start_blk, &new_blk, + (int)head_blk)) == -1) { + error = XFS_ERROR(EIO); + goto bp_err; + } else if (error) + goto bp_err; + if (new_blk != log_bbnum) + head_blk = new_blk; + } else if (error) + goto bp_err; + } + + xlog_put_bp(bp); + if (head_blk == log_bbnum) + *return_head_blk = 0; + else + *return_head_blk = head_blk; + /* + * When returning here, we have a good 
block number. Bad block + * means that during a previous crash, we didn't have a clean break + * from cycle number N to cycle number N-1. In this case, we need + * to find the first block with cycle number N-1. + */ + return 0; + + bp_err: + xlog_put_bp(bp); + + if (error) + xlog_warn("XFS: failed to find log head"); + return error; +} + +/* + * Find the sync block number or the tail of the log. + * + * This will be the block number of the last record to have its + * associated buffers synced to disk. Every log record header has + * a sync lsn embedded in it. LSNs hold block numbers, so it is easy + * to get a sync block number. The only concern is to figure out which + * log record header to believe. + * + * The following algorithm uses the log record header with the largest + * lsn. The entire log record does not need to be valid. We only care + * that the header is valid. + * + * We could speed up search by using current head_blk buffer, but it is not + * available. + */ +int +xlog_find_tail( + xlog_t *log, + xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, + int readonly) +{ + xlog_rec_header_t *rhead; + xlog_op_header_t *op_head; + xfs_caddr_t offset = NULL; + xfs_buf_t *bp; + int error, i, found; + xfs_daddr_t umount_data_blk; + xfs_daddr_t after_umount_blk; + xfs_lsn_t tail_lsn; + int hblks; + + found = 0; + + /* + * Find previous log record + */ + if ((error = xlog_find_head(log, head_blk))) + return error; + + bp = xlog_get_bp(log, 1); + if (!bp) + return ENOMEM; + if (*head_blk == 0) { /* special case */ + if ((error = xlog_bread(log, 0, 1, bp))) + goto bread_err; + offset = xlog_align(log, 0, 1, bp); + if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { + *tail_blk = 0; + /* leave all other log inited values alone */ + goto exit; + } + } + + /* + * Search backwards looking for log record header block + */ + ASSERT(*head_blk < INT_MAX); + for (i = (int)(*head_blk) - 1; i >= 0; i--) { + if ((error = xlog_bread(log, i, 1, bp))) + goto bread_err; + offset = 
xlog_align(log, i, 1, bp); + if (XLOG_HEADER_MAGIC_NUM == + INT_GET(*(uint *)offset, ARCH_CONVERT)) { + found = 1; + break; + } + } + /* + * If we haven't found the log record header block, start looking + * again from the end of the physical log. XXXmiken: There should be + * a check here to make sure we didn't search more than N blocks in + * the previous code. + */ + if (!found) { + for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { + if ((error = xlog_bread(log, i, 1, bp))) + goto bread_err; + offset = xlog_align(log, i, 1, bp); + if (XLOG_HEADER_MAGIC_NUM == + INT_GET(*(uint*)offset, ARCH_CONVERT)) { + found = 2; + break; + } + } + } + if (!found) { + xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); + ASSERT(0); + return XFS_ERROR(EIO); + } + + /* find blk_no of tail of log */ + rhead = (xlog_rec_header_t *)offset; + *tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT)); + + /* + * Reset log values according to the state of the log when we + * crashed. In the case where head_blk == 0, we bump curr_cycle + * one because the next write starts a new cycle rather than + * continuing the cycle of the last good log record. At this + * point we have guaranteed that all partial log records have been + * accounted for. Therefore, we know that the last good log record + * written was complete and ended exactly on the end boundary + * of the physical log. + */ + log->l_prev_block = i; + log->l_curr_block = (int)*head_blk; + log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT); + if (found == 2) + log->l_curr_cycle++; + log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT); + log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT); + log->l_grant_reserve_cycle = log->l_curr_cycle; + log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); + log->l_grant_write_cycle = log->l_curr_cycle; + log->l_grant_write_bytes = BBTOB(log->l_curr_block); + + /* + * Look for unmount record. 
If we find it, then we know there + * was a clean unmount. Since 'i' could be the last block in + * the physical log, we convert to a log block before comparing + * to the head_blk. + * + * Save the current tail lsn to use to pass to + * xlog_clear_stale_blocks() below. We won't want to clear the + * unmount record if there is one, so we pass the lsn of the + * unmount record rather than the block after it. + */ + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { + int h_size = INT_GET(rhead->h_size, ARCH_CONVERT); + int h_version = INT_GET(rhead->h_version, ARCH_CONVERT); + + if ((h_version & XLOG_VERSION_2) && + (h_size > XLOG_HEADER_CYCLE_SIZE)) { + hblks = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + hblks++; + } else { + hblks = 1; + } + } else { + hblks = 1; + } + after_umount_blk = (i + hblks + (int) + BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize; + tail_lsn = log->l_tail_lsn; + if (*head_blk == after_umount_blk && + INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) { + umount_data_blk = (i + hblks) % log->l_logBBsize; + if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { + goto bread_err; + } + offset = xlog_align(log, umount_data_blk, 1, bp); + op_head = (xlog_op_header_t *)offset; + if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { + /* + * Set tail and last sync so that newly written + * log records will point recovery to after the + * current unmount record. + */ + ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle, + after_umount_blk); + ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle, + after_umount_blk); + *tail_blk = after_umount_blk; + } + } + + /* + * Make sure that there are no blocks in front of the head + * with the same cycle number as the head. This can happen + * because we allow multiple outstanding log writes concurrently, + * and the later writes might make it out before earlier ones. 
+ * + * We use the lsn from before modifying it so that we'll never + * overwrite the unmount record after a clean unmount. + * + * Do this only if we are going to recover the filesystem + * + * NOTE: This used to say "if (!readonly)" + * However on Linux, we can & do recover a read-only filesystem. + * We only skip recovery if NORECOVERY is specified on mount, + * in which case we would not be here. + * + * But... if the -device- itself is readonly, just skip this. + * We can't recover this device anyway, so it won't matter. + */ + if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { + error = xlog_clear_stale_blocks(log, tail_lsn); + } + +bread_err: +exit: + xlog_put_bp(bp); + + if (error) + xlog_warn("XFS: failed to locate log tail"); + return error; +} + +/* + * Is the log zeroed at all? + * + * The last binary search should be changed to perform an X block read + * once X becomes small enough. You can then search linearly through + * the X blocks. This will cut down on the number of reads we need to do. + * + * If the log is partially zeroed, this routine will pass back the blkno + * of the first block with cycle number 0. It won't have a complete LR + * preceding it. 
+ * + * Return: + * 0 => the log is completely written to + * -1 => use *blk_no as the first block of the log + * >0 => error has occurred + */ +int +xlog_find_zeroed( + xlog_t *log, + xfs_daddr_t *blk_no) +{ + xfs_buf_t *bp; + xfs_caddr_t offset; + uint first_cycle, last_cycle; + xfs_daddr_t new_blk, last_blk, start_blk; + xfs_daddr_t num_scan_bblks; + int error, log_bbnum = log->l_logBBsize; + + /* check totally zeroed log */ + bp = xlog_get_bp(log, 1); + if (!bp) + return ENOMEM; + if ((error = xlog_bread(log, 0, 1, bp))) + goto bp_err; + offset = xlog_align(log, 0, 1, bp); + first_cycle = GET_CYCLE(offset, ARCH_CONVERT); + if (first_cycle == 0) { /* completely zeroed log */ + *blk_no = 0; + xlog_put_bp(bp); + return -1; + } + + /* check partially zeroed log */ + if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) + goto bp_err; + offset = xlog_align(log, log_bbnum-1, 1, bp); + last_cycle = GET_CYCLE(offset, ARCH_CONVERT); + if (last_cycle != 0) { /* log completely written to */ + xlog_put_bp(bp); + return 0; + } else if (first_cycle != 1) { + /* + * If the cycle of the last block is zero, the cycle of + * the first block must be 1. If it's not, maybe we're + * not looking at a log... Bail out. + */ + xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); + return XFS_ERROR(EINVAL); + } + + /* we have a partially zeroed log */ + last_blk = log_bbnum-1; + if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) + goto bp_err; + + /* + * Validate the answer. Because there is no way to guarantee that + * the entire log is made up of log records which are the same size, + * we scan over the defined maximum blocks. At this point, the maximum + * is not chosen to mean anything special. 
XXXmiken + */ + num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); + ASSERT(num_scan_bblks <= INT_MAX); + + if (last_blk < num_scan_bblks) + num_scan_bblks = last_blk; + start_blk = last_blk - num_scan_bblks; + + /* + * We search for any instances of cycle number 0 that occur before + * our current estimate of the head. What we're trying to detect is + * 1 ... | 0 | 1 | 0... + * ^ binary search ends here + */ + if ((error = xlog_find_verify_cycle(log, start_blk, + (int)num_scan_bblks, 0, &new_blk))) + goto bp_err; + if (new_blk != -1) + last_blk = new_blk; + + /* + * Potentially backup over partial log record write. We don't need + * to search the end of the log because we know it is zero. + */ + if ((error = xlog_find_verify_log_record(log, start_blk, + &last_blk, 0)) == -1) { + error = XFS_ERROR(EIO); + goto bp_err; + } else if (error) + goto bp_err; + + *blk_no = last_blk; +bp_err: + xlog_put_bp(bp); + if (error) + return error; + return -1; +} + +/* + * These are simple subroutines used by xlog_clear_stale_blocks() below + * to initialize a buffer full of empty log record headers and write + * them into the log. + */ +STATIC void +xlog_add_record( + xlog_t *log, + xfs_caddr_t buf, + int cycle, + int block, + int tail_cycle, + int tail_block) +{ + xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; + + memset(buf, 0, BBSIZE); + INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); + INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); + INT_SET(recp->h_version, ARCH_CONVERT, + XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 
2 : 1); + ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block); + ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block); + INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT); + memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); +} + +STATIC int +xlog_write_log_records( + xlog_t *log, + int cycle, + int start_block, + int blocks, + int tail_cycle, + int tail_block) +{ + xfs_caddr_t offset; + xfs_buf_t *bp; + int balign, ealign; + int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + int end_block = start_block + blocks; + int bufblks; + int error = 0; + int i, j = 0; + + bufblks = 1 << ffs(blocks); + while (!(bp = xlog_get_bp(log, bufblks))) { |