diff options
Diffstat (limited to 'fs/xfs/xfs_mount.c')
| -rw-r--r-- | fs/xfs/xfs_mount.c | 1340 |
1 files changed, 425 insertions, 915 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index eb403b40e12..3507cd0ec40 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -17,37 +17,31 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_dir2.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_quota.h" #include "xfs_fsops.h" -#include "xfs_utils.h" #include "xfs_trace.h" - - -STATIC void xfs_unmountfs_wait(xfs_mount_t *); +#include "xfs_icache.h" +#include "xfs_dinode.h" #ifdef HAVE_PERCPU_SB @@ -55,73 +49,13 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, int); -STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, - int64_t, int); STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); - #else #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) -#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) - #endif -static const struct { - short offset; - short type; /* 0 = integer - * 1 = binary / string (no translation) - */ -} xfs_sb_info[] = { - { offsetof(xfs_sb_t, sb_magicnum), 0 }, - { offsetof(xfs_sb_t, sb_blocksize), 0 }, - { offsetof(xfs_sb_t, sb_dblocks), 0 }, - { offsetof(xfs_sb_t, sb_rblocks), 0 }, - { offsetof(xfs_sb_t, sb_rextents), 0 }, - { offsetof(xfs_sb_t, sb_uuid), 1 }, - { offsetof(xfs_sb_t, sb_logstart), 0 }, - { offsetof(xfs_sb_t, sb_rootino), 0 }, - { offsetof(xfs_sb_t, sb_rbmino), 0 }, - { offsetof(xfs_sb_t, sb_rsumino), 0 }, - { offsetof(xfs_sb_t, sb_rextsize), 0 }, - { offsetof(xfs_sb_t, sb_agblocks), 0 }, - { offsetof(xfs_sb_t, sb_agcount), 0 }, - { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, - { offsetof(xfs_sb_t, sb_logblocks), 0 }, - { offsetof(xfs_sb_t, sb_versionnum), 0 }, - { offsetof(xfs_sb_t, sb_sectsize), 0 }, - { offsetof(xfs_sb_t, sb_inodesize), 0 }, - { offsetof(xfs_sb_t, sb_inopblock), 0 }, - { offsetof(xfs_sb_t, sb_fname[0]), 1 }, - { offsetof(xfs_sb_t, sb_blocklog), 0 }, - { offsetof(xfs_sb_t, sb_sectlog), 0 }, - { offsetof(xfs_sb_t, sb_inodelog), 0 }, - { offsetof(xfs_sb_t, sb_inopblog), 0 }, - { offsetof(xfs_sb_t, sb_agblklog), 0 }, - { offsetof(xfs_sb_t, sb_rextslog), 0 }, - { offsetof(xfs_sb_t, sb_inprogress), 0 }, - { offsetof(xfs_sb_t, sb_imax_pct), 0 }, - { offsetof(xfs_sb_t, sb_icount), 0 }, - { offsetof(xfs_sb_t, sb_ifree), 0 }, - { offsetof(xfs_sb_t, sb_fdblocks), 0 }, - { offsetof(xfs_sb_t, sb_frextents), 0 }, - { offsetof(xfs_sb_t, sb_uquotino), 0 }, - { offsetof(xfs_sb_t, sb_gquotino), 0 }, - { offsetof(xfs_sb_t, sb_qflags), 0 }, - { offsetof(xfs_sb_t, sb_flags), 0 }, - { offsetof(xfs_sb_t, sb_shared_vn), 0 }, - { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, - { offsetof(xfs_sb_t, sb_unit), 0 }, - { offsetof(xfs_sb_t, sb_width), 0 }, - { offsetof(xfs_sb_t, sb_dirblklog), 0 }, - { offsetof(xfs_sb_t, sb_logsectlog), 0 }, - { offsetof(xfs_sb_t, sb_logsectsize),0 }, - { offsetof(xfs_sb_t, sb_logsunit), 0 }, - { offsetof(xfs_sb_t, sb_features2), 0 }, - { offsetof(xfs_sb_t, sb_bad_features2), 0 }, - { sizeof(xfs_sb_t), 0 } -}; - static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; @@ -141,9 +75,7 @@ xfs_uuid_mount( return 0; if (uuid_is_nil(uuid)) { - cmn_err(CE_WARN, - "XFS: Filesystem %s has nil UUID - can't mount", - mp->m_fsname); + xfs_warn(mp, "Filesystem has nil UUID - can't mount"); return XFS_ERROR(EINVAL); } @@ -171,8 +103,7 @@ xfs_uuid_mount( out_duplicate: mutex_unlock(&xfs_uuid_table_mutex); - cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", - mp->m_fsname); + xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); return XFS_ERROR(EINVAL); } @@ -200,22 +131,33 @@ xfs_uuid_unmount( } +STATIC void +__xfs_free_perag( + struct rcu_head *head) +{ + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + + ASSERT(atomic_read(&pag->pag_ref) == 0); + kmem_free(pag); +} + /* - * Free up the resources associated with a mount structure. Assume that - * the structure was initially zeroed, so we can tell which fields got - * initialized. + * Free up the per-ag resources associated with the mount structure. */ STATIC void xfs_free_perag( xfs_mount_t *mp) { - if (mp->m_perag) { - int agno; + xfs_agnumber_t agno; + struct xfs_perag *pag; - for (agno = 0; agno < mp->m_maxagi; agno++) - if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list); - kmem_free(mp->m_perag); + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + ASSERT(pag); + ASSERT(atomic_read(&pag->pag_ref) == 0); + call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -233,332 +175,96 @@ xfs_sb_validate_fsb_count( #if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) - return E2BIG; + return EFBIG; #else /* Limited by UINT_MAX of sectors */ if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) - return E2BIG; + return EFBIG; #endif return 0; } -/* - * Check the validity of the SB found. - */ -STATIC int -xfs_mount_validate_sb( +int +xfs_initialize_perag( xfs_mount_t *mp, - xfs_sb_t *sbp, - int flags) + xfs_agnumber_t agcount, + xfs_agnumber_t *maxagi) { - /* - * If the log device and data device have the - * same device number, the log is internal. - * Consequently, the sb_logstart should be non-zero. If - * we have a zero sb_logstart in this case, we may be trying to mount - * a volume filesystem in a non-volume manner. - */ - if (sbp->sb_magicnum != XFS_SB_MAGIC) { - xfs_fs_mount_cmn_err(flags, "bad magic number"); - return XFS_ERROR(EWRONGFS); - } - - if (!xfs_sb_good_version(sbp)) { - xfs_fs_mount_cmn_err(flags, "bad version"); - return XFS_ERROR(EWRONGFS); - } - - if (unlikely( - sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { - xfs_fs_mount_cmn_err(flags, - "filesystem is marked as having an external log; " - "specify logdev on the\nmount command line."); - return XFS_ERROR(EINVAL); - } - - if (unlikely( - sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { - xfs_fs_mount_cmn_err(flags, - "filesystem is marked as having an internal log; " - "do not specify logdev on\nthe mount command line."); - return XFS_ERROR(EINVAL); - } - - /* - * More sanity checking. These were stolen directly from - * xfs_repair. - */ - if (unlikely( - sbp->sb_agcount <= 0 || - sbp->sb_sectsize < XFS_MIN_SECTORSIZE || - sbp->sb_sectsize > XFS_MAX_SECTORSIZE || - sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || - sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || - sbp->sb_sectsize != (1 << sbp->sb_sectlog) || - sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || - sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || - sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || - sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || - sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || - sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || - sbp->sb_inodelog < XFS_DINODE_MIN_LOG || - sbp->sb_inodelog > XFS_DINODE_MAX_LOG || - sbp->sb_inodesize != (1 << sbp->sb_inodelog) || - (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || - (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || - (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { - xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); - return XFS_ERROR(EFSCORRUPTED); - } - - /* - * Sanity check AG count, size fields against data size field - */ - if (unlikely( - sbp->sb_dblocks == 0 || - sbp->sb_dblocks > - (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || - sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * - sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { - xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); - return XFS_ERROR(EFSCORRUPTED); - } - - /* - * Until this is fixed only page-sized or smaller data blocks work. - */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_fs_mount_cmn_err(flags, - "file system with blocksize %d bytes", - sbp->sb_blocksize); - xfs_fs_mount_cmn_err(flags, - "only pagesize (%ld) or less will currently work.", - PAGE_SIZE); - return XFS_ERROR(ENOSYS); - } + xfs_agnumber_t index; + xfs_agnumber_t first_initialised = 0; + xfs_perag_t *pag; + xfs_agino_t agino; + xfs_ino_t ino; + xfs_sb_t *sbp = &mp->m_sb; + int error = -ENOMEM; /* - * Currently only very few inode sizes are supported. + * Walk the current per-ag tree so we don't try to initialise AGs + * that already exist (growfs case). Allocate and insert all the + * AGs we don't find ready for initialisation. */ - switch (sbp->sb_inodesize) { - case 256: - case 512: - case 1024: - case 2048: - break; - default: - xfs_fs_mount_cmn_err(flags, - "inode size of %d bytes not supported", - sbp->sb_inodesize); - return XFS_ERROR(ENOSYS); - } + for (index = 0; index < agcount; index++) { + pag = xfs_perag_get(mp, index); + if (pag) { + xfs_perag_put(pag); + continue; + } + if (!first_initialised) + first_initialised = index; + + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + if (!pag) + goto out_unwind; + pag->pag_agno = index; + pag->pag_mount = mp; + spin_lock_init(&pag->pag_ici_lock); + mutex_init(&pag->pag_ici_reclaim_lock); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + spin_lock_init(&pag->pag_buf_lock); + pag->pag_buf_tree = RB_ROOT; - if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || - xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_fs_mount_cmn_err(flags, - "file system too large to be mounted on this system."); - return XFS_ERROR(E2BIG); - } + if (radix_tree_preload(GFP_NOFS)) + goto out_unwind; - if (unlikely(sbp->sb_inprogress)) { - xfs_fs_mount_cmn_err(flags, "file system busy"); - return XFS_ERROR(EFSCORRUPTED); + spin_lock(&mp->m_perag_lock); + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { + BUG(); + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + error = -EEXIST; + goto out_unwind; + } + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); } /* - * Version 1 directory format has never worked on Linux. + * If we mount with the inode64 option, or no inode overflows + * the legacy 32-bit address space clear the inode32 option. */ - if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { - xfs_fs_mount_cmn_err(flags, - "file system using version 1 directory format"); - return XFS_ERROR(ENOSYS); - } - - return 0; -} - -STATIC void -xfs_initialize_perag_icache( - xfs_perag_t *pag) -{ - if (!pag->pag_ici_init) { - rwlock_init(&pag->pag_ici_lock); - INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - pag->pag_ici_init = 1; - } -} - -xfs_agnumber_t -xfs_initialize_perag( - xfs_mount_t *mp, - xfs_agnumber_t agcount) -{ - xfs_agnumber_t index, max_metadata; - xfs_perag_t *pag; - xfs_agino_t agino; - xfs_ino_t ino; - xfs_sb_t *sbp = &mp->m_sb; - xfs_ino_t max_inum = XFS_MAXINUMBER_32; - - /* Check to see if the filesystem can overflow 32 bit inodes */ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); - /* Clear the mount flag if no inode can overflow 32 bits - * on this filesystem, or if specifically requested.. - */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { + if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32) mp->m_flags |= XFS_MOUNT_32BITINODES; - } else { + else mp->m_flags &= ~XFS_MOUNT_32BITINODES; - } - - /* If we can overflow then setup the ag headers accordingly */ - if (mp->m_flags & XFS_MOUNT_32BITINODES) { - /* Calculate how much should be reserved for inodes to - * meet the max inode percentage. - */ - if (mp->m_maxicount) { - __uint64_t icount; - - icount = sbp->sb_dblocks * sbp->sb_imax_pct; - do_div(icount, 100); - icount += sbp->sb_agblocks - 1; - do_div(icount, sbp->sb_agblocks); - max_metadata = icount; - } else { - max_metadata = agcount; - } - for (index = 0; index < agcount; index++) { - ino = XFS_AGINO_TO_INO(mp, index, agino); - if (ino > max_inum) { - index++; - break; - } - - /* This ag is preferred for inodes */ - pag = &mp->m_perag[index]; - pag->pagi_inodeok = 1; - if (index < max_metadata) - pag->pagf_metadata = 1; - xfs_initialize_perag_icache(pag); - } - } else { - /* Setup default behavior for smaller filesystems */ - for (index = 0; index < agcount; index++) { - pag = &mp->m_perag[index]; - pag->pagi_inodeok = 1; - xfs_initialize_perag_icache(pag); - } - } - return index; -} - -void -xfs_sb_from_disk( - xfs_sb_t *to, - xfs_dsb_t *from) -{ - to->sb_magicnum = be32_to_cpu(from->sb_magicnum); - to->sb_blocksize = be32_to_cpu(from->sb_blocksize); - to->sb_dblocks = be64_to_cpu(from->sb_dblocks); - to->sb_rblocks = be64_to_cpu(from->sb_rblocks); - to->sb_rextents = be64_to_cpu(from->sb_rextents); - memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); - to->sb_logstart = be64_to_cpu(from->sb_logstart); - to->sb_rootino = be64_to_cpu(from->sb_rootino); - to->sb_rbmino = be64_to_cpu(from->sb_rbmino); - to->sb_rsumino = be64_to_cpu(from->sb_rsumino); - to->sb_rextsize = be32_to_cpu(from->sb_rextsize); - to->sb_agblocks = be32_to_cpu(from->sb_agblocks); - to->sb_agcount = be32_to_cpu(from->sb_agcount); - to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); - to->sb_logblocks = be32_to_cpu(from->sb_logblocks); - to->sb_versionnum = be16_to_cpu(from->sb_versionnum); - to->sb_sectsize = be16_to_cpu(from->sb_sectsize); - to->sb_inodesize = be16_to_cpu(from->sb_inodesize); - to->sb_inopblock = be16_to_cpu(from->sb_inopblock); - memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); - to->sb_blocklog = from->sb_blocklog; - to->sb_sectlog = from->sb_sectlog; - to->sb_inodelog = from->sb_inodelog; - to->sb_inopblog = from->sb_inopblog; - to->sb_agblklog = from->sb_agblklog; - to->sb_rextslog = from->sb_rextslog; - to->sb_inprogress = from->sb_inprogress; - to->sb_imax_pct = from->sb_imax_pct; - to->sb_icount = be64_to_cpu(from->sb_icount); - to->sb_ifree = be64_to_cpu(from->sb_ifree); - to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); - to->sb_frextents = be64_to_cpu(from->sb_frextents); - to->sb_uquotino = be64_to_cpu(from->sb_uquotino); - to->sb_gquotino = be64_to_cpu(from->sb_gquotino); - to->sb_qflags = be16_to_cpu(from->sb_qflags); - to->sb_flags = from->sb_flags; - to->sb_shared_vn = from->sb_shared_vn; - to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); - to->sb_unit = be32_to_cpu(from->sb_unit); - to->sb_width = be32_to_cpu(from->sb_width); - to->sb_dirblklog = from->sb_dirblklog; - to->sb_logsectlog = from->sb_logsectlog; - to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); - to->sb_logsunit = be32_to_cpu(from->sb_logsunit); - to->sb_features2 = be32_to_cpu(from->sb_features2); - to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); -} -/* - * Copy in core superblock to ondisk one. - * - * The fields argument is mask of superblock fields to copy. - */ -void -xfs_sb_to_disk( - xfs_dsb_t *to, - xfs_sb_t *from, - __int64_t fields) -{ - xfs_caddr_t to_ptr = (xfs_caddr_t)to; - xfs_caddr_t from_ptr = (xfs_caddr_t)from; - xfs_sb_field_t f; - int first; - int size; - - ASSERT(fields); - if (!fields) - return; - - while (fields) { - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - first = xfs_sb_info[f].offset; - size = xfs_sb_info[f + 1].offset - first; - - ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); + if (mp->m_flags & XFS_MOUNT_32BITINODES) + index = xfs_set_inode32(mp); + else + index = xfs_set_inode64(mp); - if (size == 1 || xfs_sb_info[f].type == 1) { - memcpy(to_ptr + first, from_ptr + first, size); - } else { - switch (size) { - case 2: - *(__be16 *)(to_ptr + first) = - cpu_to_be16(*(__u16 *)(from_ptr + first)); - break; - case 4: - *(__be32 *)(to_ptr + first) = - cpu_to_be32(*(__u32 *)(from_ptr + first)); - break; - case 8: - *(__be64 *)(to_ptr + first) = - cpu_to_be64(*(__u64 *)(from_ptr + first)); - break; - default: - ASSERT(0); - } - } + if (maxagi) + *maxagi = index; + return 0; - fields &= ~(1LL << f); +out_unwind: + kmem_free(pag); + for (; index > first_initialised; index--) { + pag = radix_tree_delete(&mp->m_perag_tree, index); + kmem_free(pag); } + return error; } /* @@ -567,192 +273,107 @@ xfs_sb_to_disk( * Does the initial read of the superblock. */ int -xfs_readsb(xfs_mount_t *mp, int flags) +xfs_readsb( + struct xfs_mount *mp, + int flags) { unsigned int sector_size; - unsigned int extra_flags; - xfs_buf_t *bp; + struct xfs_buf *bp; + struct xfs_sb *sbp = &mp->m_sb; int error; + int loud = !(flags & XFS_MFSI_QUIET); + const struct xfs_buf_ops *buf_ops; ASSERT(mp->m_sb_bp == NULL); ASSERT(mp->m_ddev_targp != NULL); /* + * For the initial read, we must guess at the sector + * size based on the block device. It's enough to + * get the sb_sectsize out of the superblock and + * then reread with the proper length. + * We don't verify it yet, because it may not be complete. + */ + sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + buf_ops = NULL; + + /* * Allocate a (locked) buffer to hold the superblock. * This will be kept around at all times to optimize * access to the superblock. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; - - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), - extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; +reread: + bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, + BTOBB(sector_size), 0, buf_ops); + if (!bp) { + if (loud) + xfs_warn(mp, "SB buffer read failed"); + return EIO; + } + if (bp->b_error) { + error = bp->b_error; + if (loud) + xfs_warn(mp, "SB validate failed with error %d.", error); + /* bad CRC means corrupted metadata */ + if (error == EFSBADCRC) + error = EFSCORRUPTED; + goto release_buf; } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * Initialize the mount structure from the superblock. - * But first do some basic consistency checking. */ - xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); + xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + xfs_sb_quota_from_disk(sbp); - error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); - if (error) { - xfs_fs_mount_cmn_err(flags, "SB validate failed"); - goto fail; + /* + * If we haven't validated the superblock, do so now before we try + * to check the sector size and reread the superblock appropriately. + */ + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + if (loud) + xfs_warn(mp, "Invalid superblock magic number"); + error = EINVAL; + goto release_buf; } /* * We must be able to do sector-sized and sector-aligned IO. */ - if (sector_size > mp->m_sb.sb_sectsize) { - xfs_fs_mount_cmn_err(flags, - "device supports only %u byte sectors (not %u)", - sector_size, mp->m_sb.sb_sectsize); + if (sector_size > sbp->sb_sectsize) { + if (loud) + xfs_warn(mp, "device supports %u byte sectors (not %u)", + sector_size, sbp->sb_sectsize); error = ENOSYS; - goto fail; + goto release_buf; } - /* - * If device sector size is smaller than the superblock size, - * re-read the superblock so the buffer is correctly sized. - */ - if (sector_size < mp->m_sb.sb_sectsize) { - XFS_BUF_UNMANAGE(bp); + if (buf_ops == NULL) { + /* + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. + */ xfs_buf_relse(bp); - sector_size = mp->m_sb.sb_sectsize; - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB re-read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; - } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + sector_size = sbp->sb_sectsize; + buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; + goto reread; } /* Initialize per-cpu counters */ xfs_icsb_reinit_counters(mp); + /* no need to be quiet anymore, so reset the buf ops */ + bp->b_ops = &xfs_sb_buf_ops; + mp->m_sb_bp = bp; - xfs_buf_relse(bp); - ASSERT(XFS_BUF_VALUSEMA(bp) > 0); + xfs_buf_unlock(bp); return 0; - fail: - if (bp) { - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); - } +release_buf: + xfs_buf_relse(bp); return error; } - -/* - * xfs_mount_common - * - * Mount initialization code establishing various mount - * fields from the superblock associated with the given - * mount structure - */ -STATIC void -xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) -{ - mp->m_agfrotor = mp->m_agirotor = 0; - spin_lock_init(&mp->m_agirotor_lock); - mp->m_maxagi = mp->m_sb.sb_agcount; - mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; - mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; - mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; - mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - mp->m_blockmask = sbp->sb_blocksize - 1; - mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; - mp->m_blockwmask = mp->m_blockwsize - 1; - - mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; - mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; - - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; - mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; - - mp->m_bsize = XFS_FSB_TO_BB(mp, 1); - mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, - sbp->sb_inopblock); - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; -} - -/* - * xfs_initialize_perag_data - * - * Read in each per-ag structure so we can count up the number of - * allocated inodes, free inodes and used filesystem blocks as this - * information is no longer persistent in the superblock. Once we have - * this information, write it into the in-core superblock structure. - */ -STATIC int -xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) -{ - xfs_agnumber_t index; - xfs_perag_t *pag; - xfs_sb_t *sbp = &mp->m_sb; - uint64_t ifree = 0; - uint64_t ialloc = 0; - uint64_t bfree = 0; - uint64_t bfreelst = 0; - uint64_t btree = 0; - int error; - - for (index = 0; index < agcount; index++) { - /* - * read the agf, then the agi. This gets us - * all the information we need and populates the - * per-ag structures for us. - */ - error = xfs_alloc_pagf_init(mp, NULL, index, 0); - if (error) - return error; - - error = xfs_ialloc_pagi_init(mp, NULL, index); - if (error) - return error; - pag = &mp->m_perag[index]; - ifree += pag->pagi_freecount; - ialloc += pag->pagi_count; - bfree += pag->pagf_freeblks; - bfreelst += pag->pagf_flcount; - btree += pag->pagf_btreeblks; - } - /* - * Overwrite incore superblock counters with just-read data - */ - spin_lock(&mp->m_sb_lock); - sbp->sb_ifree = ifree; - sbp->sb_icount = ialloc; - sbp->sb_fdblocks = bfree + bfreelst + btree; - spin_unlock(&mp->m_sb_lock); - - /* Fixup the per-cpu counters as well. */ - xfs_icsb_reinit_counters(mp); - - return 0; -} - /* * Update alignment values based on mount options and sb values */ @@ -768,39 +389,27 @@ xfs_update_alignment(xfs_mount_t *mp) */ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { - if (mp->m_flags & XFS_MOUNT_RETERR) { - cmn_err(CE_WARN, - "XFS: alignment check 1 failed"); - return XFS_ERROR(EINVAL); - } - mp->m_dalign = mp->m_swidth = 0; + xfs_warn(mp, + "alignment check failed: sunit/swidth vs. blocksize(%d)", + sbp->sb_blocksize); + return XFS_ERROR(EINVAL); } else { /* * Convert the stripe unit and width to FSBs. */ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { - if (mp->m_flags & XFS_MOUNT_RETERR) { - return XFS_ERROR(EINVAL); - } - xfs_fs_cmn_err(CE_WARN, mp, -"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", - mp->m_dalign, mp->m_swidth, - sbp->sb_agblocks); - - mp->m_dalign = 0; - mp->m_swidth = 0; + xfs_warn(mp, + "alignment check failed: sunit/swidth vs. agsize(%d)", + sbp->sb_agblocks); + return XFS_ERROR(EINVAL); } else if (mp->m_dalign) { mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { - if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_fs_cmn_err(CE_WARN, mp, -"stripe alignment turned off: sunit(%d) less than bsize(%d)", - mp->m_dalign, - mp->m_blockmask +1); - return XFS_ERROR(EINVAL); - } - mp->m_swidth = 0; + xfs_warn(mp, + "alignment check failed: sunit(%d) less than bsize(%d)", + mp->m_dalign, sbp->sb_blocksize); + return XFS_ERROR(EINVAL); } } @@ -817,6 +426,10 @@ xfs_update_alignment(xfs_mount_t *mp) sbp->sb_width = mp->m_swidth; mp->m_update_flags |= XFS_SB_WIDTH; } + } else { + xfs_warn(mp, + "cannot change alignment: superblock does not support data alignment"); + return XFS_ERROR(EINVAL); } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && xfs_sb_version_hasdalign(&mp->m_sb)) { @@ -891,6 +504,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp) } /* + * precalculate the low space thresholds for dynamic speculative preallocation. + */ +void +xfs_set_low_space_thresholds( + struct xfs_mount *mp) +{ + int i; + + for (i = 0; i < XFS_LOWSP_MAX; i++) { + __uint64_t space = mp->m_sb.sb_dblocks; + + do_div(space, 100); + mp->m_low_space[i] = space * (i + 1); + } +} + + +/* * Set whether we're using inode alignment. */ STATIC void @@ -914,49 +545,42 @@ xfs_set_inoalignment(xfs_mount_t *mp) } /* - * Check that the data (and log if separate) are an ok size. + * Check that the data (and log if separate) is an ok size. */ STATIC int xfs_check_sizes(xfs_mount_t *mp) { xfs_buf_t *bp; xfs_daddr_t d; - int error; d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { - cmn_err(CE_WARN, "XFS: size check 1 failed"); - return XFS_ERROR(E2BIG); + xfs_warn(mp, "filesystem size mismatch detected"); + return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_ddev_targp, - d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) - error = XFS_ERROR(E2BIG); - return error; + bp = xfs_buf_read_uncached(mp->m_ddev_targp, + d - XFS_FSS_TO_BB(mp, 1), + XFS_FSS_TO_BB(mp, 1), 0, NULL); + if (!bp) { + xfs_warn(mp, "last sector read failed"); + return EIO; } + xfs_buf_relse(bp); if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { - cmn_err(CE_WARN, "XFS: size check 3 failed"); - return XFS_ERROR(E2BIG); + xfs_warn(mp, "log size mismatch detected"); + return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_logdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 3 failed"); - if (error == ENOSPC) - error = XFS_ERROR(E2BIG); - return error; + bp = xfs_buf_read_uncached(mp->m_logdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_BB(mp, 1), 0, NULL); + if (!bp) { + xfs_warn(mp, "log device read failed"); + return EIO; } + xfs_buf_relse(bp); } return 0; } @@ -990,17 +614,11 @@ xfs_mount_reset_sbqflags( if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); if (error) { xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); + xfs_alert(mp, "%s: Superblock update failed!", __func__); return error; } @@ -1008,6 +626,24 @@ xfs_mount_reset_sbqflags( return xfs_trans_commit(tp, 0); } +__uint64_t +xfs_default_resblks(xfs_mount_t *mp) +{ + __uint64_t resblks; + + /* + * We default to 5% or 8192 fsbs of space reserved, whichever is + * smaller. This is intended to cover concurrent allocation + * transactions when we initially hit enospc. These each require a 4 + * block reservation. Hence by default we cover roughly 2000 concurrent + * allocation reservations. + */ + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 8192); + return resblks; +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1029,7 +665,7 @@ xfs_mountfs( uint quotaflags = 0; int error = 0; - xfs_mount_common(mp, sbp); + xfs_sb_mount_common(mp, sbp); /* * Check for a mismatched features2 values. Older kernels @@ -1048,8 +684,7 @@ xfs_mountfs( * transaction subsystem is online. */ if (xfs_sb_has_mismatched_features2(sbp)) { - cmn_err(CE_WARN, - "XFS: correcting sb_features alignment problem"); + xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; sbp->sb_bad_features2 = sbp->sb_features2; mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; @@ -1073,6 +708,12 @@ xfs_mountfs( mp->m_update_flags |= XFS_SB_VERSIONNUM; } + /* always use v2 inodes by default now */ + if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { + mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + mp->m_update_flags |= XFS_SB_VERSIONNUM; + } + /* * Check if sb_agblocks is aligned at stripe boundary * If sb_agblocks is NOT aligned turn off m_dalign since @@ -1090,8 +731,6 @@ xfs_mountfs( xfs_set_maxicount(mp); - mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); - error = xfs_uuid_mount(mp); if (error) goto out; @@ -1101,12 +740,27 @@ xfs_mountfs( */ xfs_set_rw_sizes(mp); + /* set the low space thresholds for dynamic preallocation */ + xfs_set_low_space_thresholds(mp); + /* * Set the inode cluster size. * This may still be overridden by the file system * block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to + * keep a constant ratio of inode per cluster buffer, but only if mkfs + * has set the inode alignment value appropriately for larger cluster + * sizes. */ mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = mp->m_inode_cluster_size; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + mp->m_inode_cluster_size = new_size; + } /* * Set inode alignment fields @@ -1114,7 +768,7 @@ xfs_mountfs( xfs_set_inoalignment(mp); /* - * Check that the data (and log if separate) are an ok size. + * Check that the data (and log if separate) is an ok size. */ error = xfs_check_sizes(mp); if (error) @@ -1125,7 +779,7 @@ xfs_mountfs( */ error = xfs_rtmount_init(mp); if (error) { - cmn_err(CE_WARN, "XFS: RT mount failed"); + xfs_warn(mp, "RT mount failed"); goto out_remove_uuid; } @@ -1137,12 +791,11 @@ xfs_mountfs( mp->m_dmevmask = 0; /* not persistent; set after each mount */ - xfs_dir_mount(mp); - - /* - * Initialize the attribute manager's entries. - */ - mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; + error = xfs_da_mount(mp); + if (error) { + xfs_warn(mp, "Failed dir/attr init: %d", error); + goto out_remove_uuid; + } /* * Initialize the precomputed transaction reservations values. @@ -1152,16 +805,16 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - init_rwsem(&mp->m_peraglock); - mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), - KM_MAYFAIL); - if (!mp->m_perag) - goto out_remove_uuid; - - mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); + spin_lock_init(&mp->m_perag_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); + error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); + if (error) { + xfs_warn(mp, "Failed per-ag init: %d", error); + goto out_free_dir; + } if (!sbp->sb_logblocks) { - cmn_err(CE_WARN, "XFS: no log defined"); + xfs_warn(mp, "no log defined"); XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto out_free_perag; @@ -1174,8 +827,8 @@ xfs_mountfs( XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); if (error) { - cmn_err(CE_WARN, "XFS: log mount failed"); - goto out_free_perag; + xfs_warn(mp, "log mount failed"); + goto out_fail_wait; } /* @@ -1202,25 +855,23 @@ xfs_mountfs( !mp->m_sb.sb_inprogress) { error = xfs_initialize_perag_data(mp, sbp->sb_agcount); if (error) - goto out_free_perag; + goto out_fail_wait; } /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ - error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); if (error) { - cmn_err(CE_WARN, "XFS: failed to read root inode"); + xfs_warn(mp, "failed to read root inode"); goto out_log_dealloc; } ASSERT(rip != NULL); - if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { - cmn_err(CE_WARN, "XFS: corrupted root inode"); - cmn_err(CE_WARN, "Device %s - root %llu is not a directory", - XFS_BUFTARG_NAME(mp->m_ddev_targp), + if (unlikely(!S_ISDIR(rip->i_d.di_mode))) { + xfs_warn(mp, "corrupted root inode %llu: not a directory", (unsigned long long)rip->i_ino); xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, @@ -1240,7 +891,7 @@ xfs_mountfs( /* * Free up the root inode. */ - cmn_err(CE_WARN, "XFS: failed to read RT inodes"); + xfs_warn(mp, "failed to read RT inodes"); goto out_rele_rip; } @@ -1252,7 +903,7 @@ xfs_mountfs( if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { error = xfs_mount_log_sb(mp, mp->m_update_flags); if (error) { - cmn_err(CE_WARN, "XFS: failed to write sb changes"); + xfs_warn(mp, "failed to write sb changes"); goto out_rtunmount; } } @@ -1273,10 +924,7 @@ xfs_mountfs( * quotachecked license. */ if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS: resetting qflags for filesystem %s", - mp->m_fsname); - + xfs_notice(mp, "resetting quota flags"); error = xfs_mount_reset_sbqflags(mp); if (error) return error; @@ -1290,7 +938,7 @@ xfs_mountfs( */ error = xfs_log_mount_finish(mp); if (error) { - cmn_err(CE_WARN, "XFS: log mount finish failed"); + xfs_warn(mp, "log mount finish failed"); goto out_rtunmount; } @@ -1304,13 +952,6 @@ xfs_mountfs( xfs_qm_mount_quotas(mp); } -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - if (XFS_IS_QUOTA_ON(mp)) - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); - else - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); -#endif - /* * Now we are mounted, reserve a small amount of unused space for * privileged transactions. This is needed so that transaction @@ -1319,17 +960,16 @@ xfs_mountfs( * attr, unwritten extent conversion at ENOSPC, etc. Data allocations * are not allowed to use this reserved space. * - * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. * This may drive us straight to ENOSPC on mount, but that implies * we were already there on the last unmount. Warn if this occurs. */ - resblks = mp->m_sb.sb_dblocks; - do_div(resblks, 20); - resblks = min_t(__uint64_t, resblks, 1024); - error = xfs_reserve_blocks(mp, &resblks, NULL); - if (error) - cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " - "Continuing without a reserve pool."); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + resblks = xfs_default_resblks(mp); + error = xfs_reserve_blocks(mp, &resblks, NULL); + if (error) + xfs_warn(mp, + "Unable to allocate reserve blocks. Continuing without reserve pool."); + } return 0; @@ -1339,8 +979,14 @@ xfs_mountfs( IRELE(rip); out_log_dealloc: xfs_log_unmount(mp); + out_fail_wait: + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) + xfs_wait_buftarg(mp->m_logdev_targp); + xfs_wait_buftarg(mp->m_ddev_targp); out_free_perag: xfs_free_perag(mp); + out_free_dir: + xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); out: @@ -1358,6 +1004,8 @@ xfs_unmountfs( __uint64_t resblks; int error; + cancel_delayed_work_sync(&mp->m_eofblocks_work); + xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); @@ -1372,22 +1020,23 @@ xfs_unmountfs( * push out the iclog we will never get that unlocked. hence we * need to force the log first. */ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); + xfs_log_force(mp, XFS_LOG_SYNC); - xfs_qm_unmount(mp); + /* + * Flush all pending changes from the AIL. + */ + xfs_ail_push_all_sync(mp->m_ail); /* - * Flush out the log synchronously so that we know for sure - * that nothing is pinned. This is important because bflush() - * will skip pinned buffers. + * And reclaim all inodes. At this point there should be no dirty + * inodes and none should be pinned or locked, but use synchronous + * reclaim just to be sure. We can stop background inode reclaim + * here as well if it is still running. */ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp, SYNC_WAIT); - xfs_binval(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - xfs_binval(mp->m_rtdev_targp); - } + xfs_qm_unmount(mp); /* * Unreserve any blocks we have so that when we unmount we don't account @@ -1406,17 +1055,16 @@ xfs_unmountfs( resblks = 0; error = xfs_reserve_blocks(mp, &resblks, NULL); if (error) - cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " + xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) - cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " + xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); - xfs_unmountfs_writesb(mp); - xfs_unmountfs_wait(mp); /* wait for async bufs */ - xfs_log_unmount_write(mp); + xfs_log_unmount(mp); + xfs_da_unmount(mp); xfs_uuid_unmount(mp); #if defined(DEBUG) @@ -1425,38 +1073,24 @@ xfs_unmountfs( xfs_free_perag(mp); } -STATIC void -xfs_unmountfs_wait(xfs_mount_t *mp) -{ - if (mp->m_logdev_targp != mp->m_ddev_targp) - xfs_wait_buftarg(mp->m_logdev_targp); - if (mp->m_rtdev_targp) - xfs_wait_buftarg(mp->m_rtdev_targp); - xfs_wait_buftarg(mp->m_ddev_targp); -} - int xfs_fs_writable(xfs_mount_t *mp) { - return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || + return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY)); } /* * xfs_log_sbcount * - * Called either periodically to keep the on disk superblock values - * roughly up to date or from unmount to make sure the values are - * correct on a clean unmount. + * Sync the superblock counters to disk. * * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not not + * we may need to use the transaction allocator which does not * block when the transaction subsystem is in its frozen state. */ int -xfs_log_sbcount( - xfs_mount_t *mp, - uint sync) +xfs_log_sbcount(xfs_mount_t *mp) { xfs_trans_t *tp; int error; @@ -1474,95 +1108,20 @@ xfs_log_sbcount( return 0; tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - if (sync) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; } -int -xfs_unmountfs_writesb(xfs_mount_t *mp) -{ - xfs_buf_t *sbp; - int error = 0; - - /* - * skip superblock write if fs is read-only, or - * if we are doing a forced umount. - */ - if (!((mp->m_flags & XFS_MOUNT_RDONLY) || - XFS_FORCED_SHUTDOWN(mp))) { - - sbp = xfs_getsb(mp, 0); - - XFS_BUF_UNDONE(sbp); - XFS_BUF_UNREAD(sbp); - XFS_BUF_UNDELAYWRITE(sbp); - XFS_BUF_WRITE(sbp); - XFS_BUF_UNASYNC(sbp); - ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); - xfsbdstrat(mp, sbp); - error = xfs_iowait(sbp); - if (error) - xfs_ioerror_alert("xfs_unmountfs_writesb", - mp, sbp, XFS_BUF_ADDR(sbp)); - xfs_buf_relse(sbp); - } - return error; -} - /* - * xfs_mod_sb() can be used to copy arbitrary changes to the - * in-core superblock into the superblock buffer to be logged. - * It does not provide the higher level of locking that is - * needed to protect the in-core superblock from concurrent - * access. - */ -void -xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) -{ - xfs_buf_t *bp; - int first; - int last; - xfs_mount_t *mp; - xfs_sb_field_t f; - - ASSERT(fields); - if (!fields) - return; - mp = tp->t_mountp; - bp = xfs_trans_getsb(tp, mp, 0); - first = sizeof(xfs_sb_t); - last = 0; - - /* translate/copy */ - - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); - - /* find modified range */ - - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - first = xfs_sb_info[f].offset; - - f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - last = xfs_sb_info[f + 1].offset - 1; - - xfs_trans_log_buf(tp, bp, first, last); -} - - -/* - * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply + * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply * a delta to a specified field in the in-core superblock. Simply * switch on the field indicated and apply the delta to that field. * Fields are not allowed to dip below zero, so if the delta would @@ -1620,26 +1179,30 @@ xfs_mod_incore_sb_unlocked( lcounter += rem; } } else { /* Taking blocks away */ - lcounter += delta; + if (lcounter >= 0) { + mp->m_sb.sb_fdblocks = lcounter + + XFS_ALLOC_SET_ASIDE(mp); + return 0; + } - /* - * If were out of blocks, use any available reserved blocks if - * were allowed to. - */ + /* + * We are out of blocks, use any available reserved + * blocks if were allowed to. + */ + if (!rsvd) + return XFS_ERROR(ENOSPC); - if (lcounter < 0) { - if (rsvd) { - lcounter = (long long)mp->m_resblks_avail + delta; - if (lcounter < 0) { - return XFS_ERROR(ENOSPC); - } - mp->m_resblks_avail = lcounter; - return 0; - } else { /* not reserved */ - return XFS_ERROR(ENOSPC); - } + lcounter = (long long)mp->m_resblks_avail + delta; + if (lcounter >= 0) { + mp->m_resblks_avail = lcounter; + return 0; } + printk_once(KERN_WARNING + "Filesystem \"%s\": reserve blocks depleted! " + "Consider increasing reserve pool size.", + mp->m_fsname); + return XFS_ERROR(ENOSPC); } mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); @@ -1738,135 +1301,72 @@ xfs_mod_incore_sb_unlocked( */ int xfs_mod_incore_sb( - xfs_mount_t *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) + struct xfs_mount *mp, + xfs_sb_field_t field, + int64_t delta, + int rsvd) { - int status; + int status; - /* check for per-cpu counters */ - switch (field) { #ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - status = xfs_icsb_modify_counters(mp, field, - delta, rsvd); - break; - } - /* FALLTHROUGH */ + ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); #endif - default: - spin_lock(&mp->m_sb_lock); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - spin_unlock(&mp->m_sb_lock); - break; - } + spin_lock(&mp->m_sb_lock); + status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + spin_unlock(&mp->m_sb_lock); return status; } /* - * xfs_mod_incore_sb_batch() is used to change more than one field - * in the in-core superblock structure at a time. This modification - * is protected by a lock internal to this module. The fields and - * changes to those fields are specified in the array of xfs_mod_sb - * structures passed in. + * Change more than one field in the in-core superblock structure at a time. + * + * The fields and changes to those fields are specified in the array of + * xfs_mod_sb structures passed in. Either all of the specified deltas + * will be applied or none of them will. If any modified field dips below 0, + * then all modifications will be backed out and EINVAL will be returned. * - * Either all of the specified deltas will be applied or none of - * them will. If any modified field dips below 0, then all modifications - * will be backed out and EINVAL will be returned. + * Note that this function may not be used for the superblock values that + * are tracked with the in-memory per-cpu counters - a direct call to + * xfs_icsb_modify_counters is required for these. */ int -xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) +xfs_mod_incore_sb_batch( + struct xfs_mount *mp, + xfs_mod_sb_t *msb, + uint nmsb, + int rsvd) { - int status=0; - xfs_mod_sb_t *msbp; + xfs_mod_sb_t *msbp; + int error = 0; /* - * Loop through the array of mod structures and apply each - * individually. If any fail, then back out all those - * which have already been applied. Do all of this within - * the scope of the m_sb_lock so that all of the changes will - * be atomic. + * Loop through the array of mod structures and apply each individually. + * If any fail, then back out all those which have already been applied. + * Do all of this within the scope of the m_sb_lock so that all of the + * changes will be atomic. */ spin_lock(&mp->m_sb_lock); - msbp = &msb[0]; - for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { - /* - * Apply the delta at index n. If it fails, break - * from the loop so we'll fall into the undo loop - * below. - */ - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - break; - } + for (msbp = msb; msbp < (msb + nmsb); msbp++) { + ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || + msbp->msb_field > XFS_SBS_FDBLOCKS); - if (status != 0) { - break; - } + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + msbp->msb_delta, rsvd); + if (error) + goto unwind; } + spin_unlock(&mp->m_sb_lock); + return 0; - /* - * If we didn't complete the loop above, then back out - * any changes made to the superblock. If you add code - * between the loop above and here, make sure that you - * preserve the value of status. Loop back until - * we step below the beginning of the array. Make sure - * we don't touch anything back there. - */ - if (status != 0) { - msbp--; - while (msbp >= msb) { - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - break; - } - ASSERT(status == 0); - msbp--; - } +unwind: + while (--msbp >= msb) { + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + -msbp->msb_delta, rsvd); + ASSERT(error == 0); } spin_unlock(&mp->m_sb_lock); - return status; + return error; } /* @@ -1878,23 +1378,20 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) * the superblock buffer if it can be locked without sleeping. * If it can't then we'll return NULL. */ -xfs_buf_t * +struct xfs_buf * xfs_getsb( - xfs_mount_t *mp, - int flags) + struct xfs_mount *mp, + int flags) { - xfs_buf_t *bp; + struct xfs_buf *bp = mp->m_sb_bp; - ASSERT(mp->m_sb_bp != NULL); - bp = mp->m_sb_bp; - if (flags & XFS_BUF_TRYLOCK) { - if (!XFS_BUF_CPSEMA(bp)) { + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) return NULL; - } - } else { - XFS_BUF_PSEMA(bp, PRIBIO); + xfs_buf_lock(bp); } - XFS_BUF_HOLD(bp); + + xfs_buf_hold(bp); ASSERT(XFS_BUF_ISDONE(bp)); return bp; } @@ -1904,18 +1401,13 @@ xfs_getsb( */ void xfs_freesb( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp = mp->m_sb_bp; - /* - * Use xfs_getsb() so that the buffer will be locked - * when we call xfs_buf_relse(). - */ - bp = xfs_getsb(mp, 0); - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); + xfs_buf_lock(bp); mp->m_sb_bp = NULL; + xfs_buf_relse(bp); } /* @@ -1936,8 +1428,7 @@ xfs_mount_log_sb( XFS_SB_VERSIONNUM)); tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); - error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return error; @@ -1947,6 +1438,24 @@ xfs_mount_log_sb( return error; } +/* + * If the underlying (data/log/rt) device is readonly, there are some + * operations that cannot proceed. + */ +int +xfs_dev_is_read_only( + struct xfs_mount *mp, + char *message) +{ + if (xfs_readonly_buftarg(mp->m_ddev_targp) || + xfs_readonly_buftarg(mp->m_logdev_targp) || + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { + xfs_notice(mp, "%s required on read-only device.", message); + xfs_notice(mp, "write access unavailable, cannot proceed."); + return EROFS; + } + return 0; +} #ifdef HAVE_PERCPU_SB /* @@ -2077,12 +1586,6 @@ xfs_icsb_init_counters( if (mp->m_sb_cnts == NULL) return -ENOMEM; -#ifdef CONFIG_HOTPLUG_CPU - mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; - mp->m_icsb_notifier.priority = 0; - register_hotcpu_notifier(&mp->m_icsb_notifier); -#endif /* CONFIG_HOTPLUG_CPU */ - for_each_online_cpu(i) { cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); @@ -2095,6 +1598,13 @@ xfs_icsb_init_counters( * initial balance kicks us off correctly */ mp->m_icsb_counters = -1; + +#ifdef CONFIG_HOTPLUG_CPU + mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; + mp->m_icsb_notifier.priority = 0; + register_hotcpu_notifier(&mp->m_icsb_notifier); +#endif /* CONFIG_HOTPLUG_CPU */ + return 0; } @@ -2382,7 +1892,7 @@ xfs_icsb_balance_counter( spin_unlock(&mp->m_sb_lock); } -STATIC int +int xfs_icsb_modify_counters( xfs_mount_t *mp, xfs_sb_field_t field, |
