aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_super.c')
-rw-r--r--fs/xfs/xfs_super.c652
1 files changed, 381 insertions, 271 deletions
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee5b695c99a..8f0333b3f7a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -17,41 +17,36 @@
*/
#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_inum.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
+#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
+#include "xfs_alloc.h"
#include "xfs_error.h"
-#include "xfs_itable.h"
#include "xfs_fsops.h"
-#include "xfs_attr.h"
+#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
+#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
-#include "xfs_sync.h"
+#include "xfs_icache.h"
#include "xfs_trace.h"
+#include "xfs_icreate_item.h"
+#include "xfs_dinode.h"
+#include "xfs_filestream.h"
+#include "xfs_quota.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -89,6 +84,8 @@ mempool_t *xfs_ioend_pool;
* unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
+#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
+ * XFS_MAXINUMBER_32 */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
@@ -121,20 +118,26 @@ mempool_t *xfs_ioend_pool;
* in the future, too.
*/
enum {
- Opt_barrier, Opt_nobarrier, Opt_err
+ Opt_barrier,
+ Opt_nobarrier,
+ Opt_inode64,
+ Opt_inode32,
+ Opt_err
};
static const match_table_t tokens = {
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
+ {Opt_inode64, "inode64"},
+ {Opt_inode32, "inode32"},
{Opt_err, NULL}
};
STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
+suffix_kstrtoint(char *s, unsigned int base, int *res)
{
- int last, shift_left_factor = 0;
+ int last, shift_left_factor = 0, _res;
char *value = s;
last = strlen(value) - 1;
@@ -151,7 +154,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
value[last] = '\0';
}
- return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+ if (kstrtoint(s, base, &_res))
+ return -EINVAL;
+ *res = _res << shift_left_factor;
+ return 0;
}
/*
@@ -167,7 +173,7 @@ xfs_parseargs(
char *options)
{
struct super_block *sb = mp->m_super;
- char *this_char, *value, *eov;
+ char *this_char, *value;
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
@@ -198,7 +204,9 @@ xfs_parseargs(
*/
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+#if !XFS_BIG_INUMS
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+#endif
/*
* These can be overridden by the mount option parsing.
@@ -221,14 +229,16 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- mp->m_logbufs = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &mp->m_logbufs))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
return EINVAL;
}
- mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+ if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
@@ -257,7 +267,8 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- iosize = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &iosize))
+ return EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
@@ -265,7 +276,8 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- iosize = suffix_strtoul(value, &eov, 10);
+ if (suffix_kstrtoint(value, 10, &iosize))
+ return EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -287,14 +299,18 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- dsunit = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &dsunit))
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
return EINVAL;
}
- dswidth = simple_strtoul(value, &eov, 10);
+ if (kstrtoint(value, 10, &dswidth))
+ return EINVAL;
+ } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
@@ -324,10 +340,9 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
mp->m_flags |= XFS_MOUNT_FILESTREAMS;
} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
- mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
!strcmp(this_char, MNTOPT_UQUOTA) ||
!strcmp(this_char, MNTOPT_USRQUOTA)) {
@@ -340,17 +355,17 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
!strcmp(this_char, MNTOPT_PRJQUOTA)) {
mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_OQUOTA_ENFD);
+ XFS_PQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+ mp->m_qflags &= ~XFS_PQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
!strcmp(this_char, MNTOPT_GRPQUOTA)) {
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_OQUOTA_ENFD);
+ XFS_GQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+ mp->m_qflags &= ~XFS_GQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
xfs_warn(mp,
"delaylog is the default now, option is deprecated.");
@@ -401,12 +416,6 @@ xfs_parseargs(
}
#endif
- if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
- (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
- xfs_warn(mp, "cannot mount with both project and group quota");
- return EINVAL;
- }
-
if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
xfs_warn(mp, "sunit and swidth must be specified together");
return EINVAL;
@@ -420,20 +429,15 @@ xfs_parseargs(
}
done:
- if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+ if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
/*
* At this point the superblock has not been read
* in, therefore we do not know the block size.
* Before the mount call ends we will convert
* these to FSBs.
*/
- if (dsunit) {
- mp->m_dalign = dsunit;
- mp->m_flags |= XFS_MOUNT_RETERR;
- }
-
- if (dswidth)
- mp->m_swidth = dswidth;
+ mp->m_dalign = dsunit;
+ mp->m_swidth = dswidth;
}
if (mp->m_logbufs != -1 &&
@@ -494,6 +498,7 @@ xfs_showargs(
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
+ { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
@@ -540,15 +545,14 @@ xfs_showargs(
else if (mp->m_qflags & XFS_UQUOTA_ACCT)
seq_puts(m, "," MNTOPT_UQUOTANOENF);
- /* Either project or group quotas can be active, not both */
-
if (mp->m_qflags & XFS_PQUOTA_ACCT) {
- if (mp->m_qflags & XFS_OQUOTA_ENFD)
+ if (mp->m_qflags & XFS_PQUOTA_ENFD)
seq_puts(m, "," MNTOPT_PRJQUOTA);
else
seq_puts(m, "," MNTOPT_PQUOTANOENF);
- } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
- if (mp->m_qflags & XFS_OQUOTA_ENFD)
+ }
+ if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+ if (mp->m_qflags & XFS_GQUOTA_ENFD)
seq_puts(m, "," MNTOPT_GRPQUOTA);
else
seq_puts(m, "," MNTOPT_GQUOTANOENF);
@@ -593,6 +597,80 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
+xfs_agnumber_t
+xfs_set_inode32(struct xfs_mount *mp)
+{
+ xfs_agnumber_t index = 0;
+ xfs_agnumber_t maxagi = 0;
+ xfs_sb_t *sbp = &mp->m_sb;
+ xfs_agnumber_t max_metadata;
+ xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
+ xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+ xfs_perag_t *pag;
+
+ /* Calculate how much should be reserved for inodes to meet
+ * the max inode percentage.
+ */
+ if (mp->m_maxicount) {
+ __uint64_t icount;
+
+ icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+ do_div(icount, 100);
+ icount += sbp->sb_agblocks - 1;
+ do_div(icount, sbp->sb_agblocks);
+ max_metadata = icount;
+ } else {
+ max_metadata = sbp->sb_agcount;
+ }
+
+ for (index = 0; index < sbp->sb_agcount; index++) {
+ ino = XFS_AGINO_TO_INO(mp, index, agino);
+
+ if (ino > XFS_MAXINUMBER_32) {
+ pag = xfs_perag_get(mp, index);
+ pag->pagi_inodeok = 0;
+ pag->pagf_metadata = 0;
+ xfs_perag_put(pag);
+ continue;
+ }
+
+ pag = xfs_perag_get(mp, index);
+ pag->pagi_inodeok = 1;
+ maxagi++;
+ if (index < max_metadata)
+ pag->pagf_metadata = 1;
+ xfs_perag_put(pag);
+ }
+ mp->m_flags |= (XFS_MOUNT_32BITINODES |
+ XFS_MOUNT_SMALL_INUMS);
+
+ return maxagi;
+}
+
+xfs_agnumber_t
+xfs_set_inode64(struct xfs_mount *mp)
+{
+ xfs_agnumber_t index = 0;
+
+ for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, index);
+ pag->pagi_inodeok = 1;
+ pag->pagf_metadata = 0;
+ xfs_perag_put(pag);
+ }
+
+ /* There is no need for lock protection on m_flags,
+ * the rw_semaphore of the VFS superblock is locked
+ * during mount/umount/remount operations, so this is
+ * enough to avoid concurency on the m_flags field
+ */
+ mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+ XFS_MOUNT_SMALL_INUMS);
+ return index;
+}
+
STATIC int
xfs_blkdev_get(
xfs_mount_t *mp,
@@ -623,7 +701,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
}
STATIC void
@@ -687,20 +765,18 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
- mp->m_fsname);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
- mp->m_fsname);
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -733,8 +809,7 @@ xfs_setup_devices(
{
int error;
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
+ error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -744,14 +819,12 @@ xfs_setup_devices(
if (xfs_sb_version_hassector(&mp->m_sb))
log_sector_size = mp->m_sb.sb_logsectsize;
error = xfs_setsize_buftarg(mp->m_logdev_targp,
- mp->m_sb.sb_blocksize,
log_sector_size);
if (error)
return error;
}
if (mp->m_rtdev_targp) {
error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_blocksize,
mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -760,6 +833,86 @@ xfs_setup_devices(
return 0;
}
+STATIC int
+xfs_init_mount_workqueues(
+ struct xfs_mount *mp)
+{
+ mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
+ WQ_MEM_RECLAIM, 0, mp->m_fsname);
+ if (!mp->m_data_workqueue)
+ goto out;
+
+ mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
+ WQ_MEM_RECLAIM, 0, mp->m_fsname);
+ if (!mp->m_unwritten_workqueue)
+ goto out_destroy_data_iodone_queue;
+
+ mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
+ WQ_MEM_RECLAIM, 0, mp->m_fsname);
+ if (!mp->m_cil_workqueue)
+ goto out_destroy_unwritten;
+
+ mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
+ 0, 0, mp->m_fsname);
+ if (!mp->m_reclaim_workqueue)
+ goto out_destroy_cil;
+
+ mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
+ 0, 0, mp->m_fsname);
+ if (!mp->m_log_workqueue)
+ goto out_destroy_reclaim;
+
+ mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
+ 0, 0, mp->m_fsname);
+ if (!mp->m_eofblocks_workqueue)
+ goto out_destroy_log;
+
+ return 0;
+
+out_destroy_log:
+ destroy_workqueue(mp->m_log_workqueue);
+out_destroy_reclaim:
+ destroy_workqueue(mp->m_reclaim_workqueue);
+out_destroy_cil:
+ destroy_workqueue(mp->m_cil_workqueue);
+out_destroy_unwritten:
+ destroy_workqueue(mp->m_unwritten_workqueue);
+out_destroy_data_iodone_queue:
+ destroy_workqueue(mp->m_data_workqueue);
+out:
+ return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_mount_workqueues(
+ struct xfs_mount *mp)
+{
+ destroy_workqueue(mp->m_eofblocks_workqueue);
+ destroy_workqueue(mp->m_log_workqueue);
+ destroy_workqueue(mp->m_reclaim_workqueue);
+ destroy_workqueue(mp->m_cil_workqueue);
+ destroy_workqueue(mp->m_data_workqueue);
+ destroy_workqueue(mp->m_unwritten_workqueue);
+}
+
+/*
+ * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
+ * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
+ * for IO to complete so that we effectively throttle multiple callers to the
+ * rate at which IO is completing.
+ */
+void
+xfs_flush_inodes(
+ struct xfs_mount *mp)
+{
+ struct super_block *sb = mp->m_super;
+
+ if (down_read_trylock(&sb->s_umount)) {
+ sync_inodes_sb(sb);
+ up_read(&sb->s_umount);
+ }
+}
+
/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
@@ -783,10 +936,6 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(vn_reclaim);
- /* bad inode, get out here ASAP */
- if (is_bad_inode(inode))
- goto out_reclaim;
-
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
/*
@@ -802,7 +951,6 @@ xfs_fs_destroy_inode(
* this more efficiently than we can here, so simply let background
* reclaim tear down all inodes.
*/
-out_reclaim:
xfs_inode_set_reclaim_tag(ip);
}
@@ -833,127 +981,41 @@ xfs_fs_inode_init_once(
"xfsino", ip->i_ino);
}
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
- struct inode *inode,
- int flags)
-{
- barrier();
- XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_fs_write_inode(
- struct inode *inode,
- struct writeback_control *wbc)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- int error = EAGAIN;
-
- trace_xfs_write_inode(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
- /*
- * Make sure the inode has made it it into the log. Instead
- * of forcing it all the way to stable storage using a
- * synchronous transaction we let the log force inside the
- * ->sync_fs call do that for thus, which reduces the number
- * of synchronous log forces dramatically.
- */
- error = xfs_log_dirty_inode(ip, NULL, 0);
- if (error)
- goto out;
- return 0;
- } else {
- if (!ip->i_update_core)
- return 0;
-
- /*
- * We make this non-blocking if the inode is contended, return
- * EAGAIN to indicate to the caller that they did not succeed.
- * This prevents the flush path from blocking on inodes inside
- * another operation right now, they get caught later by
- * xfs_sync.
- */
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
- goto out;
-
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
-
- /*
- * Now we have the flush lock and the inode is not pinned, we
- * can check if the inode is really clean as we know that
- * there are no pending transaction completions, it is not
- * waiting on the delayed write queue and there is no IO in
- * progress.
- */
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- error = 0;
- goto out_unlock;
- }
- error = xfs_iflush(ip, SYNC_TRYLOCK);
- }
-
- out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
- /*
- * if we failed to write out the inode then mark
- * it dirty again so we'll try again later.
- */
- if (error)
- xfs_mark_inode_dirty_sync(ip);
- return -error;
-}
-
STATIC void
xfs_fs_evict_inode(
struct inode *inode)
{
xfs_inode_t *ip = XFS_I(inode);
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+
trace_xfs_evict_inode(ip);
- truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
XFS_STATS_INC(vn_rele);
XFS_STATS_INC(vn_remove);
XFS_STATS_DEC(vn_active);
- /*
- * The iolock is used by the file system to coordinate reads,
- * writes, and block truncates. Up to this point the lock
- * protected concurrent accesses by users of the inode. But
- * from here forward we're doing some final processing of the
- * inode because we're done with it, and although we reuse the
- * iolock for protection it is really a distinct lock class
- * (in the lockdep sense) from before. To keep lockdep happy
- * (and basically indicate what we are doing), we explicitly
- * re-init the iolock here.
- */
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
- &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
xfs_inactive(ip);
}
+/*
+ * We do an unlocked check for XFS_IDONTCACHE here because we are already
+ * serialised against cache hits here via the inode->i_lock and igrab() in
+ * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
+ * racing with us, and it avoids needing to grab a spinlock here for every inode
+ * we drop the final reference on.
+ */
+STATIC int
+xfs_fs_drop_inode(
+ struct inode *inode)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
+}
+
STATIC void
xfs_free_fsname(
struct xfs_mount *mp)
@@ -969,20 +1031,12 @@ xfs_fs_put_super(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_syncd_stop(mp);
-
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
xfs_filestream_unmount(mp);
-
- xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
xfs_unmountfs(mp);
+
xfs_freesb(mp);
xfs_icsb_destroy_counters(mp);
+ xfs_destroy_mount_workqueues(mp);
xfs_close_devices(mp);
xfs_free_fsname(mp);
kfree(mp);
@@ -994,7 +1048,6 @@ xfs_fs_sync_fs(
int wait)
{
struct xfs_mount *mp = XFS_M(sb);
- int error;
/*
* Doing anything during the async pass would be counterproductive.
@@ -1002,17 +1055,14 @@ xfs_fs_sync_fs(
if (!wait)
return 0;
- error = xfs_quiesce_data(mp);
- if (error)
- return -error;
-
+ xfs_log_force(mp, XFS_LOG_SYNC);
if (laptop_mode) {
/*
* The disk must be active because we're syncing.
- * We schedule xfssyncd now (now that the disk is
+ * We schedule log work now (now that the disk is
* active) instead of later (when it might not be).
*/
- flush_delayed_work_sync(&mp->m_sync_work);
+ flush_delayed_work(&mp->m_log->l_work);
}
return 0;
@@ -1059,9 +1109,9 @@ xfs_fs_statfs(
spin_unlock(&mp->m_sb_lock);
- if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
- ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+ if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
xfs_qm_statvfs(ip, statp);
return 0;
}
@@ -1089,6 +1139,48 @@ xfs_restore_resvblks(struct xfs_mount *mp)
xfs_reserve_blocks(mp, &resblks, NULL);
}
+/*
+ * Trigger writeback of all the dirty metadata in the file system.
+ *
+ * This ensures that the metadata is written to their location on disk rather
+ * than just existing in transactions in the log. This means after a quiesce
+ * there is no log replay required to write the inodes to disk - this is the
+ * primary difference between a sync and a quiesce.
+ *
+ * Note: xfs_log_quiesce() stops background log work - the callers must ensure
+ * it is started again when appropriate.
+ */
+static void
+xfs_quiesce_attr(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+
+ /* wait for all modifications to complete */
+ while (atomic_read(&mp->m_active_trans) > 0)
+ delay(100);
+
+ /* force the log to unpin objects from the now complete transactions */
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
+ /* reclaim inodes to do any IO before the freeze completes */
+ xfs_reclaim_inodes(mp, 0);
+ xfs_reclaim_inodes(mp, SYNC_WAIT);
+
+ /* Push the superblock and write an unmount record */
+ error = xfs_log_sbcount(mp);
+ if (error)
+ xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
+ "Frozen image may not be consistent.");
+ /*
+ * Just warn here till VFS can correctly support
+ * read-only remount without racing.
+ */
+ WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+ xfs_log_quiesce(mp);
+}
+
STATIC int
xfs_fs_remount(
struct super_block *sb,
@@ -1100,6 +1192,7 @@ xfs_fs_remount(
char *p;
int error;
+ sync_filesystem(sb);
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -1114,6 +1207,12 @@ xfs_fs_remount(
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
+ case Opt_inode64:
+ mp->m_maxagi = xfs_set_inode64(mp);
+ break;
+ case Opt_inode32:
+ mp->m_maxagi = xfs_set_inode32(mp);
+ break;
default:
/*
* Logically we would return an error here to prevent
@@ -1133,7 +1232,7 @@ xfs_fs_remount(
*/
#if 0
xfs_info(mp,
- "mount option \"%s\" not supported for remount\n", p);
+ "mount option \"%s\" not supported for remount", p);
return -EINVAL;
#else
break;
@@ -1163,20 +1262,18 @@ xfs_fs_remount(
* value if it is non-zero, otherwise go with the default.
*/
xfs_restore_resvblks(mp);
+ xfs_log_work_queue(mp);
}
/* rw -> ro */
if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
/*
- * After we have synced the data but before we sync the
- * metadata, we need to free up the reserve block pool so that
- * the used block count in the superblock on disk is correct at
- * the end of the remount. Stash the current reserve pool size
- * so that if we get remounted rw, we can return it to the same
- * size.
+ * Before we sync the metadata, we need to free up the reserve
+ * block pool so that the used block count in the superblock on
+ * disk is correct at the end of the remount. Stash the current
+ * reserve pool size so that if we get remounted rw, we can
+ * return it to the same size.
*/
-
- xfs_quiesce_data(mp);
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
mp->m_flags |= XFS_MOUNT_RDONLY;
@@ -1208,6 +1305,7 @@ xfs_fs_unfreeze(
struct xfs_mount *mp = XFS_M(sb);
xfs_restore_resvblks(mp);
+ xfs_log_work_queue(mp);
return 0;
}
@@ -1250,6 +1348,17 @@ xfs_finish_flags(
}
/*
+ * V5 filesystems always use attr2 format for attributes.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ xfs_warn(mp,
+"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
+ MNTOPT_NOATTR2, MNTOPT_ATTR2);
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
* mkfs'ed attr2 will turn on attr2 mount unless explicitly
* told by noattr2 to turn it off
*/
@@ -1266,6 +1375,14 @@ xfs_finish_flags(
return XFS_ERROR(EROFS);
}
+ if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+ (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
+ !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+ xfs_warn(mp,
+ "Super block does not support project and group quota together");
+ return XFS_ERROR(EINVAL);
+ }
+
return 0;
}
@@ -1286,6 +1403,8 @@ xfs_fs_fill_super(
spin_lock_init(&mp->m_sb_lock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
+ INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
mp->m_super = sb;
sb->s_fs_info = mp;
@@ -1309,10 +1428,14 @@ xfs_fs_fill_super(
if (error)
goto out_free_fsname;
- error = xfs_icsb_init_counters(mp);
+ error = -xfs_init_mount_workqueues(mp);
if (error)
goto out_close_devices;
+ error = -xfs_icsb_init_counters(mp);
+ if (error)
+ goto out_destroy_workqueues;
+
error = xfs_readsb(mp, flags);
if (error)
goto out_destroy_counters;
@@ -1332,39 +1455,32 @@ xfs_fs_fill_super(
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes.
- * For the same reason we must also initialise the syncd and register
- * the inode cache shrinker so that inodes can be reclaimed during
- * operations like a quotacheck that iterate all inodes in the
- * filesystem.
*/
sb->s_magic = XFS_SB_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+ sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
set_posix_acl_flag(sb);
+ /* version 5 superblocks support inode version counters. */
+ if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+ sb->s_flags |= MS_I_VERSION;
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
- error = xfs_syncd_init(mp);
- if (error)
- goto out_unmount;
-
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = ENOENT;
- goto out_syncd_stop;
- }
- if (is_bad_inode(root)) {
- error = EINVAL;
- goto out_syncd_stop;
+ goto out_unmount;
}
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
error = ENOMEM;
- goto out_iput;
+ goto out_unmount;
}
return 0;
@@ -1375,6 +1491,8 @@ xfs_fs_fill_super(
xfs_freesb(mp);
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
+out_destroy_workqueues:
+ xfs_destroy_mount_workqueues(mp);
out_close_devices:
xfs_close_devices(mp);
out_free_fsname:
@@ -1383,20 +1501,8 @@ xfs_fs_fill_super(
out:
return -error;
- out_iput:
- iput(root);
- out_syncd_stop:
- xfs_syncd_stop(mp);
out_unmount:
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
xfs_filestream_unmount(mp);
-
- xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
xfs_unmountfs(mp);
goto out_free_sb;
}
@@ -1411,27 +1517,28 @@ xfs_fs_mount(
return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
-static int
+static long
xfs_fs_nr_cached_objects(
- struct super_block *sb)
+ struct super_block *sb,
+ int nid)
{
return xfs_reclaim_inodes_count(XFS_M(sb));
}
-static void
+static long
xfs_fs_free_cached_objects(
struct super_block *sb,
- int nr_to_scan)
+ long nr_to_scan,
+ int nid)
{
- xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+ return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
}
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
- .dirty_inode = xfs_fs_dirty_inode,
- .write_inode = xfs_fs_write_inode,
.evict_inode = xfs_fs_evict_inode,
+ .drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
@@ -1450,6 +1557,7 @@ static struct file_system_type xfs_fs_type = {
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
+MODULE_ALIAS_FS("xfs");
STATIC int __init
xfs_init_zones(void)
@@ -1484,13 +1592,9 @@ xfs_init_zones(void)
if (!xfs_da_state_zone)
goto out_destroy_btree_cur_zone;
- xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
- if (!xfs_dabuf_zone)
- goto out_destroy_da_state_zone;
-
xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
if (!xfs_ifork_zone)
- goto out_destroy_dabuf_zone;
+ goto out_destroy_da_state_zone;
xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
if (!xfs_trans_zone)
@@ -1507,9 +1611,8 @@ xfs_init_zones(void)
* size possible under XFS. This wastes a little bit of memory,
* but it is much faster.
*/
- xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
- (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
- NBWORD) * sizeof(int))), "xfs_buf_item");
+ xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
+ "xfs_buf_item");
if (!xfs_buf_item_zone)
goto out_destroy_log_item_desc_zone;
@@ -1537,9 +1640,15 @@ xfs_init_zones(void)
KM_ZONE_SPREAD, NULL);
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
+ xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
+ "xfs_icr");
+ if (!xfs_icreate_zone)
+ goto out_destroy_ili_zone;
return 0;
+ out_destroy_ili_zone:
+ kmem_zone_destroy(xfs_ili_zone);
out_destroy_inode_zone:
kmem_zone_destroy(xfs_inode_zone);
out_destroy_efi_zone:
@@ -1554,8 +1663,6 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_trans_zone);
out_destroy_ifork_zone:
kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
- kmem_zone_destroy(xfs_dabuf_zone);
out_destroy_da_state_zone:
kmem_zone_destroy(xfs_da_state_zone);
out_destroy_btree_cur_zone:
@@ -1575,6 +1682,12 @@ xfs_init_zones(void)
STATIC void
xfs_destroy_zones(void)
{
+ /*
+ * Make sure all delayed rcu free are flushed before we
+ * destroy caches.
+ */
+ rcu_barrier();
+ kmem_zone_destroy(xfs_icreate_zone);
kmem_zone_destroy(xfs_ili_zone);
kmem_zone_destroy(xfs_inode_zone);
kmem_zone_destroy(xfs_efi_zone);
@@ -1583,7 +1696,6 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_log_item_desc_zone);
kmem_zone_destroy(xfs_trans_zone);
kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_dabuf_zone);
kmem_zone_destroy(xfs_da_state_zone);
kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone);
@@ -1597,21 +1709,22 @@ STATIC int __init
xfs_init_workqueues(void)
{
/*
- * We never want to the same work item to run twice, reclaiming inodes
- * or idling the log is not going to get any faster by multiple CPUs
- * competing for ressources. Use the default large max_active value
- * so that even lots of filesystems can perform these task in parallel.
+ * The allocation workqueue can be used in memory reclaim situations
+ * (writepage path), and parallelism is only limited by the number of
+ * AGs in all the filesystems mounted. Hence use the default large
+ * max_active value for this workqueue.
*/
- xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
- if (!xfs_syncd_wq)
+ xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
+ if (!xfs_alloc_wq)
return -ENOMEM;
+
return 0;
}
STATIC void
xfs_destroy_workqueues(void)
{
- destroy_workqueue(xfs_syncd_wq);
+ destroy_workqueue(xfs_alloc_wq);
}
STATIC int __init
@@ -1636,13 +1749,9 @@ init_xfs_fs(void)
if (error)
goto out_destroy_wq;
- error = xfs_filestream_init();
- if (error)
- goto out_mru_cache_uninit;
-
error = xfs_buf_init();
if (error)
- goto out_filestream_uninit;
+ goto out_mru_cache_uninit;
error = xfs_init_procfs();
if (error)
@@ -1652,21 +1761,23 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
- vfs_initquota();
+ error = xfs_qm_init();
+ if (error)
+ goto out_sysctl_unregister;
error = register_filesystem(&xfs_fs_type);
if (error)
- goto out_sysctl_unregister;
+ goto out_qm_exit;
return 0;
+ out_qm_exit:
+ xfs_qm_exit();
out_sysctl_unregister:
xfs_sysctl_unregister();
out_cleanup_procfs:
xfs_cleanup_procfs();
out_buf_terminate:
xfs_buf_terminate();
- out_filestream_uninit:
- xfs_filestream_uninit();
out_mru_cache_uninit:
xfs_mru_cache_uninit();
out_destroy_wq:
@@ -1680,12 +1791,11 @@ init_xfs_fs(void)
STATIC void __exit
exit_xfs_fs(void)
{
- vfs_exitquota();
+ xfs_qm_exit();
unregister_filesystem(&xfs_fs_type);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
- xfs_filestream_uninit();
xfs_mru_cache_uninit();
xfs_destroy_workqueues();
xfs_destroy_zones();