aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/quota/quota.c3
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/xfs_aops.c183
-rw-r--r--fs/xfs/xfs_aops.h4
-rw-r--r--fs/xfs/xfs_bmap.c13
-rw-r--r--fs/xfs/xfs_buf.c17
-rw-r--r--fs/xfs/xfs_dfrag.c24
-rw-r--r--fs/xfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/xfs_dquot.c418
-rw-r--r--fs/xfs/xfs_dquot.h49
-rw-r--r--fs/xfs/xfs_file.c84
-rw-r--r--fs/xfs/xfs_iget.c41
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_inode_item.c297
-rw-r--r--fs/xfs/xfs_inode_item.h16
-rw-r--r--fs/xfs/xfs_ioctl.c14
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_iops.c71
-rw-r--r--fs/xfs/xfs_itable.c21
-rw-r--r--fs/xfs/xfs_log.c612
-rw-r--r--fs/xfs/xfs_log.h16
-rw-r--r--fs/xfs/xfs_log_priv.h28
-rw-r--r--fs/xfs/xfs_log_recover.c6
-rw-r--r--fs/xfs/xfs_mount.c8
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c628
-rw-r--r--fs/xfs/xfs_qm.h49
-rw-r--r--fs/xfs/xfs_qm_bhv.c42
-rw-r--r--fs/xfs/xfs_qm_stats.c105
-rw-r--r--fs/xfs/xfs_qm_stats.h53
-rw-r--r--fs/xfs/xfs_qm_syscalls.c130
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_quota_priv.h11
-rw-r--r--fs/xfs/xfs_sb.h1
-rw-r--r--fs/xfs/xfs_stats.c99
-rw-r--r--fs/xfs/xfs_stats.h10
-rw-r--r--fs/xfs/xfs_super.c164
-rw-r--r--fs/xfs/xfs_super.h8
-rw-r--r--fs/xfs/xfs_sync.c46
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.h28
-rw-r--r--fs/xfs/xfs_trans.c31
-rw-r--r--fs/xfs/xfs_trans_ail.c83
-rw-r--r--fs/xfs/xfs_trans_buf.c25
-rw-r--r--fs/xfs/xfs_trans_dquot.c21
-rw-r--r--fs/xfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_vnode.h1
-rw-r--r--fs/xfs/xfs_vnodeops.h3
51 files changed, 1375 insertions, 2250 deletions
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fc2c4388d12..9a391204ca2 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_XGETQUOTA:
return quota_getxquota(sb, type, id, addr);
case Q_XQUOTASYNC:
- /* caller already holds s_umount */
if (sb->s_flags & MS_RDONLY)
return -EROFS;
- writeback_inodes_sb(sb, WB_REASON_SYNC);
+ /* XFS quotas are fully coherent now, making this call a noop */
return 0;
default:
return -EINVAL;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 427a4e82a58..0a9977983f9 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs_qm_bhv.o \
xfs_qm.o \
xfs_quotaops.o
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
-endif
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 74b9baf36ac..0dbb9e70fe2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -26,6 +26,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
@@ -99,23 +100,6 @@ xfs_destroy_ioend(
}
/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
- xfs_ioend_t *ioend)
-{
- xfs_inode_t *ip = XFS_I(ioend->io_inode);
- xfs_fsize_t isize;
- xfs_fsize_t bsize;
-
- bsize = ioend->io_offset + ioend->io_size;
- isize = MIN(i_size_read(VFS_I(ip)), bsize);
- return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
* Fast and loose check if this write could update the on-disk inode size.
*/
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
XFS_I(ioend->io_inode)->i_d.di_size;
}
+STATIC int
+xfs_setfilesize_trans_alloc(
+ struct xfs_ioend *ioend)
+{
+ struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ ioend->io_append_trans = tp;
+
+ /*
+ * We hand off the transaction to the completion thread now, so
+ * clear the flag here.
+ */
+ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ return 0;
+}
+
/*
* Update on-disk file size now that data has been written to disk.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
*/
STATIC int
xfs_setfilesize(
- xfs_ioend_t *ioend)
+ struct xfs_ioend *ioend)
{
- xfs_inode_t *ip = XFS_I(ioend->io_inode);
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ struct xfs_trans *tp = ioend->io_append_trans;
xfs_fsize_t isize;
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
- return EAGAIN;
+ /*
+ * The transaction was allocated in the I/O submission thread,
+ * thus we need to mark ourselves as beeing in a transaction
+ * manually.
+ */
+ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
- isize = xfs_ioend_new_eof(ioend);
- if (isize) {
- trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
- ip->i_d.di_size = isize;
- xfs_mark_inode_dirty(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
+ if (!isize) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_cancel(tp, 0);
+ return 0;
}
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
+ trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+
+ ip->i_d.di_size = isize;
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ return xfs_trans_commit(tp, 0);
}
/*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
struct xfs_ioend *ioend)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
+ struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+
if (ioend->io_type == IO_UNWRITTEN)
- queue_work(xfsconvertd_workqueue, &ioend->io_work);
- else if (xfs_ioend_is_append(ioend))
- queue_work(xfsdatad_workqueue, &ioend->io_work);
+ queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+ else if (ioend->io_append_trans)
+ queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
@@ -195,35 +214,36 @@ xfs_end_io(
* range to normal written extens after the data I/O has finished.
*/
if (ioend->io_type == IO_UNWRITTEN) {
+ /*
+ * For buffered I/O we never preallocate a transaction when
+ * doing the unwritten extent conversion, but for direct I/O
+ * we do not know if we are converting an unwritten extent
+ * or not at the point where we preallocate the transaction.
+ */
+ if (ioend->io_append_trans) {
+ ASSERT(ioend->io_isdirect);
+
+ current_set_flags_nested(
+ &ioend->io_append_trans->t_pflags, PF_FSTRANS);
+ xfs_trans_cancel(ioend->io_append_trans, 0);
+ }
+
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
if (error) {
ioend->io_error = -error;
goto done;
}
+ } else if (ioend->io_append_trans) {
+ error = xfs_setfilesize(ioend);
+ if (error)
+ ioend->io_error = -error;
+ } else {
+ ASSERT(!xfs_ioend_is_append(ioend));
}
- /*
- * We might have to update the on-disk file size after extending
- * writes.
- */
- error = xfs_setfilesize(ioend);
- ASSERT(!error || error == EAGAIN);
-
done:
- /*
- * If we didn't complete processing of the ioend, requeue it to the
- * tail of the workqueue for another attempt later. Otherwise destroy
- * it.
- */
- if (error == EAGAIN) {
- atomic_inc(&ioend->io_remaining);
- xfs_finish_ioend(ioend);
- /* ensure we don't spin on blocked ioends */
- delay(1);
- } else {
- xfs_destroy_ioend(ioend);
- }
+ xfs_destroy_ioend(ioend);
}
/*
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_isasync = 0;
+ ioend->io_isdirect = 0;
ioend->io_error = 0;
ioend->io_list = NULL;
ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
ioend->io_size = 0;
ioend->io_iocb = NULL;
ioend->io_result = 0;
+ ioend->io_append_trans = NULL;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio;
-
- /*
- * If the I/O is beyond EOF we mark the inode dirty immediately
- * but don't update the inode size until I/O completion.
- */
- if (xfs_ioend_new_eof(ioend))
- xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
}
@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
wbc, end_index);
}
- if (iohead)
+ if (iohead) {
+ /*
+ * Reserve log space if we might write beyond the on-disk
+ * inode size.
+ */
+ if (ioend->io_type != IO_UNWRITTEN &&
+ xfs_ioend_is_append(ioend)) {
+ err = xfs_setfilesize_trans_alloc(ioend);
+ if (err)
+ goto error;
+ }
+
xfs_submit_ioend(wbc, iohead);
+ }
return 0;
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
+ struct xfs_ioend *ioend = NULL;
ssize_t ret;
if (rw & WRITE) {
- iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+ size_t size = iov_length(iov, nr_segs);
+
+ /*
+ * We need to preallocate a transaction for a size update
+ * here. In the case that this write both updates the size
+ * and converts at least on unwritten extent we will cancel
+ * the still clean transaction after the I/O has finished.
+ */
+ iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
+ if (offset + size > XFS_I(inode)->i_d.di_size) {
+ ret = xfs_setfilesize_trans_alloc(ioend);
+ if (ret)
+ goto out_destroy_ioend;
+ ioend->io_isdirect = 1;
+ }
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
xfs_get_blocks_direct,
xfs_end_io_direct_write, NULL, 0);
if (ret != -EIOCBQUEUED && iocb->private)
- xfs_destroy_ioend(iocb->private);
+ goto out_trans_cancel;
} else {
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
}
return ret;
+
+out_trans_cancel:
+ if (ioend->io_append_trans) {
+ current_set_flags_nested(&ioend->io_append_trans->t_pflags,
+ PF_FSTRANS);
+ xfs_trans_cancel(ioend->io_append_trans, 0);
+ }
+out_destroy_ioend:
+ xfs_destroy_ioend(ioend);
+ return ret;
}
STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 116dd5c3703..84eafbcb0d9 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,8 +18,6 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */
unsigned int io_isasync : 1; /* needs aio_complete */
+ unsigned int io_isdirect : 1;/* direct I/O */
struct inode *io_inode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct xfs_trans *io_append_trans;/* xact. for size update */
struct kiocb *io_iocb;
int io_result;
} xfs_ioend_t;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 188ef2fbd62..3548c6f7559 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5536,8 +5536,12 @@ xfs_getbmap(
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return XFS_ERROR(ENOMEM);
out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
- if (!out)
- return XFS_ERROR(ENOMEM);
+ if (!out) {
+ out = kmem_zalloc_large(bmv->bmv_count *
+ sizeof(struct getbmapx));
+ if (!out)
+ return XFS_ERROR(ENOMEM);
+ }
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,7 +5665,10 @@ xfs_getbmap(
break;
}
- kmem_free(out);
+ if (is_vmalloc_addr(out))
+ kmem_free_large(out);
+ else
+ kmem_free(out);
return error;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4dff85c7d7e..6819b5163e3 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
if (!xfslogd_workqueue)
goto out_free_buf_zone;
- xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
- if (!xfsdatad_workqueue)
- goto out_destroy_xfslogd_workqueue;
-
- xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
- WQ_MEM_RECLAIM, 1);
- if (!xfsconvertd_workqueue)
- goto out_destroy_xfsdatad_workqueue;
-
return 0;
- out_destroy_xfsdatad_workqueue:
- destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
- destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
kmem_zone_destroy(xfs_buf_zone);
out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- destroy_workqueue(xfsconvertd_workqueue);
- destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index dd974a55c77..1137bbc5ecc 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -215,7 +215,7 @@ xfs_swap_extents(
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
xfs_ifork_t *tempifp, *ifp, *tifp;
- int ilf_fields, tilf_fields;
+ int src_log_flags, target_log_flags;
int error = 0;
int aforkblks = 0;
int taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
tip->i_delayed_blks = ip->i_delayed_blks;
ip->i_delayed_blks = 0;
- ilf_fields = XFS_ILOG_CORE;
-
- switch(ip->i_d.di_format) {
+ src_log_flags = XFS_ILOG_CORE;
+ switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
/* If the extents fit in the inode, fix the
* pointer. Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
ifp->if_u1.if_extents =
ifp->if_u2.if_inline_ext;
}
- ilf_fields |= XFS_ILOG_DEXT;
+ src_log_flags |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
- ilf_fields |= XFS_ILOG_DBROOT;
+ src_log_flags |= XFS_ILOG_DBROOT;
break;
}
- tilf_fields = XFS_ILOG_CORE;
-
- switch(tip->i_d.di_format) {
+ target_log_flags = XFS_ILOG_CORE;
+ switch (tip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
/* If the extents fit in the inode, fix the
* pointer. Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
tifp->if_u1.if_extents =
tifp->if_u2.if_inline_ext;
}
- tilf_fields |= XFS_ILOG_DEXT;
+ target_log_flags |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
- tilf_fields |= XFS_ILOG_DBROOT;
+ target_log_flags |= XFS_ILOG_DBROOT;
break;
}
@@ -427,8 +425,8 @@ xfs_swap_extents(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_log_inode(tp, ip, ilf_fields);
- xfs_trans_log_inode(tp, tip, tilf_fields);
+ xfs_trans_log_inode(tp, ip, src_log_flags);
+ xfs_trans_log_inode(tp, tip, target_log_flags);
/*
* If this is a synchronous mount, make sure that the
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 9245e029b8e..d3b63aefd01 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -29,6 +29,7 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
+#include "xfs_dir2.h"
#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 53db20ee3e7..4be16a0cbe5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -43,11 +43,10 @@
* Lock order:
*
* ip->i_lock
- * qh->qh_lock
- * qi->qi_dqlist_lock
- * dquot->q_qlock (xfs_dqlock() and friends)
- * dquot->q_flush (xfs_dqflock() and friends)
- * xfs_Gqm->qm_dqfrlist_lock
+ * qi->qi_tree_lock
+ * dquot->q_qlock (xfs_dqlock() and friends)
+ * dquot->q_flush (xfs_dqflock() and friends)
+ * qi->qi_lru_lock
*
* If two dquots need to be locked the order is user before group/project,
* otherwise by the lowest id first, see xfs_dqlock2.
@@ -60,6 +59,9 @@ int xfs_dqreq_num;
int xfs_dqerror_mod = 33;
#endif
+struct kmem_zone *xfs_qm_dqtrxzone;
+static struct kmem_zone *xfs_qm_dqzone;
+
static struct lock_class_key xfs_dquot_other_class;
/*
@@ -69,12 +71,12 @@ void
xfs_qm_dqdestroy(
xfs_dquot_t *dqp)
{
- ASSERT(list_empty(&dqp->q_freelist));
+ ASSERT(list_empty(&dqp->q_lru));
mutex_destroy(&dqp->q_qlock);
- kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+ kmem_zone_free(xfs_qm_dqzone, dqp);
- atomic_dec(&xfs_Gqm->qm_totaldquots);
+ XFS_STATS_DEC(xs_qm_dquot);
}
/*
@@ -282,7 +284,7 @@ xfs_qm_dqalloc(
* Return if this type of quotas is turned off while we didn't
* have an inode lock
*/
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+ if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return (ESRCH);
}
@@ -384,7 +386,7 @@ xfs_qm_dqtobp(
dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
xfs_ilock(quotip, XFS_ILOCK_SHARED);
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+ if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
/*
* Return if this type of quotas is turned off while we
* didn't have the quota inode lock.
@@ -492,12 +494,12 @@ xfs_qm_dqread(
int cancelflags = 0;
- dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+ dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
dqp->dq_flags = type;
dqp->q_core.d_id = cpu_to_be32(id);
dqp->q_mount = mp;
- INIT_LIST_HEAD(&dqp->q_freelist);
+ INIT_LIST_HEAD(&dqp->q_lru);
mutex_init(&dqp->q_qlock);
init_waitqueue_head(&dqp->q_pinwait);
@@ -516,7 +518,7 @@ xfs_qm_dqread(
if (!(type & XFS_DQ_USER))
lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
- atomic_inc(&xfs_Gqm->qm_totaldquots);
+ XFS_STATS_INC(xs_qm_dquot);
trace_xfs_dqread(dqp);
@@ -602,60 +604,6 @@ error0:
}
/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- xfs_dqhash_t *qh,
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
-
- ASSERT(mutex_is_locked(&qh->qh_lock));
-
- /*
- * Traverse the hashchain looking for a match
- */
- list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
- /*
- * We already have the hashlock. We don't need the
- * dqlock to look at the id field of the dquot, since the
- * id can't be modified without the hashlock anyway.
- */
- if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
- continue;
-
- trace_xfs_dqlookup_found(dqp);
-
- xfs_dqlock(dqp);
- if (dqp->dq_flags & XFS_DQ_FREEING) {
- *O_dqpp = NULL;
- xfs_dqunlock(dqp);
- return -1;
- }
-
- dqp->q_nrefs++;
-
- /*
- * move the dquot to the front of the hashchain
- */
- list_move(&dqp->q_hashlist, &qh->qh_list);
- trace_xfs_dqlookup_done(dqp);
- *O_dqpp = dqp;
- return 0;
- }
-
- *O_dqpp = NULL;
- return 1;
-}
-
-/*
* Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
* a locked dquot, doing an allocation (if requested) as needed.
* When both an inode and an id are given, the inode's id takes precedence.
@@ -672,10 +620,10 @@ xfs_qm_dqget(
uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
{
- xfs_dquot_t *dqp;
- xfs_dqhash_t *h;
- uint version;
- int error;
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
+ struct xfs_dquot *dqp;
+ int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@@ -683,7 +631,6 @@ xfs_qm_dqget(
(! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
return (ESRCH);
}
- h = XFS_DQ_HASH(mp, id, type);
#ifdef DEBUG
if (xfs_do_dqerror) {
@@ -699,42 +646,33 @@ xfs_qm_dqget(
type == XFS_DQ_GROUP);
if (ip) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (type == XFS_DQ_USER)
- ASSERT(ip->i_udquot == NULL);
- else
- ASSERT(ip->i_gdquot == NULL);
+ ASSERT(xfs_inode_dquot(ip, type) == NULL);
}
#endif
restart:
- mutex_lock(&h->qh_lock);
+ mutex_lock(&qi->qi_tree_lock);
+ dqp = radix_tree_lookup(tree, id);
+ if (dqp) {
+ xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ xfs_dqunlock(dqp);
+ mutex_unlock(&qi->qi_tree_lock);
+ trace_xfs_dqget_freeing(dqp);
+ delay(1);
+ goto restart;
+ }
- /*
- * Look in the cache (hashtable).
- * The chain is kept locked during lookup.
- */
- switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
- case -1:
- XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
- mutex_unlock(&h->qh_lock);
- delay(1);
- goto restart;
- case 0:
- XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
- /*
- * The dquot was found, moved to the front of the chain,
- * taken off the freelist if it was on it, and locked
- * at this point. Just unlock the hashchain and return.
- */
- ASSERT(*O_dqpp);
- ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
- mutex_unlock(&h->qh_lock);
- trace_xfs_dqget_hit(*O_dqpp);
- return 0; /* success */
- default:
- XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
- break;
+ dqp->q_nrefs++;
+ mutex_unlock(&qi->qi_tree_lock);
+
+ trace_xfs_dqget_hit(dqp);
+ XFS_STATS_INC(xs_qm_dqcachehits);
+ *O_dqpp = dqp;
+ return 0;
}
+ mutex_unlock(&qi->qi_tree_lock);
+ XFS_STATS_INC(xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +683,6 @@ restart:
*/
if (ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Save the hashchain version stamp, and unlock the chain, so that
- * we don't keep the lock across a disk read
- */
- version = h->qh_version;
- mutex_unlock(&h->qh_lock);
error = xfs_qm_dqread(mp, id, type, flags, &dqp);
@@ -760,97 +692,53 @@ restart:
if (error)
return error;
- /*
- * Dquot lock comes after hashlock in the lock ordering
- */
if (ip) {
/*
* A dquot could be attached to this inode by now, since
* we had dropped the ilock.
*/
-