aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/direct-io.c18
-rw-r--r--fs/open.c29
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_aops.c3
-rw-r--r--fs/xfs/xfs_bmap.c193
-rw-r--r--fs/xfs/xfs_bmap.h15
-rw-r--r--fs/xfs/xfs_bmap_util.c97
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf_item.c19
-rw-r--r--fs/xfs/xfs_dir2.c342
-rw-r--r--fs/xfs/xfs_dquot.c2
-rw-r--r--fs/xfs/xfs_file.c26
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c55
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_sb.c1
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans.c12
-rw-r--r--fs/xfs/xfs_trans_resv.c6
-rw-r--r--fs/xfs/xfs_trans_resv.h1
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/uapi/linux/falloc.h35
24 files changed, 685 insertions, 205 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 160a5489a93..a701752dd75 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1194,13 +1194,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
}
/*
- * For file extending writes updating i_size before data
- * writeouts complete can expose uninitialized blocks. So
- * even for AIO, we need to wait for i/o to complete before
- * returning in this case.
+ * For file extending writes updating i_size before data writeouts
+ * complete can expose uninitialized blocks in dumb filesystems.
+ * In that case we need to wait for I/O completion even if asked
+ * for an asynchronous write.
*/
- dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
- (end > i_size_read(inode)));
+ if (is_sync_kiocb(iocb))
+ dio->is_async = false;
+ else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
+ (rw & WRITE) && end > i_size_read(inode))
+ dio->is_async = false;
+ else
+ dio->is_async = true;
+
dio->inode = inode;
dio->rw = rw;
diff --git a/fs/open.c b/fs/open.c
index 4b3e1edf2fe..c4465b2f844 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EINVAL;
/* Return error if mode is not supported */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+ return -EOPNOTSUPP;
+
+ /* Punch hole and zero range are mutually exclusive */
+ if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
+ (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
+ /* Collapse range cannot be combined with any other mode flag. */
+ if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+ (mode & ~FALLOC_FL_COLLAPSE_RANGE))
+ return -EINVAL;
+
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- /* It's not possible punch hole on append only file */
- if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ /*
+ * It's not possible to punch a hole in, or collapse a range of,
+ * an append-only file.
+ */
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+ && IS_APPEND(inode))
return -EPERM;
if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
return -EFBIG;
+ /*
+ * A collapse range that reaches or extends past EOF is effectively
+ * a truncate operation, so it is rejected here.
+ */
+ if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+ (offset + len >= i_size_read(inode)))
+ return -EINVAL;
+
if (!file->f_op->fallocate)
return -EOPNOTSUPP;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 0ecec1896f2..6888ad886ff 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (!acl)
goto set_acl;
- error = -EINVAL;
+ error = -E2BIG;
if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
return error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index db2cfb067d0..ef62c6b6130 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1441,7 +1441,8 @@ xfs_vm_direct_IO(
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL, 0);
+ xfs_end_io_direct_write, NULL,
+ DIO_ASYNC_EXTEND);
if (ret != -EIOCBQUEUED && iocb->private)
goto out_destroy_ioend;
} else {
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c4ca7..5b6092ef51e 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
}
return error;
}
+
+/*
+ * Shift extent records in the data fork of @ip to the left to cover a hole.
+ *
+ * The maximum number of extents to be shifted in a single operation
+ * is @num_exts, and @current_ext keeps track of the current extent
+ * index we have shifted. @offset_shift_fsb is the length by which each
+ * extent is shifted. If there is no hole to shift the extents
+ * into, this is considered an invalid operation and we abort immediately.
+ *
+ * @tp: transaction used to read in the extents, to initialise the bmap
+ * btree cursor and to log the inode.
+ * @ip: inode whose data fork extents are shifted.
+ * @done: set to 1 when there is nothing (more) to shift; it is never
+ * cleared here, so the caller must initialise it.
+ * @start_fsb: file offset used to look up the first extent to shift; only
+ * consulted when *@current_ext is 0.
+ * @offset_shift_fsb: amount subtracted from the start offset of each extent.
+ * @current_ext: in/out index of the next extent to shift; must not be NULL.
+ * @firstblock, @flist: copied into the btree cursor when the fork has a
+ * btree root (XFS_IFBROOT).
+ * @num_exts: upper bound on the number of extents handled in this call.
+ *
+ * Returns 0 on success, XFS_ERROR(EFSCORRUPTED) if the fork is in neither
+ * extents nor btree format, XFS_ERROR(EIO) after a forced shutdown,
+ * XFS_ERROR(EINVAL) if the hole is too small for the shift, or the error
+ * returned by one of the extent/btree helpers. Note that the inode is
+ * logged via xfs_trans_log_inode() on the error paths that reach
+ * del_cursor as well as on success.
+ */
+int
+xfs_bmap_shift_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int *done,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb,
+ xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock,
+ struct xfs_bmap_free *flist,
+ int num_exts)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_bmbt_rec_host *gotp;
+ struct xfs_bmbt_irec got;
+ struct xfs_bmbt_irec left;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_extnum_t nexts = 0;
+ xfs_fileoff_t startoff;
+ int error = 0;
+ int i;
+ int whichfork = XFS_DATA_FORK;
+ int logflags;
+ xfs_filblks_t blockcount = 0;
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+ XFS_ERRLEVEL_LOW, mp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ ASSERT(current_ext != NULL);
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ /* Read in all the extents */
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If *current_ext is 0, we need to look up the extent from which
+ * shifting starts, using start_fsb. The lookup is handed current_ext
+ * as well, presumably so that it can record the index of that extent.
+ */
+ if (!*current_ext) {
+ gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+ /*
+ * gotp can be null in 2 cases: 1) if there are no extents
+ * or 2) start_fsb lies in a hole beyond which there are
+ * no extents. Either way, we are done.
+ */
+ if (!gotp) {
+ *done = 1;
+ return 0;
+ }
+ }
+
+ /*
+ * We are going to change the core inode. When there is no btree
+ * root the extent list is logged too (XFS_ILOG_DEXT); otherwise the
+ * updates go through a bmap btree cursor.
+ */
+ logflags = XFS_ILOG_CORE;
+
+ if (ifp->if_flags & XFS_IFBROOT) {
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ cur->bc_private.b.firstblock = *firstblock;
+ cur->bc_private.b.flist = flist;
+ cur->bc_private.b.flags = 0;
+ } else {
+ cur = NULL;
+ logflags |= XFS_ILOG_DEXT;
+ }
+
+ while (nexts++ < num_exts &&
+ *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
+
+ gotp = xfs_iext_get_ext(ifp, *current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+ startoff = got.br_startoff - offset_shift_fsb;
+
+ /*
+ * Before shifting extent into hole, make sure that the hole
+ * is large enough to accommodate the shift: the new start
+ * offset must not land inside the extent to the left.
+ */
+ if (*current_ext) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+ *current_ext - 1), &left);
+
+ if (startoff < left.br_startoff + left.br_blockcount)
+ error = XFS_ERROR(EINVAL);
+ } else if (offset_shift_fsb > got.br_startoff) {
+ /*
+ * When the first extent is shifted, offset_shift_fsb
+ * must not exceed the starting offset of the first
+ * extent (there is no left neighbour to check).
+ */
+ error = XFS_ERROR(EINVAL);
+ }
+
+ if (error)
+ goto del_cursor;
+
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ /*
+ * Check if we can merge 2 adjacent extents: after the shift the
+ * current extent must start exactly where the left one ends,
+ * both in file offset and in disk block, have the same state,
+ * and the combined length must not exceed MAXEXTLEN. If so,
+ * the current record is removed and the left record grows to
+ * the combined block count; *current_ext is stepped back to
+ * the left record.
+ */
+ if (*current_ext &&
+ left.br_startoff + left.br_blockcount == startoff &&
+ left.br_startblock + left.br_blockcount ==
+ got.br_startblock &&
+ left.br_state == got.br_state &&
+ left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+ blockcount = left.br_blockcount +
+ got.br_blockcount;
+ xfs_iext_remove(ip, *current_ext, 1, 0);
+ if (cur) {
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+
+ /* Make cursor point to the extent we will update */
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ xfs_bmbt_set_blockcount(gotp, blockcount);
+ got.br_blockcount = blockcount;
+ } else {
+ /* No merge possible: only the startoff has to be updated */
+ xfs_bmbt_set_startoff(gotp, startoff);
+ got.br_startoff = startoff;
+ }
+
+ if (cur) {
+ error = xfs_bmbt_update(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ got.br_state);
+ if (error)
+ goto del_cursor;
+ }
+
+ (*current_ext)++;
+ }
+
+ /* We are done once the index has moved past the last extent */
+ if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
+ *done = 1;
+
+del_cursor:
+ if (cur)
+ xfs_btree_del_cursor(cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ xfs_trans_log_inode(tp, ip, logflags);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f35122..f84bd7af43b 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
+
+/*
+ * This macro determines how many extents are shifted in one write
+ * transaction. Shifting a single extent could already require two
+ * splits, an extent move on the first and an extent merge on the
+ * second, so only one extent is shifted inside a write transaction
+ * at a time.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
+
#ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
+int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ int *done, xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+ int num_exts);
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616080c..01f6a646caa 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
* the freeing of the space succeeds at ENOSPC.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
/*
@@ -1468,6 +1467,102 @@ out:
}
/*
+ * xfs_collapse_file_space()
+ * This routine frees the disk space backing the byte range
+ * [offset, offset + len) of the given file and then shifts the extents
+ * that follow the range to the left to cover the hole.
+ * The first thing we do is to free data blocks in the specified range
+ * by calling xfs_free_file_space(). According to the original author it
+ * also syncs dirty data and invalidates the page cache over the region on
+ * which collapse range is working. The extent records starting at
+ * offset + len are then shifted left by len (both converted to filesystem
+ * blocks), XFS_BMAP_MAX_SHIFT_EXTENTS at a time, with one transaction per
+ * loop iteration, until xfs_bmap_shift_extents() reports it is done.
+ * The caller must hold the inode's IOLOCK exclusively (asserted on entry);
+ * the ILOCK is taken and dropped here around each transaction.
+ * RETURNS:
+ * 0 on success
+ * errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t len)
+{
+ int done = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+ xfs_extnum_t current_ext = 0;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t shift_fsb;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ trace_xfs_collapse_file_space(ip);
+
+ start_fsb = XFS_B_TO_FSB(mp, offset + len);
+ shift_fsb = XFS_B_TO_FSB(mp, len);
+
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ return error;
+
+ while (!error && !done) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ /*
+ * We need to reserve blocks for the transaction.
+ * This comes into the picture when, after shifting an extent
+ * into the hole, we find that adjacent extents can be merged,
+ * which may lead to freeing of a block during record update.
+ * On failure the transaction is cancelled here and the loop
+ * is left before the ILOCK has been taken.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+ if (error) {
+ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ xfs_trans_cancel(tp, 0);
+ break;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_bmap_init(&free_list, &first_block);
+
+ /*
+ * We are using the write transaction in which max 2 bmbt
+ * updates are allowed, hence at most XFS_BMAP_MAX_SHIFT_EXTENTS
+ * extents are shifted per iteration. current_ext carries the
+ * position over to the next iteration.
+ */
+ error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+ shift_fsb, &current_ext,
+ &first_block, &free_list,
+ XFS_BMAP_MAX_SHIFT_EXTENTS);
+ if (error)
+ goto out;
+
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ goto out;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ return error;
+
+out:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
* attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b2577..935ed2b24ed 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
+int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+ xfs_off_t len);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 33149113e33..8752821443b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -796,20 +796,6 @@ xfs_buf_item_init(
bip->bli_formats[i].blf_map_size = map_size;
}
-#ifdef XFS_TRANS_DEBUG
- /*
- * Allocate the arrays for tracking what needs to be logged
- * and what our callers request to be logged. bli_orig
- * holds a copy of the original, clean buffer for comparison
- * against, and bli_logged keeps a 1 bit flag per byte in
- * the buffer to indicate which bytes the callers have asked
- * to have logged.
- */
- bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
- memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
- bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
/*
* Put the buf item into the list of items attached to the
* buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
xfs_buf_item_free(
xfs_buf_log_item_t *bip)
{
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef02997..fda46253966 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@ xfs_dir_init(
xfs_inode_t *dp,
xfs_inode_t *pdp)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int error;
- memset((char *)&args, 0, sizeof(args));
- args.dp = dp;
- args.trans = tp;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+ error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+ if (error)
return error;
- return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->dp = dp;
+ args->trans = tp;
+ error = xfs_dir2_sf_create(args, pdp->i_ino);
+ kmem_free(args);
+ return error;
}
/*
@@ -205,41 +212,56 @@ xfs_dir_createname(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
XFS_STATS_INC(xs_dir_create);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+
+out_free:
+ kmem_free(args);
return rval;
}
@@ -282,46 +304,66 @@ xfs_dir_lookup(
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_OKNOENT;
+ /*
+ * We need to use KM_NOFS here so that lockdep will not throw false
+ * positive deadlock warnings on a non-transactional lookup path. It is
+ * safe to recurse into inode reclaim in that case, but lockdep can't
+ * easily be taught about it. Hence KM_NOFS avoids having to add a
+ * bunch of lockdep class annotations into the reclaim path for the
+ * ilock.
+ */
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_OKNOENT;
if (ci_name)
- args.op_flags |= XFS_DA_OP_CILOOKUP;
+ args->op_flags |= XFS_DA_OP_CILOOKUP;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_lookup(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_lookup(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_lookup(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_lookup(args);
else
- rval = xfs_dir2_node_lookup(&args);
+ rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
if (rval == EEXIST)
rval = 0;
if (!rval) {
- *inum = args.inumber;
+ *inum = args->inumber;
if (ci_name) {
- ci_name->name = args.value;
- ci_name->len = args.valuelen;
+ ci_name->name = args->value;
+ ci_name->len = args->valuelen;
}
}
+out_free:
+ kmem_free(args);
return rval;
}
@@ -338,38 +380,51 @@ xfs_dir_removename(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = ino;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_removename(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_removename(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_removename(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = ino;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_removename(args);
else
- rval = xfs_dir2_node_removename(&args);
+ rval = xfs_dir2_node_removename(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -386,40 +441,54 @@ xfs_dir_replace(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_replace(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_replace(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_replace(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_replace(args);
else
- rval = xfs_dir2_node_replace(&args);
+ rval = xfs_dir2_node_replace(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -434,7 +503,7 @@ xfs_dir_canenter(
struct xfs_name *name, /* name of entry to add */
uint resblks)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
@@ -443,29 +512,42 @@ xfs_dir_canenter(
ASSERT(S_ISDIR(dp->i_d.di_mode));
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
XFS_DA_OP_OKNOENT;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+out_free:
+ kmem_free(args);
return rval;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7aeb4c895b3..868b19f096b 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c