aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_bmap_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r--fs/xfs/xfs_bmap_util.c440
1 files changed, 146 insertions, 294 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 97f952caea7..64731ef3324 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -18,31 +18,31 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
+#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_trans.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_log.h"
+#include "xfs_dinode.h"
/* Kernel only BMAP related definitions and functions */
@@ -249,48 +249,6 @@ xfs_bmap_rtalloc(
}
/*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
- struct work_struct *work)
-{
- struct xfs_bmalloca *args = container_of(work,
- struct xfs_bmalloca, work);
- unsigned long pflags;
-
- /* we are in a transaction context here */
- current_set_flags_nested(&pflags, PF_FSTRANS);
-
- args->result = __xfs_bmapi_allocate(args);
- complete(args->done);
-
- current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
- struct xfs_bmalloca *args)
-{
- DECLARE_COMPLETION_ONSTACK(done);
-
- if (!args->stack_switch)
- return __xfs_bmapi_allocate(args);
-
-
- args->done = &done;
- INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
- queue_work(xfs_alloc_wq, &args->work);
- wait_for_completion(&done);
- return args->result;
-}
-
-/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
* the end of file for an empty fork, so 1 is returned in *eof in that case.
@@ -617,22 +575,27 @@ xfs_getbmap(
return XFS_ERROR(ENOMEM);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
- if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
+ if (whichfork == XFS_DATA_FORK) {
+ if (!(iflags & BMV_IF_DELALLOC) &&
+ (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock_iolock;
+
+ /*
+ * Even after flushing the inode, there can still be
+ * delalloc blocks on the inode beyond EOF due to
+ * speculative preallocation. These are not removed
+ * until the release function is called or the inode
+ * is inactivated. Hence we cannot assert here that
+ * ip->i_delayed_blks == 0.
+ */
}
- /*
- * even after flushing the inode, there can still be delalloc
- * blocks on the inode beyond EOF due to speculative
- * preallocation. These are not removed until the release
- * function is called or the inode is inactivated. Hence we
- * cannot assert here that ip->i_delayed_blks == 0.
- */
- }
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
+ } else {
+ lock = xfs_ilock_attr_map_shared(ip);
+ }
/*
* Don't let nex be bigger than the number of extents
@@ -737,7 +700,7 @@ xfs_getbmap(
out_free_map:
kmem_free(map);
out_unlock_ilock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -965,32 +928,12 @@ xfs_free_eofblocks(
return error;
}
-/*
- * xfs_alloc_file_space()
- * This routine allocates disk space for the given file.
- *
- * If alloc_type == 0, this request is for an ALLOCSP type
- * request which will change the file size. In this case, no
- * DMAPI event will be generated by the call. A TRUNCATE event
- * will be generated later by xfs_setattr.
- *
- * If alloc_type != 0, this request is for a RESVSP type
- * request, and a DMAPI DM_EVENT_WRITE will be generated if the
- * lower block boundary byte address is less than the file's
- * length.
- *
- * RETURNS:
- * 0 on success
- * errno on error
- *
- */
-STATIC int
+int
xfs_alloc_file_space(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t len,
- int alloc_type,
- int attr_flags)
+ int alloc_type)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -1188,9 +1131,15 @@ xfs_zero_remaining_bytes(
xfs_buf_unlock(bp);
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
+ uint lock_mode;
+
offset_fsb = XFS_B_TO_FSBT(mp, offset);
nimap = 1;
+
+ lock_mode = xfs_ilock_data_map_shared(ip);
error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
+ xfs_iunlock(ip, lock_mode);
+
if (error || nimap < 1)
break;
ASSERT(imap.br_blockcount >= 1);
@@ -1207,7 +1156,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNWRITE(bp);
XFS_BUF_READ(bp);
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = XFS_ERROR(EIO);
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
@@ -1220,7 +1174,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNDONE(bp);
XFS_BUF_UNREAD(bp);
XFS_BUF_WRITE(bp);
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = XFS_ERROR(EIO);
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
@@ -1232,24 +1191,11 @@ xfs_zero_remaining_bytes(
return error;
}
-/*
- * xfs_free_file_space()
- * This routine frees disk space for the given file.
- *
- * This routine is only called by xfs_change_file_space
- * for an UNRESVSP type call.
- *
- * RETURNS:
- * 0 on success
- * errno on error
- *
- */
-STATIC int
+int
xfs_free_file_space(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len,
- int attr_flags)
+ xfs_off_t len)
{
int committed;
int done;
@@ -1267,7 +1213,6 @@ xfs_free_file_space(
int rt;
xfs_fileoff_t startoffset_fsb;
xfs_trans_t *tp;
- int need_iolock = 1;
mp = ip->i_mount;
@@ -1284,20 +1229,15 @@ xfs_free_file_space(
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
- if (attr_flags & XFS_ATTR_NOLOCK)
- need_iolock = 0;
- if (need_iolock) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- /* wait for the completion of any pending DIOs */
- inode_dio_wait(VFS_I(ip));
- }
+ /* wait for the completion of any pending DIOs */
+ inode_dio_wait(VFS_I(ip));
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);
if (error)
- goto out_unlock_iolock;
+ goto out;
truncate_pagecache_range(VFS_I(ip), ioffset, -1);
/*
@@ -1311,7 +1251,7 @@ xfs_free_file_space(
error = xfs_bmapi_read(ip, startoffset_fsb, 1,
&imap, &nimap, 0);
if (error)
- goto out_unlock_iolock;
+ goto out;
ASSERT(nimap == 0 || nimap == 1);
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
xfs_daddr_t block;
@@ -1326,7 +1266,7 @@ xfs_free_file_space(
error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
&imap, &nimap, 0);
if (error)
- goto out_unlock_iolock;
+ goto out;
ASSERT(nimap == 0 || nimap == 1);
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
@@ -1366,7 +1306,6 @@ xfs_free_file_space(
* the freeing of the space succeeds at ENOSPC.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
/*
@@ -1412,27 +1351,23 @@ xfs_free_file_space(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- out_unlock_iolock:
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ out:
return error;
error0:
xfs_bmap_cancel(&free_list);
error1:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
- XFS_ILOCK_EXCL);
- return error;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ goto out;
}
-STATIC int
+int
xfs_zero_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len,
- int attr_flags)
+ xfs_off_t len)
{
struct xfs_mount *mp = ip->i_mount;
uint granularity;
@@ -1440,6 +1375,8 @@ xfs_zero_file_space(
xfs_off_t end_boundary;
int error;
+ trace_xfs_zero_file_space(ip);
+
granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
/*
@@ -1453,26 +1390,32 @@ xfs_zero_file_space(
ASSERT(start_boundary >= offset);
ASSERT(end_boundary <= offset + len);
- if (!(attr_flags & XFS_ATTR_NOLOCK))
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
if (start_boundary < end_boundary - 1) {
- /* punch out the page cache over the conversion range */
+ /*
+ * punch out delayed allocation blocks and the page cache over
+ * the conversion range
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip,
+ XFS_B_TO_FSBT(mp, start_boundary),
+ XFS_B_TO_FSB(mp, end_boundary - start_boundary));
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
truncate_pagecache_range(VFS_I(ip), start_boundary,
end_boundary - 1);
+
/* convert the blocks */
error = xfs_alloc_file_space(ip, start_boundary,
end_boundary - start_boundary - 1,
- XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
- attr_flags);
+ XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
if (error)
- goto out_unlock;
+ goto out;
/* We've handled the interior of the range, now for the edges */
- if (start_boundary != offset)
+ if (start_boundary != offset) {
error = xfs_iozero(ip, offset, start_boundary - offset);
- if (error)
- goto out_unlock;
+ if (error)
+ goto out;
+ }
if (end_boundary != offset + len)
error = xfs_iozero(ip, end_boundary,
@@ -1486,194 +1429,103 @@ xfs_zero_file_space(
error = xfs_iozero(ip, offset, len);
}
-out_unlock:
- if (!(attr_flags & XFS_ATTR_NOLOCK))
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+out:
return error;
}
/*
- * xfs_change_file_space()
- * This routine allocates or frees disk space for the given file.
- * The user specified parameters are checked for alignment and size
- * limitations.
- *
+ * xfs_collapse_file_space()
+ * This routine frees disk space and shift extent for the given file.
+ * The first thing we do is to free data blocks in the specified range
+ * by calling xfs_free_file_space(). It would also sync dirty data
+ * and invalidate page cache over the region on which collapse range
+ * is working. And Shift extent records to the left to cover a hole.
* RETURNS:
- * 0 on success
- * errno on error
+ * 0 on success
+ * errno on error
*
*/
int
-xfs_change_file_space(
- xfs_inode_t *ip,
- int cmd,
- xfs_flock64_t *bf,
- xfs_off_t offset,
- int attr_flags)
+xfs_collapse_file_space(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t len)
{
- xfs_mount_t *mp = ip->i_mount;
- int clrprealloc;
- int error;
- xfs_fsize_t fsize;
- int setprealloc;
- xfs_off_t startoffset;
- xfs_trans_t *tp;
- struct iattr iattr;
-
- if (!S_ISREG(ip->i_d.di_mode))
- return XFS_ERROR(EINVAL);
-
- switch (bf->l_whence) {
- case 0: /*SEEK_SET*/
- break;
- case 1: /*SEEK_CUR*/
- bf->l_start += offset;
- break;
- case 2: /*SEEK_END*/
- bf->l_start += XFS_ISIZE(ip);
- break;
- default:
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * length of <= 0 for resv/unresv/zero is invalid. length for
- * alloc/free is ignored completely and we have no idea what userspace
- * might have set it to, so set it to zero to allow range
- * checks to pass.
- */
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- if (bf->l_len <= 0)
- return XFS_ERROR(EINVAL);
- break;
- default:
- bf->l_len = 0;
- break;
- }
-
- if (bf->l_start < 0 ||
- bf->l_start > mp->m_super->s_maxbytes ||
- bf->l_start + bf->l_len < 0 ||
- bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
- return XFS_ERROR(EINVAL);
-
- bf->l_whence = 0;
+ int done = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+ xfs_extnum_t current_ext = 0;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t shift_fsb;
- startoffset = bf->l_start;
- fsize = XFS_ISIZE(ip);
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- setprealloc = clrprealloc = 0;
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- error = xfs_zero_file_space(ip, startoffset, bf->l_len,
- attr_flags);
- if (error)
- return error;
- setprealloc = 1;
- break;
+ trace_xfs_collapse_file_space(ip);
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
- XFS_BMAPI_PREALLOC, attr_flags);
- if (error)
- return error;
- setprealloc = 1;
- break;
+ start_fsb = XFS_B_TO_FSB(mp, offset + len);
+ shift_fsb = XFS_B_TO_FSB(mp, len);
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
- attr_flags)))
- return error;
- break;
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ return error;
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP:
- case XFS_IOC_FREESP64:
+ while (!error && !done) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
/*
- * These operations actually do IO when extending the file, but
- * the allocation is done seperately to the zeroing that is
- * done. This set of operations need to be serialised against
- * other IO operations, such as truncate and buffered IO. We
- * need to take the IOLOCK here to serialise the allocation and
- * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
- * truncate, direct IO) from racing against the transient
- * allocated but not written state we can have here.
+ * We would need to reserve permanent block for transaction.
+ * This will come into picture when after shifting extent into
+ * hole we found that adjacent extents can be merged which
+ * may lead to freeing of a block during record update.
*/
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- if (startoffset > fsize) {
- error = xfs_alloc_file_space(ip, fsize,
- startoffset - fsize, 0,
- attr_flags | XFS_ATTR_NOLOCK);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- break;
- }
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ break;
}
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = startoffset;
-
- error = xfs_setattr_size(ip, &iattr,
- attr_flags | XFS_ATTR_NOLOCK);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ XFS_QMOPT_RES_REGBLKS);
if (error)
- return error;
-
- clrprealloc = 1;
- break;
-
- default:
- ASSERT(0);
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * update the inode timestamp, mode, and prealloc flag bits
- */
- tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
+ goto out;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
- if ((attr_flags & XFS_ATTR_DMI) == 0) {
- ip->i_d.di_mode &= ~S_ISUID;
+ xfs_bmap_init(&free_list, &first_block);
/*
- * Note that we don't have to worry about mandatory
- * file locking being disabled here because we only
- * clear the S_ISGID bit if the Group execute bit is
- * on, but if it was on then mandatory locking wouldn't
- * have been enabled.
+ * We are using the write transaction in which max 2 bmbt
+ * updates are allowed
*/
- if (ip->i_d.di_mode & S_IXGRP)
- ip->i_d.di_mode &= ~S_ISGID;
+ error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+ shift_fsb, &current_ext,
+ &first_block, &free_list,
+ XFS_BMAP_MAX_SHIFT_EXTENTS);
+ if (error)
+ goto out;
+
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ goto out;
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (setprealloc)
- ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
- else if (clrprealloc)
- ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (attr_flags & XFS_ATTR_SYNC)
- xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp, 0);
+ return error;
+
+out:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
}
/*