diff options
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 299 |
1 files changed, 231 insertions, 68 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index add06b4e9a6..6d3ec2b6ee2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -17,31 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_bmap_util.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" -#include "xfs_utils.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_quota.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_dinode.h" #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ @@ -107,7 +104,7 @@ xfs_alert_fsblock_zero( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x\n", + "blkcnt: %llx extent-state: %x", (unsigned long long)ip->i_ino, (unsigned long long)imap->br_startblock, (unsigned long long)imap->br_startoff, @@ -131,7 +128,6 @@ xfs_iomap_write_direct( xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; - int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; @@ -185,10 +181,8 @@ xfs_iomap_write_direct( * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), resrtextents, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, resrtextents); /* * Check for running out of space, note: need lock to return */ @@ -205,18 +199,15 @@ xfs_iomap_write_direct( xfs_trans_ijoin(tp, ip, 0); - bmapi_flag = 0; - if (offset < XFS_ISIZE(ip) || extsz) - bmapi_flag |= XFS_BMAPI_PREALLOC; - /* * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, imap, &nimaps, &free_list); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_PREALLOC, &firstfsb, 0, + imap, &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -282,6 +273,15 @@ xfs_iomap_eof_want_preallocate( return 0; /* + * If the file is smaller than the minimum prealloc and we are using + * dynamic preallocation, don't do any preallocation at all as it is + * likely this is the only write to the file that is going to be done. + */ + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && + XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) + return 0; + + /* * If there are any real blocks past eof, then don't * do any speculative allocation. */ @@ -311,6 +311,121 @@ xfs_iomap_eof_want_preallocate( } /* + * Determine the initial size of the preallocation. We are beyond the current + * EOF here, but we need to take into account whether this is a sparse write or + * an extending write when determining the preallocation size. Hence we need to + * look up the extent that ends at the current write offset and use the result + * to determine the preallocation size. + * + * If the extent is a hole, then preallocation is essentially disabled. + * Otherwise we take the size of the preceeding data extent as the basis for the + * preallocation size. If the size of the extent is greater than half the + * maximum extent length, then use the current offset as the basis. This ensures + * that for large files the preallocation size always extends to MAXEXTLEN + * rather than falling short due to things like stripe unit/width alignment of + * real extents. + */ +STATIC xfs_fsblock_t +xfs_iomap_eof_prealloc_initial_size( + struct xfs_mount *mp, + struct xfs_inode *ip, + xfs_off_t offset, + xfs_bmbt_irec_t *imap, + int nimaps) +{ + xfs_fileoff_t start_fsb; + int imaps = 1; + int error; + + ASSERT(nimaps >= imaps); + + /* if we are using a specific prealloc size, return now */ + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + return 0; + + /* If the file is small, then use the minimum prealloc */ + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) + return 0; + + /* + * As we write multiple pages, the offset will always align to the + * start of a page and hence point to a hole at EOF. i.e. if the size is + * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096) + * will return FSB 1. Hence if there are blocks in the file, we want to + * point to the block prior to the EOF block and not the hole that maps + * directly at @offset. + */ + start_fsb = XFS_B_TO_FSB(mp, offset); + if (start_fsb) + start_fsb--; + error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE); + if (error) + return 0; + + ASSERT(imaps == 1); + if (imap[0].br_startblock == HOLESTARTBLOCK) + return 0; + if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) + return imap[0].br_blockcount << 1; + return XFS_B_TO_FSB(mp, offset); +} + +STATIC bool +xfs_quota_need_throttle( + struct xfs_inode *ip, + int type, + xfs_fsblock_t alloc_blocks) +{ + struct xfs_dquot *dq = xfs_inode_dquot(ip, type); + + if (!dq || !xfs_this_quota_on(ip->i_mount, type)) + return false; + + /* no hi watermark, no throttle */ + if (!dq->q_prealloc_hi_wmark) + return false; + + /* under the lo watermark, no throttle */ + if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark) + return false; + + return true; +} + +STATIC void +xfs_quota_calc_throttle( + struct xfs_inode *ip, + int type, + xfs_fsblock_t *qblocks, + int *qshift) +{ + int64_t freesp; + int shift = 0; + struct xfs_dquot *dq = xfs_inode_dquot(ip, type); + + /* over hi wmark, squash the prealloc completely */ + if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { + *qblocks = 0; + return; + } + + freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount; + if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) { + shift = 2; + if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT]) + shift += 2; + if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT]) + shift += 2; + } + + /* only overwrite the throttle values if we are more aggressive */ + if ((freesp >> shift) < (*qblocks >> *qshift)) { + *qblocks = freesp; + *qshift = shift; + } +} + +/* * If we don't have a user specified preallocation size, dynamically increase * the preallocation size as the size of the file grows. Cap the maximum size * at a single extent or less if the filesystem is near full. The closer the @@ -319,43 +434,95 @@ xfs_iomap_eof_want_preallocate( STATIC xfs_fsblock_t xfs_iomap_prealloc_size( struct xfs_mount *mp, - struct xfs_inode *ip) + struct xfs_inode *ip, + xfs_off_t offset, + struct xfs_bmbt_irec *imap, + int nimaps) { xfs_fsblock_t alloc_blocks = 0; + int shift = 0; + int64_t freesp; + xfs_fsblock_t qblocks; + int qshift = 0; - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { - int shift = 0; - int64_t freesp; + alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, + imap, nimaps); + if (!alloc_blocks) + goto check_writeio; + qblocks = alloc_blocks; - /* - * rounddown_pow_of_two() returns an undefined result - * if we pass in alloc_blocks = 0. Hence the "+ 1" to - * ensure we always pass in a non-zero value. - */ - alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1; - alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, - rounddown_pow_of_two(alloc_blocks)); - - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - freesp = mp->m_sb.sb_fdblocks; - if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { - shift = 2; - if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) - shift++; - } - if (shift) - alloc_blocks >>= shift; + /* + * MAXEXTLEN is not a power of two value but we round the prealloc down + * to the nearest power of two value after throttling. To prevent the + * round down from unconditionally reducing the maximum supported prealloc + * size, we round up first, apply appropriate throttling, round down and + * cap the value to MAXEXTLEN. + */ + alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), + alloc_blocks); + + xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + freesp = mp->m_sb.sb_fdblocks; + if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { + shift = 2; + if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) + shift++; } + /* + * Check each quota to cap the prealloc size and provide a shift + * value to throttle with. + */ + if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); + if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); + if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); + + /* + * The final prealloc size is set to the minimum of free space available + * in each of the quotas and the overall filesystem. + * + * The shift throttle value is set to the maximum value as determined by + * the global low free space values and per-quota low free space values. + */ + alloc_blocks = MIN(alloc_blocks, qblocks); + shift = MAX(shift, qshift); + + if (shift) + alloc_blocks >>= shift; + /* + * rounddown_pow_of_two() returns an undefined result if we pass in + * alloc_blocks = 0. + */ + if (alloc_blocks) + alloc_blocks = rounddown_pow_of_two(alloc_blocks); + if (alloc_blocks > MAXEXTLEN) + alloc_blocks = MAXEXTLEN; + + /* + * If we are still trying to allocate more space than is + * available, squash the prealloc hard. This can happen if we + * have a large file on a small filesystem and the above + * lowspace thresholds are smaller than MAXEXTLEN. + */ + while (alloc_blocks && alloc_blocks >= freesp) + alloc_blocks >>= 4; + +check_writeio: if (alloc_blocks < mp->m_writeio_blocks) alloc_blocks = mp->m_writeio_blocks; + trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, + mp->m_writeio_blocks); + return alloc_blocks; } @@ -390,7 +557,6 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, imap, XFS_WRITE_IMAPS, &prealloc); if (error) @@ -398,7 +564,10 @@ xfs_iomap_write_delay( retry: if (prealloc) { - xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); + xfs_fsblock_t alloc_blocks; + + alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap, + XFS_WRITE_IMAPS); aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -476,7 +645,6 @@ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, - size_t count, xfs_bmbt_irec_t *imap) { xfs_mount_t *mp = ip->i_mount; @@ -518,10 +686,8 @@ xfs_iomap_write_allocate( tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - error = xfs_trans_reserve(tp, nres, - XFS_WRITE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + nres, 0); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); @@ -564,7 +730,7 @@ xfs_iomap_write_allocate( */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - error = xfs_bmap_last_offset(NULL, ip, &last_block, + error = xfs_bmap_last_offset(ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; @@ -583,8 +749,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, + count_fsb, 0, &first_block, 1, imap, &nimaps, &free_list); if (error) @@ -684,10 +849,8 @@ xfs_iomap_write_unwritten( sb_start_intwrite(mp->m_super); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, 0); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); |
