diff options
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 237 |
1 files changed, 167 insertions, 70 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 912d83d8860..6d3ec2b6ee2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -17,31 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_bmap_util.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" -#include "xfs_utils.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_quota.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_dinode.h" #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ @@ -107,7 +104,7 @@ xfs_alert_fsblock_zero( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x\n", + "blkcnt: %llx extent-state: %x", (unsigned long long)ip->i_ino, (unsigned long long)imap->br_startblock, (unsigned long long)imap->br_startoff, @@ -131,7 +128,6 @@ xfs_iomap_write_direct( xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; - int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; @@ -185,10 +181,8 @@ xfs_iomap_write_direct( * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), resrtextents, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, resrtextents); /* * Check for running out of space, note: need lock to return */ @@ -205,18 +199,15 @@ xfs_iomap_write_direct( xfs_trans_ijoin(tp, ip, 0); - bmapi_flag = 0; - if (offset < XFS_ISIZE(ip) || extsz) - bmapi_flag |= XFS_BMAPI_PREALLOC; - /* * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, imap, &nimaps, &free_list); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_PREALLOC, &firstfsb, 0, + imap, &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -282,6 +273,15 @@ xfs_iomap_eof_want_preallocate( return 0; /* + * If the file is smaller than the minimum prealloc and we are using + * dynamic preallocation, don't do any preallocation at all as it is + * likely this is the only write to the file that is going to be done. + */ + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && + XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) + return 0; + + /* * If there are any real blocks past eof, then don't * do any speculative allocation. */ @@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate( * rather than falling short due to things like stripe unit/width alignment of * real extents. */ -STATIC int +STATIC xfs_fsblock_t xfs_iomap_eof_prealloc_initial_size( struct xfs_mount *mp, struct xfs_inode *ip, @@ -343,6 +343,10 @@ xfs_iomap_eof_prealloc_initial_size( if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) return 0; + /* If the file is small, then use the minimum prealloc */ + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) + return 0; + /* * As we write multiple pages, the offset will always align to the * start of a page and hence point to a hole at EOF. i.e. if the size is @@ -362,10 +366,65 @@ xfs_iomap_eof_prealloc_initial_size( if (imap[0].br_startblock == HOLESTARTBLOCK) return 0; if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) - return imap[0].br_blockcount; + return imap[0].br_blockcount << 1; return XFS_B_TO_FSB(mp, offset); } +STATIC bool +xfs_quota_need_throttle( + struct xfs_inode *ip, + int type, + xfs_fsblock_t alloc_blocks) +{ + struct xfs_dquot *dq = xfs_inode_dquot(ip, type); + + if (!dq || !xfs_this_quota_on(ip->i_mount, type)) + return false; + + /* no hi watermark, no throttle */ + if (!dq->q_prealloc_hi_wmark) + return false; + + /* under the lo watermark, no throttle */ + if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark) + return false; + + return true; +} + +STATIC void +xfs_quota_calc_throttle( + struct xfs_inode *ip, + int type, + xfs_fsblock_t *qblocks, + int *qshift) +{ + int64_t freesp; + int shift = 0; + struct xfs_dquot *dq = xfs_inode_dquot(ip, type); + + /* over hi wmark, squash the prealloc completely */ + if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { + *qblocks = 0; + return; + } + + freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount; + if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) { + shift = 2; + if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT]) + shift += 2; + if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT]) + shift += 2; + } + + /* only overwrite the throttle values if we are more aggressive */ + if ((freesp >> shift) < (*qblocks >> *qshift)) { + *qblocks = freesp; + *qshift = shift; + } +} + /* * If we don't have a user specified preallocation size, dynamically increase * the preallocation size as the size of the file grows. Cap the maximum size @@ -381,45 +440,89 @@ xfs_iomap_prealloc_size( int nimaps) { xfs_fsblock_t alloc_blocks = 0; + int shift = 0; + int64_t freesp; + xfs_fsblock_t qblocks; + int qshift = 0; alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, imap, nimaps); - if (alloc_blocks > 0) { - int shift = 0; - int64_t freesp; - - alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, - rounddown_pow_of_two(alloc_blocks)); - - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - freesp = mp->m_sb.sb_fdblocks; - if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { - shift = 2; - if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) - shift++; - if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) - shift++; - } - if (shift) - alloc_blocks >>= shift; + if (!alloc_blocks) + goto check_writeio; + qblocks = alloc_blocks; - /* - * If we are still trying to allocate more space than is - * available, squash the prealloc hard. This can happen if we - * have a large file on a small filesystem and the above - * lowspace thresholds are smaller than MAXEXTLEN. - */ - while (alloc_blocks >= freesp) - alloc_blocks >>= 4; + /* + * MAXEXTLEN is not a power of two value but we round the prealloc down + * to the nearest power of two value after throttling. To prevent the + * round down from unconditionally reducing the maximum supported prealloc + * size, we round up first, apply appropriate throttling, round down and + * cap the value to MAXEXTLEN. + */ + alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), + alloc_blocks); + + xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + freesp = mp->m_sb.sb_fdblocks; + if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { + shift = 2; + if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT]) + shift++; + if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT]) + shift++; } + /* + * Check each quota to cap the prealloc size and provide a shift + * value to throttle with. + */ + if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); + if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); + if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) + xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); + + /* + * The final prealloc size is set to the minimum of free space available + * in each of the quotas and the overall filesystem. + * + * The shift throttle value is set to the maximum value as determined by + * the global low free space values and per-quota low free space values. + */ + alloc_blocks = MIN(alloc_blocks, qblocks); + shift = MAX(shift, qshift); + + if (shift) + alloc_blocks >>= shift; + /* + * rounddown_pow_of_two() returns an undefined result if we pass in + * alloc_blocks = 0. + */ + if (alloc_blocks) + alloc_blocks = rounddown_pow_of_two(alloc_blocks); + if (alloc_blocks > MAXEXTLEN) + alloc_blocks = MAXEXTLEN; + + /* + * If we are still trying to allocate more space than is + * available, squash the prealloc hard. This can happen if we + * have a large file on a small filesystem and the above + * lowspace thresholds are smaller than MAXEXTLEN. + */ + while (alloc_blocks && alloc_blocks >= freesp) + alloc_blocks >>= 4; + +check_writeio: if (alloc_blocks < mp->m_writeio_blocks) alloc_blocks = mp->m_writeio_blocks; + trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, + mp->m_writeio_blocks); + return alloc_blocks; } @@ -542,7 +645,6 @@ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, - size_t count, xfs_bmbt_irec_t *imap) { xfs_mount_t *mp = ip->i_mount; @@ -584,10 +686,8 @@ xfs_iomap_write_allocate( tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - error = xfs_trans_reserve(tp, nres, - XFS_WRITE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + nres, 0); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); @@ -630,7 +730,7 @@ xfs_iomap_write_allocate( */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - error = xfs_bmap_last_offset(NULL, ip, &last_block, + error = xfs_bmap_last_offset(ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; @@ -649,8 +749,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, + count_fsb, 0, &first_block, 1, imap, &nimaps, &free_list); if (error) @@ -750,10 +849,8 @@ xfs_iomap_write_unwritten( sb_start_intwrite(mp->m_super); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, 0); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); |
