aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nathan Scott <nathans@sgi.com> 2006-01-11 15:28:28 +1100
committer Nathan Scott <nathans@sgi.com> 2006-01-11 15:28:28 +1100
commit dd9f438e32900d67def49fa1b8961b3e19b6fefc (patch)
tree a0a51110a13445f3a27b72303e36ef6ef48db0eb
parent 061f7209bdfb0193b306f88b4ff36b2574b001d3 (diff)
[XFS] Implement the di_extsize allocator hint for non-realtime files as well. Also provides a mechanism for inheriting this property from the parent directory for new files.

SGI-PV: 945264
SGI-Modid: xfs-linux-melb:xfs-kern:24367a
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r-- fs/xfs/xfs_bmap.c 373
-rw-r--r-- fs/xfs/xfs_bmap.h 7
-rw-r--r-- fs/xfs/xfs_dinode.h 11
-rw-r--r-- fs/xfs/xfs_fs.h 2
-rw-r--r-- fs/xfs/xfs_inode.c 16
-rw-r--r-- fs/xfs/xfs_iomap.c 390
-rw-r--r-- fs/xfs/xfs_vnodeops.c 158
7 files changed, 547 insertions, 410 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9..8a32d65211b 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
return 0; /* keep gcc quite */
}
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ */
+STATIC int
+xfs_bmap_extsize_align(
+ xfs_mount_t *mp,
+ xfs_bmbt_irec_t *gotp, /* next extent pointer */
+ xfs_bmbt_irec_t *prevp, /* previous extent pointer */
+ xfs_extlen_t extsz, /* align to this extent size */
+ int rt, /* is this a realtime inode? */
+ int eof, /* is extent at end-of-file? */
+ int delay, /* creating delalloc extent? */
+ int convert, /* overwriting unwritten extent? */
+ xfs_fileoff_t *offp, /* in/out: aligned offset */
+ xfs_extlen_t *lenp) /* in/out: aligned length */
+{
+ xfs_fileoff_t orig_off; /* original offset */
+ xfs_extlen_t orig_alen; /* original length */
+ xfs_fileoff_t orig_end; /* original off+len */
+ xfs_fileoff_t nexto; /* next file offset */
+ xfs_fileoff_t prevo; /* previous file offset */
+ xfs_fileoff_t align_off; /* temp for offset */
+ xfs_extlen_t align_alen; /* temp for length */
+ xfs_extlen_t temp; /* temp for calculations */
+
+ if (convert)
+ return 0;
+
+ orig_off = align_off = *offp;
+ orig_alen = align_alen = *lenp;
+ orig_end = orig_off + orig_alen;
+
+ /*
+ * If this request overlaps an existing extent, then don't
+ * attempt to perform any additional alignment.
+ */
+ if (!delay && !eof &&
+ (orig_off >= gotp->br_startoff) &&
+ (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+ return 0;
+ }
+
+ /*
+ * If the file offset is unaligned vs. the extent size
+ * we need to align it. This will be possible unless
+ * the file was previously written with a kernel that didn't
+ * perform this alignment, or if a truncate shot us in the
+ * foot.
+ */
+ temp = do_mod(orig_off, extsz);
+ if (temp) {
+ align_alen += temp;
+ align_off -= temp;
+ }
+ /*
+ * Same adjustment for the end of the requested area.
+ */
+ if ((temp = (align_alen % extsz))) {
+ align_alen += extsz - temp;
+ }
+ /*
+ * If the previous block overlaps with this proposed allocation
+ * then move the start forward without adjusting the length.
+ */
+ if (prevp->br_startoff != NULLFILEOFF) {
+ if (prevp->br_startblock == HOLESTARTBLOCK)
+ prevo = prevp->br_startoff;
+ else
+ prevo = prevp->br_startoff + prevp->br_blockcount;
+ } else
+ prevo = 0;
+ if (align_off != orig_off && align_off < prevo)
+ align_off = prevo;
+ /*
+ * If the next block overlaps with this proposed allocation
+ * then move the start back without adjusting the length,
+ * but not before offset 0.
+ * This may of course make the start overlap previous block,
+ * and if we hit the offset 0 limit then the next block
+ * can still overlap too.
+ */
+ if (!eof && gotp->br_startoff != NULLFILEOFF) {
+ if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+ (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+ nexto = gotp->br_startoff + gotp->br_blockcount;
+ else
+ nexto = gotp->br_startoff;
+ } else
+ nexto = NULLFILEOFF;
+ if (!eof &&
+ align_off + align_alen != orig_end &&
+ align_off + align_alen > nexto)
+ align_off = nexto > align_alen ? nexto - align_alen : 0;
+ /*
+ * If we're now overlapping the next or previous extent that
+ * means we can't fit an extsz piece in this hole. Just move
+ * the start forward to the first valid spot and set
+ * the length so we hit the end.
+ */
+ if (align_off != orig_off && align_off < prevo)
+ align_off = prevo;
+ if (align_off + align_alen != orig_end &&
+ align_off + align_alen > nexto &&
+ nexto != NULLFILEOFF) {
+ ASSERT(nexto > prevo);
+ align_alen = nexto - align_off;
+ }
+
+ /*
+ * If realtime, and the result isn't a multiple of the realtime
+ * extent size we need to remove blocks until it is.
+ */
+ if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+ /*
+ * We're not covering the original request, or
+ * we won't be able to once we fix the length.
+ */
+ if (orig_off < align_off ||
+ orig_end > align_off + align_alen ||
+ align_alen - temp < orig_alen)
+ return XFS_ERROR(EINVAL);
+ /*
+ * Try to fix it by moving the start up.
+ */
+ if (align_off + temp <= orig_off) {
+ align_alen -= temp;
+ align_off += temp;
+ }
+ /*
+ * Try to fix it by moving the end in.
+ */
+ else if (align_off + align_alen - temp >= orig_end)
+ align_alen -= temp;
+ /*
+ * Set the start to the minimum then trim the length.
+ */
+ else {
+ align_alen -= orig_off - align_off;
+ align_off = orig_off;
+ align_alen -= align_alen % mp->m_sb.sb_rextsize;
+ }
+ /*
+ * Result doesn't cover the request, fail it.
+ */
+ if (orig_off < align_off || orig_end > align_off + align_alen)
+ return XFS_ERROR(EINVAL);
+ } else {
+ ASSERT(orig_off >= align_off);
+ ASSERT(orig_end <= align_off + align_alen);
+ }
+
+#ifdef DEBUG
+ if (!eof && gotp->br_startoff != NULLFILEOFF)
+ ASSERT(align_off + align_alen <= gotp->br_startoff);
+ if (prevp->br_startoff != NULLFILEOFF)
+ ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+
+ *lenp = align_alen;
+ *offp = align_off;
+ return 0;
+}
+
#define XFS_ALLOC_GAP_UNITS 4
/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent.
*/
-STATIC int /* error */
+STATIC int
xfs_bmap_alloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
{
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
xfs_mount_t *mp; /* mount point structure */
int nullfb; /* true if ap->firstblock isn't set */
int rt; /* true if inode is realtime */
-#ifdef __KERNEL__
- xfs_extlen_t prod=0; /* product factor for allocators */
- xfs_extlen_t ralen=0; /* realtime allocation length */
-#endif
+ xfs_extlen_t prod = 0; /* product factor for allocators */
+ xfs_extlen_t ralen = 0; /* realtime allocation length */
+ xfs_extlen_t align; /* minimum allocation alignment */
+ xfs_rtblock_t rtx;
#define ISVALID(x,y) \
(rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
nullfb = ap->firstblock == NULLFSBLOCK;
rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
if (rt) {
- xfs_extlen_t extsz; /* file extent size for rt */
- xfs_fileoff_t nexto; /* next file offset */
- xfs_extlen_t orig_alen; /* original ap->alen */
- xfs_fileoff_t orig_end; /* original off+len */
- xfs_fileoff_t orig_off; /* original ap->off */
- xfs_extlen_t mod_off; /* modulus calculations */
- xfs_fileoff_t prevo; /* previous file offset */
- xfs_rtblock_t rtx; /* realtime extent number */
- xfs_extlen_t temp; /* temp for rt calculations */
-
- /*
- * Set prod to match the realtime extent size.
- */
- if (!(extsz = ap->ip->i_d.di_extsize))
- extsz = mp->m_sb.sb_rextsize;
- prod = extsz / mp->m_sb.sb_rextsize;
- orig_off = ap->off;
- orig_alen = ap->alen;
- orig_end = orig_off + orig_alen;
- /*
- * If the file offset is unaligned vs. the extent size
- * we need to align it. This will be possible unless
- * the file was previously written with a kernel that didn't
- * perform this alignment.
- */
- mod_off = do_mod(orig_off, extsz);
- if (mod_off) {
- ap->alen += mod_off;
- ap->off -= mod_off;
- }
- /*
- * Same adjustment for the end of the requested area.
- */
- if ((temp = (ap->alen % extsz)))
- ap->alen += extsz - temp;
- /*
- * If the previous block overlaps with this proposed allocation
- * then move the start forward without adjusting the length.
- */
- prevo =
- ap->prevp->br_startoff == NULLFILEOFF ?
- 0 :
- (ap->prevp->br_startoff +
- ap->prevp->br_blockcount);
- if (ap->off != orig_off && ap->off < prevo)
- ap->off = prevo;
- /*
- * If the next block overlaps with this proposed allocation
- * then move the start back without adjusting the length,
- * but not before offset 0.
- * This may of course make the start overlap previous block,
- * and if we hit the offset 0 limit then the next block
- * can still overlap too.
- */
- nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
- NULLFILEOFF : ap->gotp->br_startoff;
- if (!ap->eof &&
- ap->off + ap->alen != orig_end &&
- ap->off + ap->alen > nexto)
- ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
- /*
- * If we're now overlapping the next or previous extent that
- * means we can't fit an extsz piece in this hole. Just move
- * the start forward to the first valid spot and set
- * the length so we hit the end.
- */
- if ((ap->off != orig_off && ap->off < prevo) ||
- (ap->off + ap->alen != orig_end &&
- ap->off + ap->alen > nexto)) {
- ap->off = prevo;
- ap->alen = nexto - prevo;
- }
- /*
- * If the result isn't a multiple of rtextents we need to
- * remove blocks until it is.
- */
- if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
- /*
- * We're not covering the original request, or
- * we won't be able to once we fix the length.
- */
- if (orig_off < ap->off ||
- orig_end > ap->off + ap->alen ||
- ap->alen - temp < orig_alen)
- return XFS_ERROR(EINVAL);
- /*
- * Try to fix it by moving the start up.
- */
- if (ap->off + temp <= orig_off) {
- ap->alen -= temp;
- ap->off += temp;
- }
- /*
- * Try to fix it by moving the end in.
- */
- else if (ap->off + ap->alen - temp >= orig_end)
- ap->alen -= temp;
- /*
- * Set the start to the minimum then trim the length.
- */
- else {
- ap->alen -= orig_off - ap->off;
- ap->off = orig_off;
- ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
- }
- /*
- * Result doesn't cover the request, fail it.
- */
- if (orig_off < ap->off || orig_end > ap->off + ap->alen)
- return XFS_ERROR(EINVAL);
- }
+ align = ap->ip->i_d.di_extsize ?
+ ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+ /* Set prod to match the extent size */
+ prod = align / mp->m_sb.sb_rextsize;
+
+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ align, rt, ap->eof, 0,
+ ap->conv, &ap->off, &ap->alen);
+ if (error)
+ return error;
+ ASSERT(ap->alen);
ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+
/*
* If the offset & length are not perfectly aligned
* then kill prod, it will just get us in trouble.
*/
- if (do_mod(ap->off, extsz) || ap->alen % extsz)
+ if (do_mod(ap->off, align) || ap->alen % align)
prod = 1;
/*
* Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
ap->rval = rtx * mp->m_sb.sb_rextsize;
} else
ap->rval = 0;
+ } else {
+ align = (ap->userdata && ap->ip->i_d.di_extsize &&
+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+ ap->ip->i_d.di_extsize : 0;
+ if (unlikely(align)) {
+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ align, rt,
+ ap->eof, 0, ap->conv,
+ &ap->off, &ap->alen);
+ ASSERT(!error);
+ ASSERT(ap->alen);
+ }
+ if (nullfb)
+ ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+ else
+ ap->rval = ap->firstblock;
}
-#else
- if (rt)
- ap->rval = 0;
-#endif /* __KERNEL__ */
- else if (nullfb)
- ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
- else
- ap->rval = ap->firstblock;
+
/*
* If allocating at eof, and there's a previous real block,
* try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
args.total = ap->total;
args.minlen = ap->minlen;
}
- if (ap->ip->i_d.di_extsize) {
+ if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
args.prod = ap->ip->i_d.di_extsize;
if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
- } else if (mp->m_sb.sb_blocksize >= NBPP) {
+ } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
args.prod = 1;
args.mod = 0;
} else {
@@ -4590,6 +4663,7 @@ xfs_bmapi(
char contig; /* allocation must be one extent */
char delay; /* this request is for delayed alloc */
char exact; /* don't do all of wasdelayed extent */
+ char convert; /* unwritten extent I/O completion */
xfs_bmbt_rec_t *ep; /* extent list entry pointer */
int error; /* error return */
xfs_bmbt_irec_t got; /* current extent list record */
@@ -4643,7 +4717,7 @@ xfs_bmapi(
}
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- rt = XFS_IS_REALTIME_INODE(ip);
+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_ext_max ==
XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4728,7 @@ xfs_bmapi(
delay = (flags & XFS_BMAPI_DELAY) != 0;
trim = (flags & XFS_BMAPI_ENTIRE) == 0;
userdata = (flags & XFS_BMAPI_METADATA) == 0;
+ convert = (flags & XFS_BMAPI_CONVERT) != 0;
exact = (flags & XFS_BMAPI_EXACT) != 0;
rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,16 +4823,26 @@ xfs_bmapi(
}
minlen = contig ? alen : 1;
if (delay) {
- xfs_extlen_t extsz = 0;
+ xfs_extlen_t extsz;
/* Figure out the extent size, adjust alen */
if (rt) {
if (!(extsz = ip->i_d.di_extsize))
extsz = mp->m_sb.sb_rextsize;
- alen = roundup(alen, extsz);
- extsz = alen / mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
+ }
+ if (extsz) {
+ error = xfs_bmap_extsize_align(mp,
+ &got, &prev, extsz,
+ rt, eof, delay, convert,
+ &aoff, &alen);
+ ASSERT(!error);
}
+ if (rt)
+ extsz = alen / mp->m_sb.sb_rextsize;
+
/*
* Make a transaction-less quota reservation for
* delayed allocation blocks. This number gets
@@ -4785,14 +4870,15 @@ xfs_bmapi(
xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
- if (rt)
+ if (rt) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FREXTENTS,
-(extsz), rsvd);
- else
+ } else {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
-(alen), rsvd);
+ }
if (!error) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
@@ -4811,6 +4897,7 @@ xfs_bmapi(
if (error) {
if (XFS_IS_QUOTA_ON(ip->i_mount))
/* unreserve the blocks now */
+ (void)
XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
mp, NULL, ip,
(long)alen, 0, rt ?
@@ -4849,6 +4936,7 @@ xfs_bmapi(
bma.firstblock = *firstblock;
bma.alen = alen;
bma.off = aoff;
+ bma.conv = convert;
bma.wasdel = wasdelay;
bma.minlen = minlen;
bma.low = flist->xbf_low;
@@ -5270,8 +5358,7 @@ xfs_bunmapi(
return 0;
}
XFS_STATS_INC(xs_blk_unmap);
- isrt = (whichfork == XFS_DATA_FORK) &&
- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
start = bno;
bno = start + len - 1;
ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5530,7 @@ xfs_bunmapi(
}
if (wasdel) {
ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
- /* Update realtim/data freespace, unreserve quota */
+ /* Update realtime/data freespace, unreserve quota */
if (isrt) {
xfs_filblks_t rtexts;
@@ -5451,14 +5538,14 @@ xfs_bunmapi(
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
(int)rtexts, rsvd);
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
- -((long)del.br_blockcount), 0,
+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+ NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
(int)del.br_blockcount, rsvd);
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
- -((long)del.br_blockcount), 0,
+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+ NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5739,9 @@ xfs_getbmap(
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
return XFS_ERROR(EINVAL);
if (whichfork == XFS_DATA_FORK) {
- if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+ if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+ (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+ ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
prealloced = 1;
fixlen = XFS_MAXIOFFSET(mp);
} else {
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a0130..12cc63dfc2c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef struct xfs_bmap_free
#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
/* combine contig. space */
#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */
+/* XFS_BMAPI_DIRECT_IO 0x800 */
+#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
+ /* need write cache flushing and no */
+ /* additional allocation alignments */
#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w)
static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
char wasdel; /* replacing a delayed allocation */
char userdata;/* set if is user data */
char low; /* low on space, using seq'l ags */
- char aeof; /* allocated space at eof */
+ char aeof; /* allocated space at eof */
+ char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1..f697aab8a3d 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -246,8 +246,10 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
+#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
+#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +261,14 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT)
#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT)
#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
#define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
- XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+ XFS_DIFLAG_EXTSZINHERIT)
#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48..3280f49496b 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -65,6 +65,8 @@ struct fsxattr {
#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70..e486c7d244c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -809,6 +809,10 @@ _xfs_dic2xflags(
flags |= XFS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
flags |= XFS_XFLAG_NOSYMLINKS;
+ if (di_flags & XFS_DIFLAG_EXTSIZE)
+ flags |= XFS_XFLAG_EXTSIZE;
+ if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+ flags |= XFS_XFLAG_EXTSZINHERIT;
}
return flags;
@@ -1192,11 +1196,19 @@ xfs_ialloc(
if ((mode & S_IFMT) == S_IFDIR) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- } else {
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ } else if ((mode & S_IFMT) == S_IFREG) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
di_flags |= XFS_DIFLAG_REALTIME;
ip->i_iocore.io_flags |= XFS_IOCORE_RT;
}
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
}
if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
xfs_inherit_noatime)
@@ -1262,7 +1274,7 @@ xfs_isize_check(
if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
return;
- if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+ if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
return;
nimaps = 2;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 45a77a3a6c0..5ecf3e3e86a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -263,7 +263,7 @@ phase2:
case BMAPI_WRITE:
/* If we found an extent, return it */
if (nimaps &&
- (imap.br_startblock != HOLESTARTBLOCK) &&
+ (imap.br_startblock != HOLESTARTBLOCK) &&
(imap.br_startblock != DELAYSTARTBLOCK)) {
xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
offset, count, iomapp, &imap, flags);
@@ -318,6 +318,58 @@ out:
}
STATIC int
+xfs_iomap_eof_align_last_fsb(
+ xfs_mount_t *mp,
+ xfs_iocore_t *io,
+ xfs_fsize_t isize,
+ xfs_extlen_t extsize,
+ xfs_fileoff_t *last_fsb)
+{
+ xfs_fileoff_t new_last_fsb = 0;
+ xfs_extlen_t align;
+ int eof, error;
+
+ if (io->io_flags & XFS_IOCORE_RT)
+ ;
+ /*
+ * If mounted with the "-o swalloc" option, roundup the allocation
+ * request to a stripe width boundary if the file size is >=
+ * stripe width and we are allocating past the allocation eof.
+ */
+ else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+ (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+ new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
+ /*
+ * Roundup the allocation request to a stripe unit (m_dalign) boundary
+ * if the file size is >= stripe unit size, and we are allocating past
+ * the allocation eof.
+ */
+ else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+ new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+
+ /*
+ * Always round up the allocation request to an extent boundary
+ * (when file on a real-time subvolume or has di_extsize hint).
+ */
+ if (extsize) {
+ if (new_last_fsb)
+ align = roundup_64(new_last_fsb, extsize);
+ else
+ align = extsize;
+ new_last_fsb = roundup_64(*last_fsb, align);
+ }
+
+ if (new_last_fsb) {
+ error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+ if (error)
+ return error;
+ if (eof)
+ *last_fsb = new_last_fsb;
+ }
+ return 0;
+}
+
+STATIC int
xfs_flush_space(
xfs_inode_t *ip,
int *fsynced,
@@ -363,19 +415,20 @@ xfs_iomap_write_direct(
xfs_iocore_t *io = &ip->i_iocore;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t last_fsb;
- xfs_filblks_t count_fsb;
+ xfs_filblks_t count_fsb, resaligned;
xfs_fsblock_t firstfsb;
+ xfs_extlen_t extsz, temp;
+ xfs_fsize_t isize;
int nimaps;
- int error;
int bmapi_flag;
int quota_flag;
int rt;
xfs_trans_t *tp;
xfs_bmbt_irec_t imap;
xfs_bmap_free_t free_list;
- xfs_filblks_t qblocks, resblks;
+ uint qblocks, resblks, resrtextents;
int committed;
- int resrtextents;
+ int error;
/*
* Make sure that the dquots are there. This doesn't hold
@@ -385,37 +438,52 @@ xfs_iomap_write_direct(
if (error)
return XFS_ERROR(error);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- count_fsb = last_fsb - offset_fsb;
- if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
- xfs_fileoff_t map_last_fsb;
-
- map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
- if (map_last_fsb < last_fsb) {
- last_fsb = map_last_fsb;
- count_fsb = last_fsb - offset_fsb;
- }
- ASSERT(count_fsb > 0);
+ rt = XFS_IS_REALTIME_INODE(ip);
+ if (unlikely(rt)) {
+ if (!(extsz = ip->i_d.di_extsize))
+ extsz = mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
}
- /*
- * Determine if reserving space on the data or realtime partition.
- */
- if ((rt = XFS_IS_REALTIME_INODE(ip))) {
- xfs_extlen_t extsz;
+ isize = ip->i_d.di_size;
+ if (io->io_new_size > isize)
+ isize = io->io_new_size;
- if (!(extsz = ip->i_d.di_extsize))
- extsz = mp->m_sb.sb_rextsize;
- resrtextents = qblocks = (count_fsb + extsz - 1);
- do_div(resrtextents, mp->m_sb.sb_rextsize);
- resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
- quota_flag = XFS_QMOPT_RES_RTBLKS;
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+ if ((offset + count) > isize) {
+ error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+ &last_fsb);
+ if (error)
+ goto error_out;
} else {
- resrtextents = 0;
- resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
- quota_flag = XFS_QMOPT_RES_REGBLKS;
+ if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+ last_fsb = MIN(last_fsb, (xfs_fileoff_t)
+ ret_imap->br_blockcount +
+ ret_imap->br_startoff);
}
+ count_fsb = last_fsb - offset_fsb;
+ ASSERT(count_fsb > 0);
+
+ resaligned = count_fsb;
+ if (unlikely(extsz)) {
+ if ((temp = do_mod(offset_fsb, extsz)))
+ resaligned += temp;
+ if ((temp = do_mod(resaligned, extsz)))
+ resaligned += extsz - temp;
+ }
+
+ if (unlikely(rt)) {
+ resrtextents = qblocks = resaligned;
+ resrtextents /= mp->m_sb.sb_rextsize;
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ quota_flag = XFS_QMOPT_RES_RTBLKS;
+ } else {
+ resrtextents = 0;
+ resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+ quota_flag = XFS_QMOPT_RES_REGBLKS;
+ }
/*
* Allocate and setup the transaction
@@ -426,7 +494,6 @@ xfs_iomap_write_direct(
XFS_WRITE_LOG_RES(mp), resrtextents,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
-
/*
* Check for running out of space, note: need lock to return
*/
@@ -436,20 +503,20 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
- if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
- error = (EDQUOT);
+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+ qblocks, 0, quota_flag);
+ if (error)
goto error1;
- }
- bmapi_flag = XFS_BMAPI_WRITE;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
- if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+ bmapi_flag = XFS_BMAPI_WRITE;
+ if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
- * Issue the bmapi() call to allocate the blocks
+ * Issue the xfs_bmapi() call to allocate the blocks
*/
XFS_BMAP_INIT(&free_list, &firstfsb);
nimaps = 1;
@@ -501,6 +568,62 @@ error_out:
return XFS_ERROR(error);
}
+/*
+ * If the caller is doing a write at the end of the file,
+ * then extend the allocation out to the file system's write
+ * iosize. We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+ xfs_mount_t *mp,
+ xfs_iocore_t *io,
+ xfs_fsize_t isize,
+ xfs_off_t offset,
+ size_t count,
+ int ioflag,
+ xfs_bmbt_irec_t *imap,
+ int nimaps,
+ int *prealloc)
+{
+ xfs_fileoff_t start_fsb;
+ xfs_filblks_t count_fsb;
+ xfs_fsblock_t firstblock;
+ int n, error, imaps;
+
+ *prealloc = 0;
+ if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+ return 0;
+
+ /*
+ * If there are any real blocks past eof, then don't
+ * do any speculative allocation.
+ */
+ start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+ count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ while (count_fsb > 0) {
+ imaps = nimaps;
+ error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+ 0, &firstblock, 0, imap, &imaps, NULL);
+ if (error)
+ return error;
+ for (n = 0; n < imaps; n++) {
+ if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+ (imap[n].br_startblock != DELAYSTARTBLOCK))
+ return 0;
+ start_fsb += imap[n].br_blockcount;
+ count_fsb -= imap[n].br_blockcount;
+ }
+ }
+ *prealloc = 1;
+ return 0;
+}
+
int
xfs_iomap_write_delay(
xfs_inode_t *ip,
@@ -514,13 +637,15 @@ xfs_iomap_write_delay(
xfs_iocore_t *io = &ip->i_iocore;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t last_fsb;
- xfs_fsize_t isize;
+ xfs_off_t aligned_offset;
+ xfs_fileoff_t ioalign;
xfs_fsblock_t firstblock;
+ xfs_extlen_t extsz;
+ xfs_fsize_t isize;
int nimaps;
- int error;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
- int aeof;
- int fsynced = 0;
+ int prealloc, fsynced = 0;
+ int error;
ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
@@ -528,152 +653,57 @@ xfs_iomap_write_delay(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
-
error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
if (error)
return XFS_ERROR(error);
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ if (!(extsz = ip->i_d.di_extsize))
+ extsz = mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
+ }
+
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
retry:
isize = ip->i_d.di_size;
- if (io->io_new_size > isize) {
+ if (io->io_new_size > isize)
isize = io->io_new_size;
- }
- aeof = 0;
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- /*
- * If the caller is doing a write at the end of the file,
- * then extend the allocation (and the buffer used for the write)
- * out to the file system's write iosize. We clean up any extra
- * space left over when the file is closed in xfs_inactive().
- *
- * For sync writes, we are flushing delayed allocate space to
- * try to make additional space available for allocation near
- * the filesystem full boundary - preallocation hurts in that
- * situation, of course.
- */
- if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
- xfs_off_t aligned_offset;
- xfs_filblks_t count_fsb;
- unsigned int iosize;
- xfs_fileoff_t ioalign;
- int n;
- xfs_fileoff_t start_fsb;
+ error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+ ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+ if (error)
+ return error;
- /*
- * If there are any real blocks past eof, then don't
- * do any speculative allocation.
- */
- start_fsb = XFS_B_TO_FSBT(mp,
- ((xfs_ufsize_t)(offset + count - 1)));
- count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- while (count_fsb > 0) {
- nimaps = XFS_WRITE_IMAPS;
- error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
- 0, &firstblock, 0, imap, &nimaps, NULL);
- if (error) {
- return error;
- }
- for (n = 0; n < nimaps; n++) {
- if ( !(io->io_flags & XFS_IOCORE_RT) &&
- !imap[n].br_startblock) {
- cmn_err(CE_PANIC,"Access to block "
- "zero: fs <%s> inode: %lld "
- "start_block : %llx start_off "
- ": %llx blkcnt : %llx "
- "extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- imap[n].br_startblock,
- imap[n].br_startoff,
- imap[n].br_blockcount,
- imap[n].br_state);
- }
- if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
- (imap[n].br_startblock != DELAYSTARTBLOCK)) {
- goto write_map;
- }
- start_fsb += imap[n].br_blockcount;