aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-28 10:44:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-28 10:44:50 -0700
commitf793f2961170c0b49c1650e69e7825484159ce62 (patch)
tree06d27973f9db1080c1460f32155ce2baf610c3d4 /fs
parentdabcbb1bae0f55378060b285062b20f6ec648c6a (diff)
parentb99b98dc2673a123a73068f16720232d7be7e669 (diff)
Merge http://sucs.org/~rohan/git/gfs2-3.0-nmw
* http://sucs.org/~rohan/git/gfs2-3.0-nmw: (24 commits) GFS2: Move readahead of metadata during deallocation into its own function GFS2: Remove two unused variables GFS2: Misc fixes GFS2: rewrite fallocate code to write blocks directly GFS2: speed up delete/unlink performance for large files GFS2: Fix off-by-one in gfs2_blk2rgrpd GFS2: Clean up ->page_mkwrite GFS2: Correctly set goal block after allocation GFS2: Fix AIL flush issue during fsync GFS2: Use cached rgrp in gfs2_rlist_add() GFS2: Call do_strip() directly from recursive_scan() GFS2: Remove obsolete assert GFS2: Cache the most recently used resource group in the inode GFS2: Make resource groups "append only" during life of fs GFS2: Use rbtree for resource groups and clean up bitmap buffer ref count scheme GFS2: Fix lseek after SEEK_DATA, SEEK_HOLE have been added GFS2: Clean up gfs2_create GFS2: Use ->dirty_inode() GFS2: Fix bug trap and journaled data fsync GFS2: Fix inode allocation error path ...
Diffstat (limited to 'fs')
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c199
-rw-r--r--fs/gfs2/dir.c50
-rw-r--r--fs/gfs2/file.c295
-rw-r--r--fs/gfs2/glops.c89
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h23
-rw-r--r--fs/gfs2/inode.c112
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/lops.c66
-rw-r--r--fs/gfs2/ops_fstype.c6
-rw-r--r--fs/gfs2/quota.c28
-rw-r--r--fs/gfs2/rgrp.c573
-rw-r--r--fs/gfs2/rgrp.h31
-rw-r--r--fs/gfs2/super.c134
-rw-r--r--fs/gfs2/trans.c5
-rw-r--r--fs/gfs2/trans.h22
-rw-r--r--fs/gfs2/xattr.c28
19 files changed, 666 insertions, 1012 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 34501b64bc4..65978d7885c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -82,7 +82,7 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode)
iattr.ia_valid = ATTR_MODE;
iattr.ia_mode = mode;
- error = gfs2_setattr_simple(GFS2_I(inode), &iattr);
+ error = gfs2_setattr_simple(inode, &iattr);
}
return error;
@@ -160,6 +160,7 @@ out:
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
{
+ struct inode *inode = &ip->i_inode;
struct posix_acl *acl;
char *data;
unsigned int len;
@@ -169,7 +170,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
if (IS_ERR(acl))
return PTR_ERR(acl);
if (!acl)
- return gfs2_setattr_simple(ip, attr);
+ return gfs2_setattr_simple(inode, attr);
error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode);
if (error)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f9fbbe96c22..4858e1fed8b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -663,7 +663,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (&ip->i_inode == sdp->sd_rindex)
rblocks += 2 * RES_STATFS;
if (alloc_required)
- rblocks += gfs2_rg_blocks(al);
+ rblocks += gfs2_rg_blocks(ip);
error = gfs2_trans_begin(sdp, rblocks,
PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -787,7 +787,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
u64 to = pos + copied;
void *kaddr;
unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
- struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
kaddr = kmap_atomic(page, KM_USER0);
@@ -804,7 +803,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
if (copied) {
if (inode->i_size < to)
i_size_write(inode, to);
- gfs2_dinode_out(ip, di);
mark_inode_dirty(inode);
}
@@ -873,10 +871,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
gfs2_page_add_databufs(ip, page, from, to);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (ret > 0) {
- gfs2_dinode_out(ip, dibh->b_data);
- mark_inode_dirty(inode);
- }
if (inode == sdp->sd_rindex) {
adjust_fs_space(inode);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7878c473ae6..41d494d7970 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -10,6 +10,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
@@ -36,11 +37,6 @@ struct metapath {
__u16 mp_list[GFS2_MAX_META_HEIGHT];
};
-typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
- struct buffer_head *bh, __be64 *top,
- __be64 *bottom, unsigned int height,
- void *data);
-
struct strip_mine {
int sm_first;
unsigned int sm_height;
@@ -273,6 +269,30 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
}
+static void gfs2_metapath_ra(struct gfs2_glock *gl,
+ const struct buffer_head *bh, const __be64 *pos)
+{
+ struct buffer_head *rabh;
+ const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
+ const __be64 *t;
+
+ for (t = pos; t < endp; t++) {
+ if (!*t)
+ continue;
+
+ rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
+ if (trylock_buffer(rabh)) {
+ if (!buffer_uptodate(rabh)) {
+ rabh->b_end_io = end_buffer_read_sync;
+ submit_bh(READA | REQ_META, rabh);
+ continue;
+ }
+ unlock_buffer(rabh);
+ }
+ brelse(rabh);
+ }
+}
+
/**
* lookup_metapath - Walk the metadata tree to a specific point
* @ip: The inode
@@ -432,12 +452,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct super_block *sb = sdp->sd_vfs;
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn, dblock = 0;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = height - 1;
+ int ret;
int eob = 0;
enum alloc_state state;
__be64 *ptr;
@@ -540,6 +562,15 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
dblock = bn;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
+ if (buffer_zeronew(bh_map)) {
+ ret = sb_issue_zeroout(sb, dblock, dblks,
+ GFP_NOFS);
+ if (ret) {
+ fs_err(sdp,
+ "Failed to zero data buffers\n");
+ clear_buffer_zeronew(bh_map);
+ }
+ }
break;
}
} while ((state != ALLOC_DATA) || !dblock);
@@ -668,76 +699,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
}
/**
- * recursive_scan - recursively scan through the end of a file
- * @ip: the inode
- * @dibh: the dinode buffer
- * @mp: the path through the metadata to the point to start
- * @height: the height the recursion is at
- * @block: the indirect block to look at
- * @first: 1 if this is the first block
- * @bc: the call to make for each piece of metadata
- * @data: data opaque to this function to pass to @bc
- *
- * When this is first called @height and @block should be zero and
- * @first should be 1.
- *
- * Returns: errno
- */
-
-static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
- struct metapath *mp, unsigned int height,
- u64 block, int first, block_call_t bc,
- void *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct buffer_head *bh = NULL;
- __be64 *top, *bottom;
- u64 bn;
- int error;
- int mh_size = sizeof(struct gfs2_meta_header);
-
- if (!height) {
- error = gfs2_meta_inode_buffer(ip, &bh);
- if (error)
- return error;
- dibh = bh;
-
- top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
- bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
- } else {
- error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
- if (error)
- return error;
-
- top = (__be64 *)(bh->b_data + mh_size) +
- (first ? mp->mp_list[height] : 0);
-
- bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
- }
-
- error = bc(ip, dibh, bh, top, bottom, height, data);
- if (error)
- goto out;
-
- if (height < ip->i_height - 1)
- for (; top < bottom; top++, first = 0) {
- if (!*top)
- continue;
-
- bn = be64_to_cpu(*top);
-
- error = recursive_scan(ip, dibh, mp, height + 1, bn,
- first, bc, data);
- if (error)
- break;
- }
-
-out:
- brelse(bh);
- return error;
-}
-
-/**
* do_strip - Look for a layer a particular layer of the file and strip it off
* @ip: the inode
* @dibh: the dinode buffer
@@ -752,9 +713,8 @@ out:
static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct buffer_head *bh, __be64 *top, __be64 *bottom,
- unsigned int height, void *data)
+ unsigned int height, struct strip_mine *sm)
{
- struct strip_mine *sm = data;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
u64 bn, bstart;
@@ -783,11 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
else if (ip->i_depth)
revokes = sdp->sd_inptrs;
- if (ip != GFS2_I(sdp->sd_rindex))
- error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
- else if (!sdp->sd_rgrps)
- error = gfs2_ri_update(ip);
-
if (error)
return error;
@@ -805,7 +760,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
blen++;
else {
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
bstart = bn;
blen = 1;
@@ -813,7 +768,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
else
goto out; /* Nothing to do */
@@ -887,12 +842,82 @@ out_rg_gunlock:
out_rlist:
gfs2_rlist_free(&rlist);
out:
- if (ip != GFS2_I(sdp->sd_rindex))
- gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
return error;
}
/**
+ * recursive_scan - recursively scan through the end of a file
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @mp: the path through the metadata to the point to start
+ * @height: the height the recursion is at
+ * @block: the indirect block to look at
+ * @first: 1 if this is the first block
+ * @sm: data opaque to this function to pass to @bc
+ *
+ * When this is first called @height and @block should be zero and
+ * @first should be 1.
+ *
+ * Returns: errno
+ */
+
+static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
+ struct metapath *mp, unsigned int height,
+ u64 block, int first, struct strip_mine *sm)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct buffer_head *bh = NULL;
+ __be64 *top, *bottom;
+ u64 bn;
+ int error;
+ int mh_size = sizeof(struct gfs2_meta_header);
+
+ if (!height) {
+ error = gfs2_meta_inode_buffer(ip, &bh);
+ if (error)
+ return error;
+ dibh = bh;
+
+ top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+ bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+ } else {
+ error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
+ if (error)
+ return error;
+
+ top = (__be64 *)(bh->b_data + mh_size) +
+ (first ? mp->mp_list[height] : 0);
+
+ bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+ }
+
+ error = do_strip(ip, dibh, bh, top, bottom, height, sm);
+ if (error)
+ goto out;
+
+ if (height < ip->i_height - 1) {
+
+ gfs2_metapath_ra(ip->i_gl, bh, top);
+
+ for (; top < bottom; top++, first = 0) {
+ if (!*top)
+ continue;
+
+ bn = be64_to_cpu(*top);
+
+ error = recursive_scan(ip, dibh, mp, height + 1, bn,
+ first, sm);
+ if (error)
+ break;
+ }
+ }
+out:
+ brelse(bh);
+ return error;
+}
+
+
+/**
* gfs2_block_truncate_page - Deal with zeroing out data for truncate
*
* This is partly borrowed from ext3.
@@ -1031,7 +1056,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
sm.sm_first = !!size;
sm.sm_height = height;
- error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
+ error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
if (error)
break;
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1cc2f8ec52a..8ccad2467cb 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -240,16 +240,15 @@ fail:
return error;
}
-static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
- u64 offset, unsigned int size)
+static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
+ unsigned int size)
{
struct buffer_head *dibh;
int error;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- offset += sizeof(struct gfs2_dinode);
- memcpy(buf, dibh->b_data + offset, size);
+ memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
brelse(dibh);
}
@@ -261,13 +260,12 @@ static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
* gfs2_dir_read_data - Read a data from a directory inode
* @ip: The GFS2 Inode
* @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
* @size: Amount of data to transfer
*
* Returns: The amount of data actually copied or the error
*/
-static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
- unsigned int size, unsigned ra)
+static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
+ unsigned int size)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
u64 lblock, dblock;
@@ -275,24 +273,14 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
unsigned int o;
int copied = 0;
int error = 0;
- u64 disksize = i_size_read(&ip->i_inode);
-
- if (offset >= disksize)
- return 0;
-
- if (offset + size > disksize)
- size = disksize - offset;
-
- if (!size)
- return 0;
if (gfs2_is_stuffed(ip))
- return gfs2_dir_read_stuffed(ip, buf, offset, size);
+ return gfs2_dir_read_stuffed(ip, buf, size);
if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
return -EINVAL;
- lblock = offset;
+ lblock = 0;
o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
while (copied < size) {
@@ -311,8 +299,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
if (error || !dblock)
goto fail;
BUG_ON(extlen < 1);
- if (!ra)
- extlen = 1;
bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
} else {
error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
@@ -328,7 +314,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
extlen--;
memcpy(buf, bh->b_data + o, amount);
brelse(bh);
- buf += amount;
+ buf += (amount/sizeof(__be64));
copied += amount;
lblock++;
o = sizeof(struct gfs2_meta_header);
@@ -371,7 +357,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
if (hc == NULL)
return ERR_PTR(-ENOMEM);
- ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
+ ret = gfs2_dir_read_data(ip, hc, hsize);
if (ret < 0) {
kfree(hc);
return ERR_PTR(ret);
@@ -1695,7 +1681,6 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
const struct qstr *name = &dentry->d_name;
struct gfs2_dirent *dent, *prev = NULL;
struct buffer_head *bh;
- int error;
/* Returns _either_ the entry (if its first in block) or the
previous entry otherwise */
@@ -1724,22 +1709,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
}
brelse(bh);
- error = gfs2_meta_inode_buffer(dip, &bh);
- if (error)
- return error;
-
if (!dip->i_entries)
gfs2_consist_inode(dip);
- gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_entries--;
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
if (S_ISDIR(dentry->d_inode->i_mode))
drop_nlink(&dip->i_inode);
- gfs2_dinode_out(dip, bh->b_data);
- brelse(bh);
mark_inode_dirty(&dip->i_inode);
- return error;
+ return 0;
}
/**
@@ -1829,10 +1807,6 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (error)
goto out_put;
- error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
- if (error)
- goto out_qs;
-
/* Count the number of leaves */
bh = leaf_bh;
@@ -1847,7 +1821,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (blk != leaf_no)
brelse(bh);
- gfs2_rlist_add(sdp, &rlist, blk);
+ gfs2_rlist_add(dip, &rlist, blk);
l_blocks++;
}
@@ -1911,8 +1885,6 @@ out_rg_gunlock:
gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
gfs2_rlist_free(&rlist);
- gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
-out_qs:
gfs2_quota_unhold(dip);
out_put:
gfs2_alloc_put(dip);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index edeb9e80290..5002408dabe 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
struct gfs2_holder i_gh;
loff_t error;
- if (origin == 2) {
+ switch (origin) {
+ case SEEK_END: /* These reference inode->i_size */
+ case SEEK_DATA:
+ case SEEK_HOLE:
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
&i_gh);
if (!error) {
error = generic_file_llseek_unlocked(file, offset, origin);
gfs2_glock_dq_uninit(&i_gh);
}
- } else
+ break;
+ case SEEK_CUR:
+ case SEEK_SET:
error = generic_file_llseek_unlocked(file, offset, origin);
+ break;
+ default:
+ error = -EINVAL;
+ }
return error;
}
@@ -357,8 +366,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
struct gfs2_alloc *al;
+ loff_t size;
int ret;
+ /* Wait if fs is frozen. This is racy so we check again later on
+ * and retry if the fs has been frozen after the page lock has
+ * been acquired
+ */
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret)
@@ -367,8 +383,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
- if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
+ if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
+ lock_page(page);
+ if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+ ret = -EAGAIN;
+ unlock_page(page);
+ }
goto out_unlock;
+ }
+
ret = -ENOMEM;
al = gfs2_alloc_get(ip);
if (al == NULL)
@@ -388,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
rblocks += data_blocks ? data_blocks : 1;
if (ind_blocks || data_blocks) {
rblocks += RES_STATFS + RES_QUOTA;
- rblocks += gfs2_rg_blocks(al);
+ rblocks += gfs2_rg_blocks(ip);
}
ret = gfs2_trans_begin(sdp, rblocks, 0);
if (ret)
@@ -396,21 +419,29 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
lock_page(page);
ret = -EINVAL;
- last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
- if (page->index > last_index)
- goto out_unlock_page;
+ size = i_size_read(inode);
+ last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+ /* Check page index against inode size */
+ if (size == 0 || (page->index > last_index))
+ goto out_trans_end;
+
+ ret = -EAGAIN;
+ /* If truncated, we must retry the operation, we may have raced
+ * with the glock demotion code.
+ */
+ if (!PageUptodate(page) || page->mapping != inode->i_mapping)
+ goto out_trans_end;
+
+ /* Unstuff, if required, and allocate backing blocks for page */
ret = 0;
- if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
- goto out_unlock_page;
- if (gfs2_is_stuffed(ip)) {
+ if (gfs2_is_stuffed(ip))
ret = gfs2_unstuff_dinode(ip, page);
- if (ret)
- goto out_unlock_page;
- }
- ret = gfs2_allocate_page_backing(page);
+ if (ret == 0)
+ ret = gfs2_allocate_page_backing(page);
-out_unlock_page:
- unlock_page(page);
+out_trans_end:
+ if (ret)
+ unlock_page(page);
gfs2_trans_end(sdp);
out_trans_fail:
gfs2_inplace_release(ip);
@@ -422,11 +453,17 @@ out_unlock:
gfs2_glock_dq(&gh);
out:
gfs2_holder_uninit(&gh);
- if (ret == -ENOMEM)
- ret = VM_FAULT_OOM;
- else if (ret)
- ret = VM_FAULT_SIGBUS;
- return ret;
+ if (ret == 0) {
+ set_page_dirty(page);
+ /* This check must be post dropping of transaction lock */
+ if (inode->i_sb->s_frozen == SB_UNFROZEN) {
+ wait_on_page_writeback(page);
+ } else {
+ ret = -EAGAIN;
+ unlock_page(page);
+ }
+ }
+ return block_page_mkwrite_return(ret);
}
static const struct vm_operations_struct gfs2_vm_ops = {
@@ -551,8 +588,16 @@ static int gfs2_close(struct inode *inode, struct file *file)
* @end: the end position in the file to sync
* @datasync: set if we can ignore timestamp changes
*
- * The VFS will flush data for us. We only need to worry
- * about metadata here.
+ * We split the data flushing here so that we don't wait for the data
+ * until after we've also sent the metadata to disk. Note that for
+ * data=ordered, we will write & wait for the data at the log flush
+ * stage anyway, so this is unlikely to make much of a difference
+ * except in the data=writeback case.
+ *
+ * If the fdatawrite fails due to any reason except -EIO, we will
+ * continue the remainder of the fsync, although we'll still report
+ * the error at the end. This is to match filemap_write_and_wait_range()
+ * behaviour.
*
* Returns: errno
*/
@@ -560,30 +605,34 @@ static int gfs2_close(struct inode *inode, struct file *file)
static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
struct gfs2_inode *ip = GFS2_I(inode);
- int ret;
+ int ret, ret1 = 0;
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
- return ret;
- mutex_lock(&inode->i_mutex);
+ if (mapping->nrpages) {
+ ret1 = filemap_fdatawrite_range(mapping, start, end);
+ if (ret1 == -EIO)
+ return ret1;
+ }
if (datasync)
sync_state &= ~I_DIRTY_SYNC;
if (sync_state) {
ret = sync_inode_metadata(inode, 1);
- if (ret) {
- mutex_unlock(&inode->i_mutex);
+ if (ret)
return ret;
- }
- gfs2_ail_flush(ip->i_gl);
+ if (gfs2_is_jdata(ip))
+ filemap_write_and_wait(mapping);
+ gfs2_ail_flush(ip->i_gl, 1);
}
- mutex_unlock(&inode->i_mutex);
- return 0;
+ if (mapping->nrpages)
+ ret = filemap_fdatawait_range(mapping, start, end);
+
+ return ret ? ret : ret1;
}
/**
@@ -620,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
-static int empty_write_end(struct page *page, unsigned from,
- unsigned to, int mode)
-{
- struct inode *inode = page->mapping->host;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct buffer_head *bh;
- unsigned offset, blksize = 1 << inode->i_blkbits;
- pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-
- zero_user(page, from, to-from);
- mark_page_accessed(page);
-
- if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
- if (!gfs2_is_writeback(ip))
- gfs2_page_add_databufs(ip, page, from, to);
-
- block_commit_write(page, from, to);
- return 0;
- }
-
- offset = 0;
- bh = page_buffers(page);
- while (offset < to) {
- if (offset >= from) {
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- clear_buffer_new(bh);
- write_dirty_buffer(bh, WRITE);
- }
- offset += blksize;
- bh = bh->b_this_page;
- }
-
- offset = 0;
- bh = page_buffers(page);
- while (offset < to) {
- if (offset >= from) {
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- return -EIO;
- }
- offset += blksize;
- bh = bh->b_this_page;
- }
- return 0;
-}
-
-static int needs_empty_write(sector_t block, struct inode *inode)
-{
- int error;
- struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-
- bh_map.b_size = 1 << inode->i_blkbits;
- error = gfs2_block_map(inode, block, &bh_map, 0);
- if (unlikely(error))
- return error;
- return !buffer_mapped(&bh_map);
-}
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
- int mode)
-{
- struct inode *inode = page->mapping->host;
- unsigned start, end, next, blksize;
- sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- int ret;
-
- blksize = 1 << inode->i_blkbits;
- next = end = 0;
- while (next < from) {
- next += blksize;
- block++;
- }
- start = next;
- do {
- next += blksize;
- ret = needs_empty_write(block, inode);
- if (unlikely(ret < 0))
- return ret;
- if (ret == 0) {
- if (end) {
- ret = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(ret))
- return ret;
- ret = empty_write_end(page, start, end, mode);
- if (unlikely(ret))
- return ret;
- end = 0;
- }
- start = next;
- }
- else
- end = next;
- block++;
- } while (next < to);
-
- if (end) {
- ret = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(ret))
- return ret;
- ret = empty_write_end(page, start, end, mode);
- if (unlikely(ret))
- return ret;
- }
-
- return 0;
-}
-
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
int mode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *dibh;
int error;
- u64 start = offset >> PAGE_CACHE_SHIFT;
- unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
- u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
- pgoff_t curr;
- struct page *page;
- unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
- unsigned int from, to;
-
- if (!end_offset)
- end_offset = PAGE_CACHE_SIZE;
+ unsigned int nr_blks;
+ sector_t lblock = offset >> inode->i_blkbits;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (unlikely(error))
- goto out;
+ return error;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -758,40 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
goto out;
}
- curr = start;
- offset = start << PAGE_CACHE_SHIFT;
- from = start_offset;
- to = PAGE_CACHE_SIZE;
- while (curr <= end) {
- page = grab_cache_page_write_begin(inode->i_mapping, curr,
- AOP_FLAG_NOFS);
- if (unlikely(!page)) {
- error = -ENOMEM;
- goto out;
- }
+ while (len) {
+ struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
+ bh_map.b_size = len;
+ set_buffer_zeronew(&bh_map);
- if (curr == end)
- to = end_offset;
- error = write_empty_blocks(page, from, to, mode);
- if (!error && offset + to > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- i_size_write(inode, offset + to);
- }
- unlock_page(page);
- page_cache_release(page);
- if (error)
+ error = gfs2_block_map(inode, lblock, &bh_map, 1);
+ if (unlikely(error))
goto out;
- curr++;
- offset += PAGE_CACHE_SIZE;
- from = 0;
+ len -= bh_map.b_size;
+ nr_blks = bh_map.b_size >> inode->i_blkbits;
+ lblock += nr_blks;
+ if (!buffer_new(&bh_map))
+ continue;
+ if (unlikely(!buffer_zeronew(&bh_map))) {
+ error = -EIO;
+ goto out;
+ }
}
+ if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
+ i_size_write(inode, offset + len);
- gfs2_dinode_out(ip, dibh->b_data);
mark_inode_dirty(inode);
- brelse(dibh);
-
out:
+ brelse(dibh);
return error;
}
@@ -799,7 +722,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
unsigned int *data_blocks, unsigned int *ind_blocks)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+ unsigned int max_blocks = ip->i_rgd->rd_free_clone;
unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
for (tmp = max_data; tmp > sdp->sd_diptrs;) {
@@ -831,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
int error;
loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+ loff_t max_chunk_size = UINT_MAX & bsize_mask;
next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
/* We only support the FALLOC_FL_KEEP_SIZE mode */
@@ -884,11 +808,12 @@ retry:
goto out_qunlock;
}
max_bytes = bytes;
- calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+ calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
+ &max_bytes, &data_blocks, &ind_blocks);
al->al_requested = data_blocks + ind_blocks;
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(al);
+ RES_RG_HDR + gfs2_rg_blocks(ip);
if (gfs2_is_jdata(ip))
rblocks += data_blocks ? data_blocks : 1;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index da21ecaafcc..78418b4fa85 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,40 +28,55 @@
#include "trans.h"
#include "dir.h"
+static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
+{
+ fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
+ bh, (unsigned long long)bh->b_blocknr, bh->b_state,
+ bh->b_page->mapping, bh->b_page->flags);
+ fs_err(gl->g