about | summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/aio.c 7
-rw-r--r-- fs/btrfs/backref.c 3
-rw-r--r-- fs/btrfs/check-integrity.c 2
-rw-r--r-- fs/btrfs/ctree.c 4
-rw-r--r-- fs/btrfs/ctree.h 8
-rw-r--r-- fs/btrfs/delayed-ref.h 1
-rw-r--r-- fs/btrfs/dev-replace.c 5
-rw-r--r-- fs/btrfs/disk-io.c 52
-rw-r--r-- fs/btrfs/extent-tree.c 94
-rw-r--r-- fs/btrfs/extent_io.c 138
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/free-space-cache.c 43
-rw-r--r-- fs/btrfs/free-space-cache.h 2
-rw-r--r-- fs/btrfs/inode-map.c 8
-rw-r--r-- fs/btrfs/inode.c 81
-rw-r--r-- fs/btrfs/ioctl.c 10
-rw-r--r-- fs/btrfs/raid56.c 2
-rw-r--r-- fs/btrfs/relocation.c 7
-rw-r--r-- fs/btrfs/scrub.c 10
-rw-r--r-- fs/btrfs/super.c 1
-rw-r--r-- fs/btrfs/volumes.c 54
-rw-r--r-- fs/btrfs/volumes.h 20
-rw-r--r-- fs/cifs/cifs_dfs_ref.c 141
-rw-r--r-- fs/cifs/cifsfs.c 3
-rw-r--r-- fs/cifs/connect.c 23
-rw-r--r-- fs/cifs/dns_resolve.c 4
-rw-r--r-- fs/cifs/inode.c 3
-rw-r--r-- fs/efivarfs/file.c 14
-rw-r--r-- fs/ext4/ext4.h 8
-rw-r--r-- fs/ext4/extents.c 9
-rw-r--r-- fs/ext4/extents_status.c 17
-rw-r--r-- fs/ext4/extents_status.h 3
-rw-r--r-- fs/ext4/file.c 4
-rw-r--r-- fs/ext4/inode.c 85
-rw-r--r-- fs/ext4/mballoc.c 6
-rw-r--r-- fs/ext4/page-io.c 121
-rw-r--r-- fs/fat/inode.c 15
-rw-r--r-- fs/gfs2/Kconfig 2
-rw-r--r-- fs/gfs2/lops.c 2
-rw-r--r-- fs/gfs2/quota.c 4
-rw-r--r-- fs/gfs2/rgrp.c 9
-rw-r--r-- fs/hfs/bnode.c 6
-rw-r--r-- fs/nilfs2/inode.c 27
-rw-r--r-- fs/ocfs2/extent_map.c 2
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/reiserfs/dir.c 2
-rw-r--r-- fs/reiserfs/inode.c 9
-rw-r--r-- fs/reiserfs/xattr.c 14
-rw-r--r-- fs/reiserfs/xattr_acl.c 3
-rw-r--r-- fs/xfs/xfs_aops.c 19
-rw-r--r-- fs/xfs/xfs_attr_leaf.c 96
-rw-r--r-- fs/xfs/xfs_attr_remote.c 408
-rw-r--r-- fs/xfs/xfs_attr_remote.h 10
-rw-r--r-- fs/xfs/xfs_buf.c 3
-rw-r--r-- fs/xfs/xfs_buf_item.c 7
-rw-r--r-- fs/xfs/xfs_da_btree.c 7
-rw-r--r-- fs/xfs/xfs_dfrag.c 8
-rw-r--r-- fs/xfs/xfs_dir2_format.h 1
-rw-r--r-- fs/xfs/xfs_dir2_leaf.c 2
-rw-r--r-- fs/xfs/xfs_dir2_node.c 13
-rw-r--r-- fs/xfs/xfs_extfree_item.c 5
-rw-r--r-- fs/xfs/xfs_fs.h 1
-rw-r--r-- fs/xfs/xfs_fsops.c 4
-rw-r--r-- fs/xfs/xfs_iops.c 47
-rw-r--r-- fs/xfs/xfs_log_cil.c 2
-rw-r--r-- fs/xfs/xfs_log_recover.c 11
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c 40
-rw-r--r-- fs/xfs/xfs_symlink.c 20
-rw-r--r-- fs/xfs/xfs_vnodeops.c 4
69 files changed, 1043 insertions, 757 deletions
diff --git a/fs/aio.c b/fs/aio.c
index c5b1a8c1041..7fe5bdee163 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -307,7 +307,9 @@ static void free_ioctx(struct kioctx *ctx)
kunmap_atomic(ring);
while (atomic_read(&ctx->reqs_active) > 0) {
- wait_event(ctx->wait, head != ctx->tail);
+ wait_event(ctx->wait,
+ head != ctx->tail ||
+ atomic_read(&ctx->reqs_active) <= 0);
avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
@@ -1299,8 +1301,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
* < min_nr if the timeout specified by timeout has elapsed
* before sufficient events are available, where timeout == NULL
* specifies an infinite timeout. Note that the timeout pointed to by
- * timeout is relative and will be updated if not NULL and the
- * operation blocks. Will fail with -ENOSYS if not implemented.
+ * timeout is relative. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b4fb4155811..290e347b6db 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -918,7 +918,8 @@ again:
ref->parent, bsz, 0);
if (!eb || !extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f48781..1431a696501 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
unsigned int j;
DECLARE_COMPLETION_ONSTACK(complete);
- bio = bio_alloc(GFP_NOFS, num_pages - i);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
printk(KERN_INFO
"btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index de6de8e60b4..02fae7f7e42 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
+ int level = btrfs_header_level(buf);
+
ret = btrfs_set_disk_extent_flags(trans, root,
buf->start,
buf->len,
- new_flags, 0);
+ new_flags, level, 0);
if (ret)
return ret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63c328a9ce9..d6dd49b51ba 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
/* holds quota configuration and tracking */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
- int is_data);
+ int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f75fcaf79ae..70b962cc177 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
u64 flags_to_set;
+ int level;
unsigned int update_key:1;
unsigned int update_flags:1;
unsigned int is_data:1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7ba7b3900cb..65241f32d3f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
+ if (btrfs_fs_incompat(fs_info, RAID56)) {
+ pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+ return -EINVAL;
+ }
+
switch (args->start.cont_reading_from_srcdev_mode) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4e9ebe1f182..e7b3cb5286a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
{ .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
{ .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
- { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" },
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
@@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
}
root->commit_root = btrfs_root_node(root);
- BUG_ON(!root->node); /* -ENOMEM */
out:
if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
@@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
free_extent_buffer(info->tree_root->node);
free_extent_buffer(info->tree_root->commit_root);
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
- if (info->quota_root) {
- free_extent_buffer(info->quota_root->node);
- free_extent_buffer(info->quota_root->commit_root);
- }
-
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
+
+ if (info->dev_root) {
+ free_extent_buffer(info->dev_root->node);
+ free_extent_buffer(info->dev_root->commit_root);
+ info->dev_root->node = NULL;
+ info->dev_root->commit_root = NULL;
+ }
+ if (info->extent_root) {
+ free_extent_buffer(info->extent_root->node);
+ free_extent_buffer(info->extent_root->commit_root);
+ info->extent_root->node = NULL;
+ info->extent_root->commit_root = NULL;
+ }
+ if (info->csum_root) {
+ free_extent_buffer(info->csum_root->node);
+ free_extent_buffer(info->csum_root->commit_root);
+ info->csum_root->node = NULL;
+ info->csum_root->commit_root = NULL;
+ }
if (info->quota_root) {
+ free_extent_buffer(info->quota_root->node);
+ free_extent_buffer(info->quota_root->commit_root);
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
-
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
free_extent_buffer(info->chunk_root->commit_root);
@@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
* caller
*/
device->flush_bio = NULL;
- bio = bio_alloc(GFP_NOFS, 0);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
return -ENOMEM;
@@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
ordered_operations);
list_del_init(&btrfs_inode->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_extent_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_del_init(&btrfs_inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&btrfs_inode->runtime_flags);
+ spin_unlock(&root->fs_info->delalloc_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
@@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
while (start <= end) {
eb = btrfs_find_tree_block(root, start,
root->leafsize);
- start += eb->len;
+ start += root->leafsize;
if (!eb)
continue;
wait_on_extent_buffer_writeback(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2305b5c5cf0..df472ab1b5a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
u32 item_size;
int ret;
int err = 0;
- int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
- node->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ int metadata = !extent_op->is_data;
if (trans->aborted)
return 0;
@@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
key.objectid = node->bytenr;
if (metadata) {
- struct btrfs_delayed_tree_ref *tree_ref;
-
- tree_ref = btrfs_delayed_node_to_tree_ref(node);
key.type = BTRFS_METADATA_ITEM_KEY;
- key.offset = tree_ref->level;
+ key.offset = extent_op->level;
} else {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = node->num_bytes;
@@ -2719,7 +2715,7 @@ out:
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
- int is_data)
+ int level, int is_data)
{
struct btrfs_delayed_extent_op *extent_op;
int ret;
@@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;
+ extent_op->level = level;
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
num_bytes, extent_op);
@@ -3109,6 +3106,11 @@ again:
WARN_ON(ret);
if (i_size_read(inode) > 0) {
+ ret = btrfs_check_trunc_cache_free_space(root,
+ &root->fs_info->global_block_rsv);
+ if (ret)
+ goto out_put;
+
ret = btrfs_truncate_free_space_cache(root, trans, path,
inode);
if (ret)
@@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+ if (fs_info->quota_root)
+ fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
update_global_block_rsv(fs_info);
@@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
int ret;
+ bool global_updated = false;
block_rsv = get_block_rsv(trans, root);
- if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- /*
- * If we couldn't reserve metadata bytes try and use some from
- * the global reserve.
- */
- if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- return ERR_PTR(ret);
- } else if (ret) {
- return ERR_PTR(ret);
- }
+ if (unlikely(block_rsv->size == 0))
+ goto try_reserve;
+again:
+ ret = block_rsv_use_bytes(block_rsv, blocksize);
+ if (!ret)
return block_rsv;
+
+ if (block_rsv->failfast)
+ return ERR_PTR(ret);
+
+ if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+ global_updated = true;
+ update_global_block_rsv(root->fs_info);
+ goto again;
}
- ret = block_rsv_use_bytes(block_rsv, blocksize);
+ if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL * 10,
+ /*DEFAULT_RATELIMIT_BURST*/ 1);
+ if (__ratelimit(&_rs))
+ WARN(1, KERN_DEBUG
+ "btrfs: block rsv returned %d\n", ret);
+ }
+try_reserve:
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+ BTRFS_RESERVE_NO_FLUSH);
if (!ret)
return block_rsv;
- if (ret && !block_rsv->failfast) {
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL * 10,
- /*DEFAULT_RATELIMIT_BURST*/ 1);
- if (__ratelimit(&_rs))
- WARN(1, KERN_DEBUG
- "btrfs: block rsv returned %d\n", ret);
- }
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- if (!ret) {
- return block_rsv;
- } else if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- }
+ /*
+ * If we couldn't reserve metadata bytes try and use some from
+ * the global reserve if its space type is the same as the global
+ * reservation.
+ */
+ if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
+ block_rsv->space_info == global_rsv->space_info) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
}
-
- return ERR_PTR(-ENOSPC);
+ return ERR_PTR(ret);
}
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
+ extent_op->level = level;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
ins.objectid,
@@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
- eb->len, flag, 0);
+ eb->len, flag,
+ btrfs_header_level(eb), 0);
BUG_ON(ret); /* -ENOMEM */
wc->flags[level] |= flag;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e9..e7e7afb4a87 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
+
+ btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio));
+ if (!btrfs_bioset)
+ goto free_buffer_cache;
return 0;
+free_buffer_cache:
+ kmem_cache_destroy(extent_buffer_cache);
+ extent_buffer_cache = NULL;
+
free_state_cache:
kmem_cache_destroy(extent_state_cache);
+ extent_state_cache = NULL;
return -ENOMEM;
}
@@ -145,6 +156,8 @@ void extent_io_exit(void)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
kmem_cache_destroy(extent_buffer_cache);
+ if (btrfs_bioset)
+ bioset_free(btrfs_bioset);
}
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
}
/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
- unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
- struct page *page)
-{
- end_page_writeback(page);
-}
-
-/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page write in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
if (end_extent_writepage(page, err, start, end))
continue;
- if (whole_page)
- end_page_writeback(page);
- else
- check_page_writeback(tree, page);
+ end_page_writeback(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
int mirror;
int ret;
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%ld\n", (u64)bio->bi_sector, err,
- (long int)bio->bi_bdev);
+ "mirror=%lu\n", (u64)bio->bi_sector, err,
+ io_bio->mirror_num);
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page read in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
- mirror = (int)(unsigned long)bio->bi_bdev;
+ mirror = io_bio->mirror_num;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
- if (whole_page) {
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
+ if (uptodate) {
+ SetPageUptodate(page);
} else {
- if (uptodate) {
- check_page_uptodate(tree, page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- check_page_locked(tree, page);
+ ClearPageUptodate(page);
+ SetPageError(page);
}
+ unlock_page(page);
} while (bvec <= bvec_end);
bio_put(bio);
}
+/*
+ * this allocates from the btrfs_bioset. We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;
- bio = bio_alloc(gfp_flags, nr_vecs);
+ bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
+ while (!bio && (nr_vecs /= 2)) {
+ bio = bio_alloc_bioset(gfp_flags,
+ nr_vecs, btrfs_bioset);
+ }
}
if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c03a17500..41fb81e7ec5 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
struct btrfs_fs_info;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ecca6c7375a..e53009657f0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,
block_group->key.objectid);
}
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode)
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv)
{
- struct btrfs_block_rsv *rsv;
u64 needed_bytes;
- loff_t oldsize;
- int ret = 0;
-
- rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->global_block_rsv;
+ int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
btrfs_calc_trans_metadata_size(root, 1);
- spin_lock(&trans->block_rsv->lock);
- if (trans->block_rsv->reserved < needed_bytes) {
- spin_unlock(&trans->block_rsv->lock);
- trans->block_rsv = rsv;
- return -ENOSPC;
- }
- spin_unlock(&trans->block_rsv->lock);
+ spin_lock(&rsv->lock);
+ if (rsv->reserved < needed_bytes)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&rsv->lock);
+ return ret; /* propagate -ENOSPC instead of always reporting success */
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode)
+{
+ loff_t oldsize;
+ int ret = 0;
oldsize = i_size_read(inode);
btrfs_i_size_write(inode, 0);
@@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
-
if (ret) {
- trans->block_rsv = rsv;
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
ret = btrfs_update_inode(trans, root, inode);
if (ret)
btrfs_abort_transaction(trans, root, ret);
- trans->block_rsv = rsv;
return ret;
}
@@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Make sure we can fit our crcs into the first page */
if (io_ctl.check_crcs &&
- (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
- WARN_ON(1);
+ (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
goto out_nospc;
- }
io_ctl_set_generation(&io_ctl, trans->transid);
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 4dc17d8809c..8b7f19f4496 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d26f67a59e3..2c66ddbbe67 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
num_bytes = trans->bytes_reserved;
/*
* 1 item for inode item insertion if need
- * 3 items for inode item update (in the worst case)
+ * 4 items for inode item update (in the worst case)
+ * 1 item for slack space if we need to do truncation
* 1 item for free space object
* 3 items for pre-allocation
*/
- trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
+ trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret != -ENOSPC)
+ btrfs_abort_transaction(trans, root, ret);
goto out_put;
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9b31b3b091f..af978f7682b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -715,8 +715,10 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ ret = -ENOMEM;
goto out_free_reserve;
+ }
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
}
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ ret = -ENOMEM;
goto out_reserve;
+ }
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
no_delete:
+ btrfs_remove_delayed_node(inode);
clear_inode(inode);
return;
}
@@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
struct rb_node **p;
struct rb_node *parent;
u64 ino = btrfs_ino(inode);
-again:
- p = &root->inode_tree.rb_node;
- parent = NULL;
if (inode_unhashed(inode))
return;
-
+again:
+ parent = NULL;
spin_lock(&root->inode_lock);
+ p = &root->inode_tree.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -6928,7 +6932,11 @@ struct btrfs_dio_private {
/* IO errors */
int errors;
+ /* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
+
+ /* dio_bio came from fs/direct-io.c */
+ struct bio *dio_bio;
};
static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct bio_vec *bvec = bio->bi_io_vec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct bio *dio_bio;
u64 start;
start = dip->logical_offset;
@@ -6977,14 +6986,15 @@ failed:
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had a csum failure make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_ordered_extent *ordered = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
+ struct bio *dio_bio;
int ret;
if (err)
@@ -7022,14 +7033,15 @@ out_test:
goto again;
}
out_done:
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had an error make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
if (!atomic_dec_and_test(&dip->pending_bios))
goto out;
- if (dip->errors)
+ if (dip->errors) {
bio_io_error(dip->orig_bio);
- else {
- set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+ } else {
+ set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
bio_endio(dip->orig_bio, 0);
}
out:
@@ -7243,25 +7255,34 @@ out_err:
return 0;
}
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
- loff_t file_offset)
+static void btrfs_submit_direct(int rw, struct bio *dio_bio,
+ struct inode *inode, loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec = dio_bio->bi_io_vec;
+ struct bio *io_bio;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+
+ if (!io_bio) {
+ ret = -ENOMEM;
+ goto free_ordered;
+ }
+
dip = kmalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
- goto free_ordered;
+ goto free_io_bio;
}
- dip->private = bio->bi_private;
+ dip->private = dio_bio->bi_private;
+ io_bio->bi_private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;
@@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
do {
dip->bytes += bvec->bv_len;
bvec++;
- } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+ } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
- dip->disk_bytenr = (u64)bio->bi_sector << 9;
- bio->bi_private = dip;
+ dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+ io_bio->bi_private = dip;
dip->errors = 0;
- dip->orig_bio = bio;
+ dip->orig_bio = io_bio;
+ dip->dio_bio = dio_bio;
atomic_set(&dip->pending_bios, 0);
if (write)
- bio->bi_end_io = btrfs_endio_direct_write;
+ io_bio->bi_end_io = btrfs_endio_direct_write;
else
- bio->bi_end_io = btrfs_endio_direct_read;
+ io_bio->bi_end_io = btrfs_endio_direct_read;
ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;
+
+free_io_bio:
+ bio_put(io_bio);
+
free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
@@ -7300,7 +7326,7 @@ free_ordered:
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
- bio_endio(bio, ret);
+ bio_endio(dio_bio, ret);
}
static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- btrfs_remove_delayed_node(inode);
call_rcu(&inode->i_rcu, btrfs_i_callback);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de4a2fcfb2..0f81d67cdc8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
item_off = btrfs_item_ptr_offset(leaf, i);
item_len = btrfs_item_size_nr(leaf, i);
- if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+ btrfs_item_key_to_cpu(leaf, key, i);
+ if (!key_in_sk(key, sk))
+ continue;
+
+ if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
item_len = 0;
if (sizeof(sh) + item_len + *sk_offset >
@@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
goto overflow;
}
- btrfs_item_key_to_cpu(leaf, key, i);
- if (!key_in_sk(key, sk))
- continue;
-
sh.objectid = key->objectid;
sh.offset = key->offset;
sh.type = key->type;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0740621daf6..0525e1389f5 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
}
/* put a new bio on the list */
- bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
if (!bio)
return -ENOMEM;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8d2a2..395b82031a4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1773,7 +1773,7 @@ again:
if (!eb || !extent_buffer_uptodate(eb)) {
ret = (!eb) ? -ENOMEM : -EIO;
free_extent_buffer(eb);
- return ret;
+ break;
}
btrfs_tree_lock(eb);
if (cow) {
@@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
}
truncate:
+ ret = btrfs_check_trunc_cache_free_space(root,
+ &fs_info->global_block_rsv);
+ if (ret)
+ goto out;
+
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f489e24659a..79bd479317c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
WARN_ON(!page->page);
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@ again:
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -1930,7 +1930,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
@@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a4807ced23c..f0857e092a3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
+ btrfs_pause_balance(fs_info);
ret = btrfs_commit_super(root);
if (ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e925ced971..8bffb9174af 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
- else if (num_devices < 4)
+ else if (num_devices > 1)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
- else
- allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6);
-
+ if (num_devices > 2)
+ allowed |= BTRFS_BLOCK_GROUP_RAID5;
+ if (num_devices > 3)
+ allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_RAID6);
if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(!alloc_profile_is_valid(bctl->data.target, 1) ||
(bctl->data.target & ~allowed))) {
@@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
-static void *merge_stripe_index_into_bio_private(void *bi_private,
- unsigned int stripe_index)
-{
- /*
- * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
- * at most 1.
- * The alternative solution (instead of stealing bits from the
- * pointer) would be to allocate an intermediate structure
- * that contains the old private pointer plus the stripe_index.
- */
- BUG_ON((((uintptr_t)bi_private) & 3) != 0);
- BUG_ON(stripe_index > 3);
- return (void *)(((uintptr_t)bi_private) | stripe_index);
-}
-
-static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
-{
- return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
-}
-
-static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
-{
- return (unsigned int)((uintptr_t)bi_private) & 3;
-}
-
static void btrfs_end_bio(struct bio *bio, int err)
{
- struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
+ struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
if (err) {
atomic_inc(&bbio->error);
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
- extract_stripe_index_from_bio_private(
- bio->bi_private);
+ btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
}
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
/* only send an error to the higher layers if it is
* beyond the tolerance of the btrfs bio
*/
@@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
bio->bi_private = bbio;
- bio->bi_private = merge_stripe_index_into_bio_private(
- bio->bi_private, (unsigned int)dev_nr);
+ btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = physical >> 9;
#ifdef DEBUG
@@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
if (atomic_dec_and_test(&bbio->stripes_pending)) {
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_sector = logical >> 9;
kfree(bbio);
bio_endio(bio, -EIO);
@@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
if (dev_nr < total_devs - 1) {
- bio = bio_clone(first_bio, GFP_NOFS);
+ bio = btrfs_bio_clone(first_bio, GFP_NOFS);
BUG_ON(!bio); /* -ENOMEM */
} else {
bio = first_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 845ccbb0d2e..f6247e2a47f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
int rotating;
};
+/*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+ * Really, what we need is a btrfs_bio structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet. We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios. We'll cram as much of
+ * struct btrfs_bio as we can into this over time.
+ */
+struct btrfs_io_bio {
+ unsigned long mirror_num;
+ unsigned long stripe_index;
+ struct bio bio;
+};
+
+static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
+{
+ return container_of(bio, struct btrfs_io_bio, bio);
+}
+
struct btrfs_bio_stripe {
struct btrfs_device *dev;
u64 physical;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8e33ec65847..58df174deb1 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/fs.h>
+#include <linux/inet.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifsfs.h"
@@ -48,58 +49,74 @@ void cifs_dfs_release_automount_timer(void)
}
/**
- * cifs_get_share_name - extracts share name from UNC
- * @node_name: pointer to UNC string
+ * cifs_build_devname - build a devicename from a UNC and optional prepath
+ * @nodename: pointer to UNC string
+ * @prepath: pointer to prefixpath (or NULL if there isn't one)
*
- * Extracts sharename form full UNC.
- * i.e. strips from UNC trailing path that is not part of share
- * name and fixup missing '\' in the beginning of DFS node refferal
- * if necessary.
- * Returns pointer to share name on success or ERR_PTR on error.
- * Caller is responsible for freeing returned string.
+ * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer
+ * big enough to hold the final thing. Copy the UNC from the nodename, and
+ * concatenate the prepath onto the end of it if there is one.
+ *
+ * Returns pointer to the built string, or a ERR_PTR. Caller is responsible
+ * for freeing the returned string.
*/
-static char *cifs_get_share_name(const char *node_name)
+static char *
+cifs_build_devname(char *nodename, const char *prepath)
{
- int len;
- char *UNC;
- char *pSep;
-
- len = strlen(node_name);
- UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */,
- GFP_KERNEL);
- if (!UNC)
- return ERR_PTR(-ENOMEM);
+ size_t pplen;
+ size_t unclen;
+ char *dev;
+ char *pos;
+
+ /* skip over any preceding delimiters */
+ nodename += strspn(nodename, "\\");
+ if (!*nodename)
+ return ERR_PTR(-EINVAL);
- /* get share name and server name */
- if (node_name[1] != '\\') {
- UNC[0] = '\\';
- strncpy(UNC+1, node_name, len);
- len++;
- UNC[len] = 0;
- } else {
- strncpy(UNC, node_name, len);
- UNC[len] = 0;
- }
+ /* get length of UNC and set pos to last char */
+ unclen = strlen(nodename);
+ pos = nodename + unclen - 1;
- /* find server name end */
- pSep = memchr(UNC+2, '\\', len-2);
- if (!pSep) {
- cifs_dbg(VFS, "%s: no server name end in node name: %s\n",
- __func__, node_name);
- kfree(UNC);
- return ERR_PTR(-EINVAL);
+ /* trim off any trailing delimiters */
+ while (*pos == '\\') {
+ --pos;
+ --unclen;
}
- /* find sharename end */
- pSep++;
- pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
- if (pSep) {
- /* trim path up to sharename end
- * now we have share name in UNC */
- *pSep = 0;
+ /* allocate a buffer:
+ * +2 for preceding "//"
+ * +1 for delimiter between UNC and prepath
+ * +1 for trailing NULL
+ */
+ pplen = prepath ? strlen(prepath) : 0;
+ dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ pos = dev;
+ /* add the initial "//" */
+ *pos = '/';
+ ++pos;
+ *pos = '/';
+ ++pos;
+
+ /* copy in the UNC portion from referral */
+ memcpy(pos, nodename, unclen);
+ pos += unclen;
+
+ /* copy the prefixpath remainder (if there is one) */
+ if (pplen) {
+ *pos = '/';
+ ++pos;
+ memcpy(pos, prepath, pplen);
+ pos += pplen;
}
- return UNC;
+ /* NULL terminator */
+ *pos = '\0';
+
+ convert_delimiter(dev, '/');
+ return dev;
}
@@ -123,6 +140,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
{
int rc;
char *mountdata = NULL;
+ const char *prepath = NULL;
int md_len;
char *tkn_e;
char *srvIP = NULL;
@@ -132,7 +150,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
if (sb_mountdata == NULL)
return ERR_PTR(-EINVAL);
- *devname = cifs_get_share_name(ref->node_name);
+ if (strlen(fullpath) - ref->path_consumed)
+ prepath = fullpath + ref->path_consumed;
+
+ *devname = cifs_build_devname(ref->node_name, prepath);
if (IS_ERR(*devname)) {
rc = PTR_ERR(*devname);
*devname = NULL;
@@ -146,12 +167,14 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
goto compose_mount_options_err;
}
- /* md_len = strlen(...) + 12 for 'sep+prefixpath='
- * assuming that we have 'unc=' and 'ip=' in
- * the original sb_mountdata
+ /*
+ * In most cases, we'll be building a shorter string than the original,
+ * but we do have to assume that the address in the ip= option may be
+ * much longer than the original. Add the max length of an address
+ * string to the length of the original string to allow for worst case.
*/
- md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
- mountdata = kzalloc(md_len+1, GFP_KERNEL);
+ md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
+ mountdata = kzalloc(md_len + 1, GFP_KERNEL);
if (mountdata == NULL) {
rc = -ENOMEM;
goto compose_mount_options_err;
@@ -195,26 +218,6 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
strncat(mountdata, &sep, 1);
strcat(mountdata, "ip=");
strcat(mountdata, srvIP);
- strncat(mountdata, &sep, 1);
- strcat(mountdata, "unc=");
- strcat(mountdata, *devname);
-
- /* find & copy prefixpath */
- tkn_e = strchr(ref->node_name + 2, '\\');
- if (tkn_e == NULL) {
- /* invalid unc, missing share name*/
- rc = -EINVAL;
- goto compose_mount_options_err;
- }
-
- tkn_e = strchr(tkn_e + 1, '\\');
- if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
- strncat(mountdata, &sep, 1);
- strcat(mountdata, "prefixpath=");
- if (tkn_e)
- strcat(mountdata, tkn_e + 1);
- strcat(mountdata, fullpath + ref->path_consumed);
- }
/*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
/*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72e4efee138..3752b9f6d9e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -372,9 +372,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
cifs_show_security(s, tcon->ses->server);
cifs_show_cache_flavor(s, cifs_sb);
- seq_printf(s, ",unc=");
- seq_escape(s, tcon->treeName, " \t\n\\");
-
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
seq_printf(s, ",multiuser");
else if (tcon->ses->user_name)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 99eeaa17ee0..5b97e56ddbc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1061,6 +1061,7 @@ static int cifs_parse_security_flavors(char *value,
#endif
case Opt_sec_none:
vol->nullauth = 1;
+ vol->secFlg |= CIFSSEC_MAY_NTLM;
break;
default:
cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -1257,14 +1258,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->backupuid_specified = false; /* no backup intent for a user */
vol->backupgid_specified = false; /* no backup intent for a group */
- /*
- * For now, we ignore -EINVAL errors under the assumption that the
- * unc= and prefixpath= options will be usable.
- */
- if (cifs_parse_devname(devname, vol) == -ENOMEM) {
- printk(KERN_ERR "CIFS: Unable to allocate memory to parse "
- "device string.\n");
- goto out_nomem;
+ switch (cifs_parse_devname(devname, vol)) {
+ case 0:
+ break;
+ case -ENOMEM:
+ cifs_dbg(VFS, "Unable to allocate memory for devname.\n");
+ goto cifs_parse_mount_err;
+ case -EINVAL:
+ cifs_dbg(VFS, "Malformed UNC in devname.\n");
+ goto cifs_parse_mount_err;
+ default:
+ cifs_dbg(VFS, "Unknown error parsing devname.\n");
+ goto cifs_parse_mount_err;
}
while ((data = strsep(&options, separator)) != NULL) {
@@ -1826,7 +1831,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
#endif
if (!vol->UNC) {
- cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n");
+ cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
goto cifs_parse_mount_err;
}
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index e7512e49761..7ede7306599 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -34,7 +34,7 @@
/**
* dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
- * @unc: UNC path specifying the server
+ * @unc: UNC path specifying the server (with '/' as delimiter)
* @ip_addr: Where to return the IP address.
*
* The IP address will be returned in string form, and the caller is
@@ -64,7 +64,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
hostname = unc + 2;
/* Search for server name delimiter */
- sep = memchr(hostname, '\\', len);
+ sep = memchr(hostname, '/', len);
if (sep)
len = sep - hostname;
else
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fc3025199cb..20efd81266c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -171,7 +171,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
inode->i_flags |= S_AUTOMOUNT;
- cifs_set_ops(inode);
+ if (inode->i_state & I_NEW)
+ cifs_set_ops(inode);
}
void
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index bfb53156431..8dd524f3228 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -44,8 +44,11 @@ static ssize_t efivarfs_file_write(struct file *file,
bytes = efivar_entry_set_get_size(var, attributes, &datasize,
data, &set);
- if (!set && bytes)
+ if (!set && bytes) {
+ if (bytes == -ENOENT)
+ bytes = -EIO;
goto out;
+ }
if (bytes == -ENOENT) {
drop_nlink(inode);
@@ -76,7 +79,14 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
int err;
err = efivar_entry_size(var, &datasize);
- if (err)
+
+ /*
+ * efivarfs represents uncommitted variables with
+ * zero-length files. Reading them should return EOF.
+ */
+ if (err == -ENOENT)
+ return 0;
+ else if (err)
return err;
data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02..5aae3d12d40 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
ssize_t size; /* size of the extent */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
- atomic_t count; /* reference counter */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* page-io.c */
extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
extern void ext4_ioend_shutdown(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
-extern int ext4_put_io_end(ext4_io_end_t *io_end);
-extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
-extern void ext4_io_submit_init(struct ext4_io_submit *io,
- struct writeback_control *wbc);
extern void ext4_end_io_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244..bc0f1910b9c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
{
struct extent_status es;
- ext4_es_find_delayed_extent(inode, lblk_start, &es);
+ ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
if (es.es_len == 0)
return 0; /* there is no delay extent in this tree */
else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
struct extent_status es;
ext4_lblk_t block, next_del;
- ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
-
if (newes->es_pblk == 0) {
+ ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
+ newes->es_lblk + newes->es_len - 1, &es);
+
/*
* No extent in extent-tree contains block @newes->es_pblk,
* then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
}
block = newes->es_lblk + newes->es_len;
- ext4_es_find_delayed_extent(inode, block, &es);
+ ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
if (es.es_len == 0)
next_del = EXT_MAX_BLOCKS;
else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ed..e6941e622d3 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
}
/*
- * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
- * if it exists, otherwise, the next extent after @es->lblk.
+ * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
+ * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
*
* @inode: the inode which owns delayed extents
* @lblk: the offset where we start to search
+ * @end: the offset where we stop to search
* @es: delayed extent that we found
*/
-void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+void ext4_es_find_delayed_extent_range(struct inode *inode,
+ ext4_lblk_t lblk, ext4_lblk_t end,
struct extent_status *es)
{
struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
struct rb_node *node;
BUG_ON(es == NULL);
- trace_ext4_es_find_delayed_extent_enter(inode, lblk);
+ BUG_ON(end < lblk);
+ trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
read_lock(&EXT4_I(inode)->i_es_lock);
tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
if (es1 && !ext4_es_is_delayed(es1)) {
while ((node = rb_next(&es1->rb_node)) != NULL) {
es1 = rb_entry(node, struct extent_status, rb_node);
+ if (es1->es_lblk > end) {
+ es1 = NULL;
+ break;
+ }
if (ext4_es_is_delayed(es1))
break;
}
@@ -285,7 +292,7 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
ext4_es_lru_add(inode);
- trace_ext4_es_find_delayed_extent_exit(inode, es);
+ trace_ext4_es_find_delayed_extent_range_exit(inode, es);
}
static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311..f740eb03b70 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
unsigned long long status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
-extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+extern void ext4_es_find_delayed_extent_range(struct inode *inode,
+ ext4_lblk_t lblk, ext4_lblk_t end,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4959e29573b..b1b4d51b5d8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -465,7 +465,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
* If there is a delay extent at this offset,
* it will be as a data.
*/
- ext4_es_find_delayed_extent(inode, last, &es);
+ ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
if (last != start)
dataoff = last << blkbits;
@@ -548,7 +548,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
* If there is a delay extent at this offset,
* we will skip this extent.
*/
- ext4_es_find_delayed_extent(inode, last, &es);
+ ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
holeoff = last << blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0723774bdfb..d6382b89ecb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1488,10 +1488,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
- ext4_io_submit_init(&io_submit, mpd->wbc);
- io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_submit.io_end)
- return -ENOMEM;
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
@@ -1579,8 +1576,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
pagevec_release(&pvec);
}
ext4_io_submit(&io_submit);
- /* Drop io_end reference we got from init */
- ext4_put_io_end_defer(io_submit.io_end);
return ret;
}
@@ -2239,16 +2234,9 @@ static int ext4_writepage(struct page *page,
*/
return __ext4_journalled_writepage(page, len);
- ext4_io_submit_init(&io_submit, wbc);
- io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_submit.io_end) {
- redirty_page_for_writepage(wbc, page);
- return -ENOMEM;
- }
+ memset(&io_submit, 0, sizeof(io_submit));
ret = ext4_bio_write_page(&io_submit, page, len, wbc);
ext4_io_submit(&io_submit);
- /* Drop io_end reference we got from init */
- ext4_put_io_end_defer(io_submit.io_end);
return ret;
}
@@ -3079,13 +3067,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
struct inode *inode = file_inode(iocb->ki_filp);
ext4_io_end_t *io_end = iocb->private;
- /* if not async direct IO just return */
- if (!io_end) {
- inode_dio_done(inode);
- if (is_async)
- aio_complete(iocb, ret, 0);
- return;
- }
+ /* if not async direct IO or dio with 0 bytes write, just return */
+ if (!io_end || !size)
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3093,13 +3077,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size);
iocb->private = NULL;
+
+ /* if not aio dio with unwritten extents, just free io and return */
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ ext4_free_io_end(io_end);
+out:
+ inode_dio_done(inode);
+ if (is_async)
+ aio_complete(iocb, ret, 0);
+ return;
+ }
+
io_end->offset = offset;
io_end->size = size;
if (is_async) {
io_end->iocb = iocb;
io_end->result = ret;
}
- ext4_put_io_end_defer(io_end);
+
+ ext4_add_complete_io(io_end);
}
/*
@@ -3133,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
get_block_t *get_block_func = NULL;
int dio_flags = 0;
loff_t final_size = offset + count;
- ext4_io_end_t *io_end = NULL;
/* Use the old path for reads and writes beyond i_size. */
if (rw != WRITE || final_size > inode->i_size)
@@ -3172,16 +3167,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
iocb->private = NULL;
ext4_inode_aio_set(inode, NULL);
if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
+ ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_end) {
ret = -ENOMEM;
goto retake_lock;
}
io_end->flag |= EXT4_IO_END_DIRECT;
- /*
- * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
+ iocb->private = io_end;
/*
* we save the io structure for current async direct
* IO, so that later ext4_map_blocks() could flag the
@@ -3205,27 +3197,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
NULL,
dio_flags);
+ if (iocb->private)
+ ext4_inode_aio_set(inode, NULL);
/*
- * Put our reference to io_end. This can free the io_end structure e.g.
- * in sync IO case or in case of error. It can even perform extent
- * conversion if all bios we submitted finished before we got here.
- * Note that in that case iocb->private can be already set to NULL
- * here.
+ * The io_end structure takes a reference to the inode, that
+ * structure needs to be destroyed and the reference to the
+ * inode need to be dropped, when IO is complete, even with 0
+ * byte write, or failed.
+ *
+ * In the successful AIO DIO case, the io_end structure will
+ * be destroyed and the reference to the inode will be dropped
+ * after the end_io call back function is called.
+ *
+ * In the case there is 0 byte write, or error case, since VFS
+ * direct IO won't invoke the end_io call back function, we
+ * need to free the end_io structure here.
*/
- if (io_end) {
- ext4_inode_aio_set(inode, NULL);
- ext4_put_io_end(io_end);
- /*
- * In case of error or no write ext4_end_io_dio() was not
- * called so we have to put iocb's reference.
- */
- if (ret <= 0 && ret != -EIOCBQUEUED) {
- WARN_ON(iocb->private != io_end);
- ext4_put_io_end(io_end);
- iocb->private = NULL;
- }
- }
- if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+ if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
+ ext4_free_io_end(iocb->private);
+ iocb->private = NULL;
+ } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
int err;
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1ed9e07434..def84082a9a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2105,7 +2105,11 @@ repeat:
group = ac->ac_g_ex.fe_group;
for (i = 0; i < ngroups; group++, i++) {
- if (group == ngroups)
+ /*
+ * Artificially restricted ngroups for non-extent
+ * files makes group > ngroups possible on first loop.
+ */
+ if (group >= ngroups)
group = 0;
/* This now checks without needing the buddy page */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62..4acf1f78881 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,28 +62,15 @@ void ext4_ioend_shutdown(struct inode *inode)
cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
}
-static void ext4_release_io_end(ext4_io_end_t *io_end)
+void ext4_free_io_end(ext4_io_end_t *io)
{
- BUG_ON(!list_empty(&io_end->list));
- BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-
- if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
- wake_up_all(ext4_ioend_wq(io_end->inode));
- if (io_end->flag & EXT4_IO_END_DIRECT)
- inode_dio_done(io_end->inode);
- if (io_end->iocb)
- aio_complete(io_end->iocb, io_end->result, 0);
- kmem_cache_free(io_end_cachep, io_end);
-}
-
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
- struct inode *inode = io_end->inode;
+ BUG_ON(!io);
+ BUG_ON(!list_empty(&io->list));
+ BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
- io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
- /* Wake up anyone waiting on unwritten extent conversion */
- if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
- wake_up_all(ext4_ioend_wq(inode));
+ if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+ wake_up_all(ext4_ioend_wq(io->inode));
+ kmem_cache_free(io_end_cachep, io);
}
/* check a range of space and convert unwritten extents to written. */
@@ -106,8 +93,13 @@ static int ext4_end_io(ext4_io_end_t *io)
"(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret);
}
- ext4_clear_io_unwritten_flag(io);
- ext4_release_io_end(io);
+ /* Wake up anyone waiting on unwritten extent conversion */
+ if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+ wake_up_all(ext4_ioend_wq(inode));
+ if (io->flag & EXT4_IO_END_DIRECT)
+ inode_dio_done(inode);
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
return ret;
}
@@ -138,7 +130,7 @@ static void dump_completed_IO(struct inode *inode)
}
/* Add the io_end to per-inode completed end_io list. */
-static void ext4_add_complete_io(ext4_io_end_t *io_end)
+void ext4_add_complete_io(ext4_io_end_t *io_end)
{
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
struct workqueue_struct *wq;
@@ -175,6 +167,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
err = ext4_end_io(io);
if (unlikely(!ret && err))
ret = err;
+ io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ ext4_free_io_end(io);
}
return ret;
}
@@ -206,43 +200,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
INIT_LIST_HEAD(&io->list);
- atomic_set(&io->count, 1);
}
return io;
}
-void ext4_put_io_end_defer(ext4_io_end_t *io_end)
-{
- if (atomic_dec_and_test(&io_end->count)) {
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
- ext4_release_io_end(io_end);
- return;
- }
- ext4_add_complete_io(io_end);
- }
-}
-
-int ext4_put_io_end(ext4_io_end_t *io_end)
-{
- int err = 0;
-
- if (atomic_dec_and_test(&io_end->count)) {
- if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
- err = ext4_convert_unwritten_extents(io_end->inode,
- io_end->offset, io_end->size);
- ext4_clear_io_unwritten_flag(io_end);
- }
- ext4_release_io_end(io_end);
- }
- return err;
-}
-
-ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
-{
- atomic_inc(&io_end->count);
- return io_end;
-}
-
/*
* Print an buffer I/O error compatible with the fs/buffer.c. This
* provides compatibility with dmesg scrapers that look for a specific
@@ -325,7 +286,12 @@ static void ext4_end_bio(struct bio *bio, int error)
bi_sector >> (inode->i_blkbits - 9));
}
- ext4_put_io_end_defer(io_end);
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ ext4_free_io_end(io_end);
+ return;
+ }
+
+ ext4_add_complete_io(io_end);
}
void ext4_io_submit(struct ext4_io_submit *io)
@@ -339,37 +305,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
bio_put(io->io_bio);
}
io->io_bio = NULL;
-}
-
-void ext4_io_submit_init(struct ext4_io_submit *io,
- struct writeback_control *wbc)
-{
- io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
- io->io_bio = NULL;
+ io->io_op = 0;
io->io_end = NULL;
}
-static int io_submit_init_bio(struct ext4_io_submit *io,
- struct buffer_head *bh)
+static int io_submit_init(struct ext4_io_submit *io,
+ struct inode *inode,
+ struct writeback_control *wbc,
+ struct buffer_head *bh)
{
+ ext4_io_end_t *io_end;
+ struct page *page = bh->b_page;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;
+ io_end = ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end)
+ return -ENOMEM;
bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
+ bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio;
- bio->bi_private = ext4_get_io_end(io->io_end);
- if (!io->io_end->size)
- io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
- + bh_offset(bh);
+
+ io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
io->io_bio = bio;
+ io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
io->io_next_block = bh->b_blocknr;
return 0;
}
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
+ struct writeback_control *wbc,
struct buffer_head *bh)
{
ext4_io_end_t *io_end;
@@ -380,18 +349,18 @@ submit_and_retry:
ext4_io_submit(io);
}
if (io->io_bio == NULL) {
- ret = io_submit_init_bio(io, bh);
+ ret = io_submit_init(io, inode, wbc, bh);
if (ret)
return ret;
}
- ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
- if (ret != bh->b_size)
- goto submit_and_retry;
io_end = io->io_end;
if (test_clear_buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end);
- io_end->size += bh->b_size;
+ io->io_end->size += bh->b_size;
io->io_next_block++;
+ ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+ if (ret != bh->b_size)
+ goto submit_and_retry;
return 0;
}
@@ -463,7 +432,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode, bh);
+ ret = io_submit_add_bh(io, inode, wbc, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index dfce656ddb3..5d4513cb1b3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1229,6 +1229,19 @@ static int fat_read_root(struct inode *inode)
return 0;
}
+static unsigned long calc_fat_clusters(struct super_block *sb)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+
+ /* Divide first to avoid overflow */
+ if (sbi->fat_bits != 12) {
+ unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
+ return ent_per_sec * sbi->fat_length;
+ }
+
+ return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
+}
+
/*
* Read the super block of an MS-DOS FS.
*/
@@ -1434,7 +1447,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
/* check that FAT table does not overflow */
- fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
+ fat_clusters = calc_fat_clusters(sb);
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
if (total_clusters > MAX_FAT(sb)) {
if (!silent)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index eb08c9e43c2..5a376ab81fe 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -26,7 +26,7 @@ config GFS2_FS
config GFS2_FS_LOCKING_DLM
bool "GFS2 DLM locking"
depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
- HOTPLUG && DLM && CONFIGFS_FS && SYSFS
+ HOTPLUG && CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
help
Multiple node locking module for GFS2
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c5fa758fd84..68b4c8f1fce 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -212,7 +212,7 @@ static void gfs2_end_log_write(struct bio *bio, int error)
fs_err(sdp, "Error %d writing to log\n", error);
}
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
gfs2_end_log_write_bh(sdp, bvec, error);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c7c840e916f..c253b13722e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -121,7 +121,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
{
struct kqid qid = qd->qd_id;
return (2 * (u64)from_kqid(&init_user_ns, qid)) +
- (qid.type == USRQUOTA) ? 0 : 1;
+ ((qid.type == USRQUOTA) ? 0 : 1);
}
static u64 qd2offset(struct gfs2_quota_data *qd)
@@ -721,7 +721,7 @@ get_a_page:
goto unlock_out;
}
- gfs2_trans_add_meta(ip->i_gl, bh);
+ gfs2_trans_add_data(ip->i_gl, bh);
kaddr = kmap_atomic(page);
if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0c5a575b513..5232525934a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1401,9 +1401,14 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
u32 extlen;
u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
int ret;
+ struct inode *inode = &ip->i_inode;
- extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
- extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+ if (S_ISDIR(inode->i_mode))
+ extlen = 1;
+ else {
+ extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
+ extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+ }
if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
return;
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index f3b1a15ccd5..d3fa6bd9503 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -415,7 +415,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
spin_lock(&tree->hash_lock);
node = hfs_bnode_findhash(tree, num);
spin_unlock(&tree->hash_lock);
- BUG_ON(node);
+ if (node) {
+ pr_crit("new node %u already hashed?\n", num);
+ WARN_ON(1);
+ return node;
+ }
node = __hfs_bnode_create(tree, num);
if (!node)
return ERR_PTR(-ENOMEM);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 689fb608648..bccfec8343c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -219,13 +219,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
static int nilfs_set_page_dirty(struct page *page)
{
- int ret = __set_page_dirty_buffers(page);
+ int ret = __set_page_dirty_nobuffers(page);
- if (ret) {
+ if (page_has_buffers(page)) {
struct inode *inode = page->mapping->host;
- unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
+ unsigned nr_dirty = 0;
+ struct buffer_head *bh, *head;
- nilfs_set_file_dirty(inode, nr_dirty);
+ /*
+ * This page is locked by callers, and no other thread
+ * concurrently marks its buffers dirty since they are
+ * only dirtied through routines in fs/buffer.c in
+ * which call sites of mark_buffer_dirty are protected
+ * by page lock.
+ */
+ bh = head = page_buffers(page);
+ do {
+ /* Do not mark hole blocks dirty */
+ if (buffer_dirty(bh) || !buffer_mapped(bh))
+ continue;
+
+ set_buffer_dirty(bh);
+ nr_dirty++;
+ } while (bh = bh->b_this_page, bh != head);
+
+ if (nr_dirty)
+ nilfs_set_file_dirty(inode, nr_dirty);
}
return ret;
}
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 1c39efb71ba..2487116d0d3 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -790,7 +790,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
&hole_size, &rec, &is_last);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto out_unlock;
}
if (rec.e_blkno == 0ULL) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a7509f9e6f..ff54014a24e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2288,7 +2288,7 @@ relock:
ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
- goto out_sems;
+ goto out;
}
ocfs2_inode_unlock(inode, 1);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 66c53b642a8..6c2d136561c 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -204,6 +204,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
next_pos = deh_offset(deh) + 1;
if (item_moved(&tmp_ih, &path_to_entry)) {
+ set_cpu_key_k_offset(&pos_key,
+ next_pos);
goto research;
}
} /* for */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77d6d47abc8..f844533792e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1811,11 +1811,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
- if (insert_inode_locked4(inode, args.objectid,
- reiserfs_find_actor, &args) < 0) {
+
+ reiserfs_write_unlock(inode->i_sb);
+ err = insert_inode_locked4(inode, args.objectid,
+ reiserfs_find_actor, &args);
+ reiserfs_write_lock(inode->i_sb);
+ if (err) {
err = -EINVAL;
goto out_bad_inode;
}
+
if (old_format_only(sb))
/* not a perfect generation count, as object ids can be reused, but
** this is as good as reiserfs can do right now.
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4cce1d9552f..821bcf70e46 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -318,7 +318,19 @@ static int delete_one_xattr(struct dentry *dentry, void *data)
static int chown_one_xattr(struct dentry *dentry, void *data)
{
struct iattr *attrs = data;
- return reiserfs_setattr(dentry, attrs);
+ int ia_valid = attrs->ia_valid;
+ int err;
+
+ /*
+ * We only want the ownership bits. Otherwise, we'll do
+ * things like change a directory to a regular file if
+ * ATTR_MODE is set.
+ */
+ attrs->ia_valid &= (ATTR_UID|ATTR_GID);
+ err = reiserfs_setattr(dentry, attrs);
+ attrs->ia_valid = ia_valid;
+
+ return err;
}
/* No i_mutex, but the inode is unconnected. */
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d7c01ef64ed..6c8767fdfc6 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -443,6 +443,9 @@ int reiserfs_acl_chmod(struct inode *inode)
int depth;
int error;
+ if (IS_PRIVATE(inode))
+ return 0;
+
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2b2691b7342..41a695048be 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -725,6 +725,25 @@ xfs_convert_page(
(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
i_size_read(inode));
+ /*
+ * If the current map does not span the entire page we are about to try
+ * to write, then give up. The only way we can write a page that spans
+ * multiple mappings in a single writeback iteration is via the
+ * xfs_vm_writepage() function. Data integrity writeback requires the
+ * entire page to be written in a single attempt, otherwise the part of
+ * the page we don't write here doesn't get written as part of the data
+ * integrity sync.
+ *
+ * For normal writeback, we also don't attempt to write partial pages
+ * here as it simply means that write_cache_pages() will see it under
+ * writeback and ignore the page until some point in the future, at
+ * which time this will be the only page in the file that needs
+ * writeback. Hence for more optimal IO patterns, we should always
+ * avoid partial page writeback due to multiple mappings on a page here.
+ */
+ if (!xfs_imap_valid(inode, imap, end_offset))
+ goto fail_unlock_page;
+
len = 1 << inode->i_blkbits;
p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
PAGE_CACHE_SIZE);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 08d5457c948..d788302e506 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -931,20 +931,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
*/
int
xfs_attr_shortform_allfit(
- struct xfs_buf *bp,
- struct xfs_inode *dp)
+ struct xfs_buf *bp,
+ struct xfs_inode *dp)
{
- xfs_attr_leafblock_t *leaf;
- xfs_attr_leaf_entry_t *entry;
+ struct xfs_attr_leafblock *leaf;
+ struct xfs_attr_leaf_entry *entry;
xfs_attr_leaf_name_local_t *name_loc;
- int bytes, i;
+ struct xfs_attr3_icleaf_hdr leafhdr;
+ int bytes;
+ int i;
leaf = bp->b_addr;
- ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+ entry = xfs_attr3_leaf_entryp(leaf);
- entry = &leaf->entries[0];
bytes = sizeof(struct xfs_attr_sf_hdr);
- for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
+ for (i = 0; i < leafhdr.count; entry++, i++) {
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* don't copy partial entries */
if (!(entry->flags & XFS_ATTR_LOCAL))
@@ -954,15 +956,15 @@ xfs_attr_shortform_allfit(
return(0);
if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
return(0);
- bytes += sizeof(struct xfs_attr_sf_entry)-1
+ bytes += sizeof(struct xfs_attr_sf_entry) - 1
+ name_loc->namelen
+ be16_to_cpu(name_loc->valuelen);
}
if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
(dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
(bytes == sizeof(struct xfs_attr_sf_hdr)))
- return(-1);
- return(xfs_attr_shortform_bytesfit(dp, bytes));
+ return -1;
+ return xfs_attr_shortform_bytesfit(dp, bytes);
}
/*
@@ -1410,7 +1412,7 @@ xfs_attr3_leaf_add_work(
name_rmt->valuelen = 0;
name_rmt->valueblk = 0;
args->rmtblkno = 1;
- args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
+ args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
}
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1443,11 +1445,12 @@ xfs_attr3_leaf_add_work(
STATIC void
xfs_attr3_leaf_compact(
struct xfs_da_args *args,
- struct xfs_attr3_icleaf_hdr *ichdr_d,
+ struct xfs_attr3_icleaf_hdr *ichdr_dst,
struct xfs_buf *bp)
{
- xfs_attr_leafblock_t *leaf_s, *leaf_d;
- struct xfs_attr3_icleaf_hdr ichdr_s;
+ struct xfs_attr_leafblock *leaf_src;
+ struct xfs_attr_leafblock *leaf_dst;
+ struct xfs_attr3_icleaf_hdr ichdr_src;
struct xfs_trans *trans = args->trans;
struct xfs_mount *mp = trans->t_mountp;
char *tmpbuffer;
@@ -1455,29 +1458,38 @@ xfs_attr3_leaf_compact(
trace_xfs_attr_leaf_compact(args);
tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
- ASSERT(tmpbuffer != NULL);
memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
memset(bp->b_addr, 0, XFS_LBSIZE(mp));
+ leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
+ leaf_dst = bp->b_addr;
/*
- * Copy basic information
+ * Copy the on-disk header back into the destination buffer to ensure
+ * all the information in the header that is not part of the incore
+ * header structure is preserved.
*/
- leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
- leaf_d = bp->b_addr;
- ichdr_s = *ichdr_d; /* struct copy */
- ichdr_d->firstused = XFS_LBSIZE(mp);
- ichdr_d->usedbytes = 0;
- ichdr_d->count = 0;
- ichdr_d->holes = 0;
- ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s);
- ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
+ memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
+
+ /* Initialise the incore headers */
+ ichdr_src = *ichdr_dst; /* struct copy */
+ ichdr_dst->firstused = XFS_LBSIZE(mp);
+ ichdr_dst->usedbytes = 0;
+ ichdr_dst->count = 0;
+ ichdr_dst->holes = 0;
+ ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
+ ichdr_dst->freemap[0].size = ichdr_dst->firstused -
+ ichdr_dst->freemap[0].base;
+
+
+ /* write the header back to initialise the underlying buffer */
+ xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
/*
* Copy all entry's in the same (sorted) order,
* but allocate name/value pairs packed and in sequence.
*/
- xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0,
- ichdr_s.count, mp);
+ xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
+ ichdr_src.count, mp);
/*
* this logs the entire buffer, but the caller must write the header
* back to the buffer when it is finished modifying it.
@@ -2179,14 +2191,24 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP);
- memset(tmp_leaf, 0, state->blocksize);
- memset(&tmphdr, 0, sizeof(tmphdr));
+ tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
+
+ /*
+ * Copy the header into the temp leaf so that all the stuff
+ * not in the incore header is present and gets copied back in
+ * once we've moved all the entries.
+ */
+ memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
+ memset(&tmphdr, 0, sizeof(tmphdr));
tmphdr.magic = savehdr.magic;
tmphdr.forw = savehdr.forw;
tmphdr.back = savehdr.back;
tmphdr.firstused = state->blocksize;
+
+ /* write the header to the temp buffer to initialise it */
+ xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+
if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
drop_blk->bp, &drophdr)) {
xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
@@ -2330,9 +2352,11 @@ xfs_attr3_leaf_lookup_int(
if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
+ args->valuelen = be32_to_cpu(name_rmt->valuelen);
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
- args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
- be32_to_cpu(name_rmt->valuelen));
+ args->rmtblkcnt = xfs_attr3_rmt_blocks(
+ args->dp->i_mount,
+ args->valuelen);
return XFS_ERROR(EEXIST);
}
}
@@ -2383,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
valuelen = be32_to_cpu(name_rmt->valuelen);
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
- args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
+ args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+ valuelen);
if (args->flags & ATTR_KERNOVAL) {
args->valuelen = valuelen;
return 0;
@@ -2709,7 +2734,8 @@ xfs_attr3_leaf_list_int(
args.valuelen = valuelen;
args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
- args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
+ args.rmtblkcnt = xfs_attr3_rmt_blocks(
+ args.dp->i_mount, valuelen);
retval = xfs_attr_rmtval_get(&args);
if (retval)
return retval;
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index dee84466dcc..ef6b0c12452 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,22 +47,55 @@
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
*/
-static int
+int
xfs_attr3_rmt_blocks(
struct xfs_mount *mp,
int attrlen)
{
- int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp,
- mp->m_sb.sb_blocksize);
- return (attrlen + buflen - 1) / buflen;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+ return (attrlen + buflen - 1) / buflen;
+ }
+ return XFS_B_TO_FSB(mp, attrlen);
+}
+
+/*
+ * Checking of the remote attribute header is split into two parts. The verifier
+ * does CRC, location and bounds checking, the unpacking function checks the
+ * attribute parameters and owner.
+ */
+static bool
+xfs_attr3_rmt_hdr_ok(
+ struct xfs_mount *mp,
+ void *ptr,
+ xfs_ino_t ino,
+ uint32_t offset,
+ uint32_t size,
+ xfs_daddr_t bno)
+{
+ struct xfs_attr3_rmt_hdr *rmt = ptr;
+
+ if (bno != be64_to_cpu(rmt->rm_blkno))
+ return false;
+ if (offset != be32_to_cpu(rmt->rm_offset))
+ return false;
+ if (size != be32_to_cpu(rmt->rm_bytes))
+ return false;
+ if (ino != be64_to_cpu(rmt->rm_owner))
+ return false;
+
+ /* ok */
+ return true;
}
static bool
xfs_attr3_rmt_verify(
- struct xfs_buf *bp)
+ struct xfs_mount *mp,
+ void *ptr,
+ int fsbsize,
+ xfs_daddr_t bno)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+ struct xfs_attr3_rmt_hdr *rmt = ptr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
@@ -70,7 +103,9 @@ xfs_attr3_rmt_verify(
return false;
if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
return false;
- if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
+ if (be64_to_cpu(rmt->rm_blkno) != bno)
+ return false;
+ if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -86,17 +121,40 @@ xfs_attr3_rmt_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ char *ptr;
+ int len;
+ bool corrupt = false;
+ xfs_daddr_t bno;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_ATTR3_RMT_CRC_OFF) ||
- !xfs_attr3_rmt_verify(bp)) {
+ ptr = bp->b_addr;
+ bno = bp->b_bn;
+ len = BBTOB(bp->b_length);
+ ASSERT(len >= XFS_LBSIZE(mp));
+
+ while (len > 0) {
+ if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
+ XFS_ATTR3_RMT_CRC_OFF)) {
+ corrupt = true;
+ break;
+ }
+ if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+ corrupt = true;
+ break;
+ }
+ len -= XFS_LBSIZE(mp);
+ ptr += XFS_LBSIZE(mp);
+ bno += mp->m_bsize;
+ }
+
+ if (corrupt) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ } else
+ ASSERT(len == 0);
}
static void
@@ -105,23 +163,39 @@ xfs_attr3_rmt_write_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
+ char *ptr;
+ int len;
+ xfs_daddr_t bno;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_attr3_rmt_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- return;
- }
+ ptr = bp->b_addr;
+ bno = bp->b_bn;
+ len = BBTOB(bp->b_length);
+ ASSERT(len >= XFS_LBSIZE(mp));
+
+ while (len > 0) {
+ if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+ XFS_CORRUPTION_ERROR(__func__,
+ XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ return;
+ }
+ if (bip) {
+ struct xfs_attr3_rmt_hdr *rmt;
+
+ rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+ rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ }
+ xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
- if (bip) {
- struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
- rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ len -= XFS_LBSIZE(mp);
+ ptr += XFS_LBSIZE(mp);
+ bno += mp->m_bsize;
}
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_ATTR3_RMT_CRC_OFF);
+ ASSERT(len == 0);
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -129,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
.verify_write = xfs_attr3_rmt_write_verify,
};
-static int
+STATIC int
xfs_attr3_rmt_hdr_set(
struct xfs_mount *mp,
+ void *ptr,
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
- struct xfs_buf *bp)
+ xfs_daddr_t bno)
{
- struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+ struct xfs_attr3_rmt_hdr *rmt = ptr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
@@ -147,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
rmt->rm_bytes = cpu_to_be32(size);
uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
rmt->rm_owner = cpu_to_be64(ino);
- rmt->rm_blkno = cpu_to_be64(bp->b_bn);
- bp->b_ops = &xfs_attr3_rmt_buf_ops;
+ rmt->rm_blkno = cpu_to_be64(bno);
return sizeof(struct xfs_attr3_rmt_hdr);
}
/*
- * Checking of the remote attribute header is split into two parts. the verifier
- * does CRC, location and bounds checking, the unpacking function checks the
- * attribute parameters and owner.
+ * Helper functions to copy attribute data in and out of the on-disk extents
*/
-static bool
-xfs_attr3_rmt_hdr_ok(
- struct xfs_mount *mp,
- xfs_ino_t ino,
- uint32_t offset,
- uint32_t size,
- struct xfs_buf *bp)
+STATIC int
+xfs_attr_rmtval_copyout(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ xfs_ino_t ino,
+ int *offset,
+ int *valuelen,
+ char **dst)
{
- struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+ char *src = bp->b_addr;
+ xfs_daddr_t bno = bp->b_bn;
+ int len = BBTOB(bp->b_length);
- if (offset != be32_to_cpu(rmt->rm_offset))
- return false;
- if (size != be32_to_cpu(rmt->rm_bytes))
- return false;
- if (ino != be64_to_cpu(rmt->rm_owner))
- return false;
+ ASSERT(len >= XFS_LBSIZE(mp));
- /* ok */
- return true;
+ while (len > 0 && *valuelen > 0) {
+ int hdr_size = 0;
+ int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+
+ byte_cnt = min_t(int, *valuelen, byte_cnt);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+ byte_cnt, bno)) {
+ xfs_alert(mp,
+"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+ bno, *offset, byte_cnt, ino);
+ return EFSCORRUPTED;
+ }
+ hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
+ }
+
+ memcpy(*dst, src + hdr_size, byte_cnt);
+
+ /* roll buffer forwards */
+ len -= XFS_LBSIZE(mp);
+ src += XFS_LBSIZE(mp);
+ bno += mp->m_bsize;
+
+ /* roll attribute data forwards */
+ *valuelen -= byte_cnt;
+ *dst += byte_cnt;
+ *offset += byte_cnt;
+ }
+ return 0;
+}
+
+STATIC void
+xfs_attr_rmtval_copyin(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ xfs_ino_t ino,
+ int *offset,
+ int *valuelen,
+ char **src)
+{
+ char *dst = bp->b_addr;
+ xfs_daddr_t bno = bp->b_bn;
+ int len = BBTOB(bp->b_length);
+
+ ASSERT(len >= XFS_LBSIZE(mp));
+
+ while (len > 0 && *valuelen > 0) {
+ int hdr_size;
+ int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+
+ byte_cnt = min(*valuelen, byte_cnt);
+ hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
+ byte_cnt, bno);
+
+ memcpy(dst + hdr_size, *src, byte_cnt);
+
+ /*
+ * If this is the last block, zero the remainder of it.
+ * Check that we are actually the last block, too.
+ */
+ if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
+ ASSERT(*valuelen - byte_cnt == 0);
+ ASSERT(len == XFS_LBSIZE(mp));
+ memset(dst + hdr_size + byte_cnt, 0,
+ XFS_LBSIZE(mp) - hdr_size - byte_cnt);
+ }
+
+ /* roll buffer forwards */
+ len -= XFS_LBSIZE(mp);
+ dst += XFS_LBSIZE(mp);
+ bno += mp->m_bsize;
+
+ /* roll attribute data forwards */
+ *valuelen -= byte_cnt;
+ *src += byte_cnt;
+ *offset += byte_cnt;
+ }
}
/*
@@ -190,13 +336,12 @@ xfs_attr_rmtval_get(
struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
struct xfs_mount *mp = args->dp->i_mount;
struct xfs_buf *bp;
- xfs_daddr_t dblkno;
xfs_dablk_t lblkno = args->rmtblkno;
- void *dst = args->value;
+ char *dst = args->value;
int valuelen = args->valuelen;
int nmap;
int error;
- int blkcnt;
+ int blkcnt = args->rmtblkcnt;
int i;
int offset = 0;
@@ -207,52 +352,36 @@ xfs_attr_rmtval_get(
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt, map, &nmap,
+ blkcnt, map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return error;
ASSERT(nmap >= 1);
for (i = 0; (i < nmap) && (valuelen > 0); i++) {
- int byte_cnt;
- char *src;
+ xfs_daddr_t dblkno;
+ int dblkcnt;
ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
- blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+ dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
- dblkno, blkcnt, 0, &bp,
+ dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
return error;
- byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
- byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
-
- src = bp->b_addr;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
- offset, byte_cnt, bp)) {
- xfs_alert(mp,
-"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
- offset, byte_cnt, args->dp->i_ino);
- xfs_buf_relse(bp);
- return EFSCORRUPTED;
-
- }
-
- src += sizeof(struct xfs_attr3_rmt_hdr);
- }
-
- memcpy(dst, src, byte_cnt);
+ error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
+ &offset, &valuelen,
+ &dst);
xfs_buf_relse(bp);
+ if (error)
+ return error;
- offset += byte_cnt;
- dst += byte_cnt;
- valuelen -= byte_cnt;
-
+ /* roll attribute extent map forwards */
lblkno += map[i].br_blockcount;
+ blkcnt -= map[i].br_blockcount;
}
}
ASSERT(valuelen == 0);
@@ -270,17 +399,13 @@ xfs_attr_rmtval_set(
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_bmbt_irec map;
- struct xfs_buf *bp;
- xfs_daddr_t dblkno;
xfs_dablk_t lblkno;
xfs_fileoff_t lfileoff = 0;
- void *src = args->value;
+ char *src = args->value;
int blkcnt;
int valuelen;
int nmap;
int error;
- int hdrcnt = 0;
- bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
int offset = 0;
trace_xfs_attr_rmtval_set(args);
@@ -289,24 +414,14 @@ xfs_attr_rmtval_set(
* Find a "hole" in the attribute address space large enough for
* us to drop the new attribute's value into. Because CRC enable
* attributes have headers, we can't just do a straight byte to FSB
- * conversion. We calculate the worst case block count in this case
- * and we may not need that many, so we have to handle this when
- * allocating the blocks below.
+ * conversion and have to take the header space into account.
*/
- if (!crcs)
- blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
- else
- blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
-
+ blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
XFS_ATTR_FORK);
if (error)
return error;
- /* Start with the attribute data. We'll allocate the rest afterwards. */
- if (crcs)
- blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
-
args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
args->rmtblkcnt = blkcnt;
@@ -349,26 +464,6 @@ xfs_attr_rmtval_set(
(map.br_startblock != HOLESTARTBLOCK));
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
- hdrcnt++;
-
- /*
- * If we have enough blocks for the attribute data, calculate
- * how many extra blocks we need for headers. We might run
- * through this multiple times in the case that the additional
- * headers in the blocks needed for the data fragments spills
- * into requiring more blocks. e.g. for 512 byte blocks, we'll
- * spill for another block every 9 headers we require in this
- * loop.
- */
- if (crcs && blkcnt == 0) {
- int total_len;
-
- total_len = args->valuelen +
- hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
- blkcnt = XFS_B_TO_FSB(mp, total_len);
- blkcnt -= args->rmtblkcnt;
- args->rmtblkcnt += blkcnt;
- }
/*
* Start the next trans in the chain.
@@ -385,18 +480,19 @@ xfs_attr_rmtval_set(
* the INCOMPLETE flag.
*/
lblkno = args->rmtblkno;
+ blkcnt = args->rmtblkcnt;
valuelen = args->valuelen;
while (valuelen > 0) {
- int byte_cnt;
- char *buf;
+ struct xfs_buf *bp;
+ xfs_daddr_t dblkno;
+ int dblkcnt;
+
+ ASSERT(blkcnt > 0);
- /*
- * Try to remember where we decided to put the value.
- */
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt, &map, &nmap,
+ blkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
@@ -405,41 +501,27 @@ xfs_attr_rmtval_set(
(map.br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
- blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+ dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
+ bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
if (!bp)
return ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
- byte_cnt = BBTOB(bp->b_length);
- byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
- if (valuelen < byte_cnt)
- byte_cnt = valuelen;
-
- buf = bp->b_addr;
- buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
- byte_cnt, bp);
- memcpy(buf, src, byte_cnt);
-
- if (byte_cnt < BBTOB(bp->b_length))
- xfs_buf_zero(bp, byte_cnt,
- BBTOB(bp->b_length) - byte_cnt);
+ xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
+ &valuelen, &src);
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
if (error)
return error;
- src += byte_cnt;
- valuelen -= byte_cnt;
- offset += byte_cnt;
- hdrcnt--;
+ /* roll attribute extent map forwards */
lblkno += map.br_blockcount;
+ blkcnt -= map.br_blockcount;
}
ASSERT(valuelen == 0);
- ASSERT(hdrcnt == 0);
return 0;
}
@@ -448,33 +530,40 @@ xfs_attr_rmtval_set(
* out-of-line buffer that it is stored on.
*/
int
-xfs_attr_rmtval_remove(xfs_da_args_t *args)
+xfs_attr_rmtval_remove(
+ struct xfs_da_args *args)
{
- xfs_mount_t *mp;
- xfs_bmbt_irec_t map;
- xfs_buf_t *bp;
- xfs_daddr_t dblkno;
- xfs_dablk_t lblkno;
- int valuelen, blkcnt, nmap, error, done, committed;
+ struct xfs_mount *mp = args->dp->i_mount;
+ xfs_dablk_t lblkno;
+ int blkcnt;
+ int error;
+ int done;
trace_xfs_attr_rmtval_remove(args);
- mp = args->dp->i_mount;
-
/*
- * Roll through the "value", invalidating the attribute value's
- * blocks.
+ * Roll through the "value", invalidating the attribute value's blocks.
+ * Note that args->rmtblkcnt is the minimum number of data blocks we'll
+ * see for a CRC enabled remote attribute. Each extent will have a
+ * header, and so we may have more blocks than we realise here. If we
+ * fail to map the blocks correctly, we'll have problems with the buffer
+ * lookups.
*/
lblkno = args->rmtblkno;
- valuelen = args->rmtblkcnt;
- while (valuelen > 0) {
+ blkcnt = args->rmtblkcnt;
+ while (blkcnt > 0) {
+ struct xfs_bmbt_irec map;
+ struct xfs_buf *bp;
+ xfs_daddr_t dblkno;
+ int dblkcnt;
+ int nmap;
+
/*
* Try to remember where we decided to put the value.
*/
nmap = 1;
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt, &map, &nmap,
- XFS_BMAPI_ATTRFORK);
+ blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
return(error);
ASSERT(nmap == 1);
@@ -482,21 +571,20 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
(map.br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
- blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+ dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
/*
* If the "remote" value is in the cache, remove it.
*/
- bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
+ bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
if (bp) {
xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
- valuelen -= map.br_blockcount;
-
lblkno += map.br_blockcount;
+ blkcnt -= map.br_blockcount;
}
/*
@@ -506,6 +594,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
blkcnt = args->rmtblkcnt;
done = 0;
while (!done) {
+ int committed;
+
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062..92a8fd7977c 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
+/*
+ * There is one of these headers per filesystem block in a remote attribute.
+ * This is done to ensure there is a 1:1 mapping between the attribute value
+ * length and the number of blocks needed to store the attribute. This makes the
+ * verification of a buffer a little more complex, but greatly simplifies the
+ * allocation, reading and writing of these attributes as we don't have to guess
+ * the number of blocks needed to store the attribute data.
+ */
struct xfs_attr3_rmt_hdr {
__be32 rm_magic;
__be32 rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
+int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
+
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_set(struct xfs_da_args *args);
int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 82b70bda9f4..1b2472a46e4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
xfs_alert(btp->bt_mount,
"%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
__func__, blkno, eofs);
+ WARN_ON(1);
return NULL;
}
@@ -1649,7 +1650,7 @@ xfs_alloc_buftarg(
{
xfs_buftarg_t *btp;
- btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+ btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b..4ec43177704 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -262,12 +262,7 @@ xfs_buf_item_format_segment(
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/*
- * You would think we need to bump the nvecs here too, but we do not
- * this number is used by recovery, and it gets confused by the boundary
- * split here
- * nvecs++;
- */
+ nvecs++;
vecp++;
first_bit = next_bit;
last_bit = next_bit;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9b26a99ebfe..0b8b2a13cd2 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -270,6 +270,7 @@ xfs_da3_node_read_verify(
break;
return;
case XFS_ATTR_LEAF_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
bp->b_ops = &xfs_attr3_leaf_buf_ops;
bp->b_ops->verify_read(bp);
return;
@@ -2464,7 +2465,8 @@ xfs_buf_map_from_irec(
ASSERT(nirecs >= 1);
if (nirecs > 1) {
- map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
+ map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
+ KM_SLEEP | KM_NOFS);
if (!map)
return ENOMEM;
*mapp = map;
@@ -2520,7 +2522,8 @@ xfs_dabuf_map(
* Optimize the one-block case.
*/
if (nfsb != 1)
- irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
+ irecs = kmem_zalloc(sizeof(irec) * nfsb,
+ KM_SLEEP | KM_NOFS);
nirecs = nfsb;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f852b082a08..c407e1ccff4 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -219,6 +219,14 @@ xfs_swap_extents(
int taforkblks = 0;
__uint64_t tmp;
+ /*
+ * We have no way of updating owner information in the BMBT blocks for
+ * each inode on CRC enabled filesystems, so to avoid corrupting
+ * this metadata we simply don't allow extent swaps to occur.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return XFS_ERROR(EINVAL);
+
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
error = XFS_ERROR(ENOMEM);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a3b1bd841a8..995f1f505a5 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -715,6 +715,7 @@ struct xfs_dir3_free_hdr {
__be32 firstdb; /* db of first entry */
__be32 nvalid; /* count of valid entries */
__be32 nused; /* count of used entries */
+ __be32 pad; /* 64 bit alignment. */
};
struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 721ba2fe8e5..da71a1819d7 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1336,7 +1336,7 @@ xfs_dir2_leaf_getdents(
mp->m_sb.sb_blocksize);
map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
(length * sizeof(struct xfs_bmbt_irec)),
- KM_SLEEP);
+ KM_SLEEP | KM_NOFS);
map_info->map_size = length;
/*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5246de4912d..2226a00acd1 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -263,18 +263,19 @@ xfs_dir3_free_get_buf(
* Initialize the new block to be empty, and remember
* its first slot as our empty slot.
*/
- hdr.magic = XFS_DIR2_FREE_MAGIC;
- hdr.firstdb = 0;
- hdr.nused = 0;
- hdr.nvalid = 0;
+ memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
+ memset(&hdr, 0, sizeof(hdr));
+
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
hdr.magic = XFS_DIR3_FREE_MAGIC;
+
hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
- }
+ } else
+ hdr.magic = XFS_DIR2_FREE_MAGIC;
xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
*bpp = bp;
return 0;
@@ -1921,8 +1922,6 @@ xfs_dir2_node_addname_int(
*/
freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
xfs_dir3_free_max_bests(mp);
- free->hdr.nvalid = 0;
- free->hdr.nused = 0;
} else {
free = fbp->b_addr;
bests = xfs_dir3_free_bests_p(mp, free);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index c0f375087ef..452920a3f03 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -305,11 +305,12 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
{
ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
- __xfs_efi_release(efip);
-
/* recovery needs us to drop the EFI reference, too */
if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
__xfs_efi_release(efip);
+
+ __xfs_efi_release(efip);
+ /* efip may now have been freed, do not reference it again. */
}
}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6dda3f949b0..d0469554539 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
+#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 87595b211da..3c3644ea825 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -99,7 +99,9 @@ xfs_fs_geometry(
(xfs_sb_version_hasattr2(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_PROJID32 : 0);
+ XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
+ (xfs_sb_version_hascrc(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_V5SB : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d82efaa2ac7..ca9ecaa8111 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -455,6 +455,28 @@ xfs_vn_getattr(
return 0;
}
+static void
+xfs_setattr_mode(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct iattr *iattr)
+{
+ struct inode *inode = VFS_I(ip);
+ umode_t mode = iattr->ia_mode;
+
+ ASSERT(tp);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+
+ ip->i_d.di_mode &= S_IFMT;
+ ip->i_d.di_mode |= mode & ~S_IFMT;
+
+ inode->i_mode &= S_IFMT;
+ inode->i_mode |= mode & ~S_IFMT;
+}
+
int
xfs_setattr_nonsize(
struct xfs_inode *ip,
@@ -606,18 +628,8 @@ xfs_setattr_nonsize(
/*
* Change file access modes.
*/
- if (mask & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
-
- ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= mode & ~S_IFMT;
-
- inode->i_mode &= S_IFMT;
- inode->i_mode |= mode & ~S_IFMT;
- }
+ if (mask & ATTR_MODE)
+ xfs_setattr_mode(tp, ip, iattr);
/*
* Change file access or modified times.
@@ -714,9 +726,8 @@ xfs_setattr_size(
return XFS_ERROR(error);
ASSERT(S_ISREG(ip->i_d.di_mode));
- ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
- ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+ ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
if (!(flags & XFS_ATTR_NOLOCK)) {
lock_flags |= XFS_IOLOCK_EXCL;
@@ -860,6 +871,12 @@ xfs_setattr_size(
xfs_inode_clear_eofblocks_tag(ip);
}
+ /*
+ * Change file access modes.
+ */
+ if (mask & ATTR_MODE)
+ xfs_setattr_mode(tp, ip, iattr);
+
if (mask & ATTR_CTIME) {
inode->i_ctime = iattr->ia_ctime;
ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index e3d0b85d852..d0833b54e55 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -139,7 +139,7 @@ xlog_cil_prepare_log_vecs(
new_lv = kmem_zalloc(sizeof(*new_lv) +
niovecs * sizeof(struct xfs_log_iovec),
- KM_SLEEP);
+ KM_SLEEP|KM_NOFS);
/* The allocated iovec region lies beyond the log vector. */
new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17ee..d9e4d3c3991 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2097,6 +2097,17 @@ xlog_recover_do_reg_buffer(
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
/*
+ * The dirty regions logged in the buffer, even though
+ * contiguous, may span multiple chunks. This is because the
+ * dirty region may span a physical page boundary in a buffer
+ * and hence be split into two separate vectors for writing into
+ * the log. Hence we need to trim nbits back to the length of
+ * the current region being copied out of the log.
+ */
+ if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+ nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+ /*
* Do a sanity check if this is a dquot buffer. Just checking
* the first dquot in the buffer should do. XXXThis is
* probably a good thing to do for other buf types also.
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c41190cad6e..6cdf6ffc36a 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -489,31 +489,36 @@ xfs_qm_scall_setqlim(
if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
return 0;
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
- error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
- 0, 0, XFS_DEFAULT_LOG_COUNT);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return (error);
- }
-
/*
* We don't want to race with a quotaoff so take the quotaoff lock.
- * (We don't hold an inode lock, so there's nothing else to stop
- * a quotaoff from happening). (XXXThis doesn't currently happen
- * because we take the vfslock before calling xfs_qm_sysent).
+ * We don't hold an inode lock, so there's nothing else to stop
+ * a quotaoff from happening.
*/
mutex_lock(&q->qi_quotaofflock);
/*
- * Get the dquot (locked), and join it to the transaction.
- * Allocate the dquot if this doesn't exist.
+ * Get the dquot (locked) before we start, as we need to do a
+ * transaction to allocate it if it doesn't exist. Once we have the
+ * dquot, unlock it so we can start the next transaction safely. We hold
+ * a reference to the dquot, so it's safe to do this unlock/lock without
+ * it being reclaimed in the meantime.
*/
- if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
+ if (error) {
ASSERT(error != ENOENT);
goto out_unlock;
}
+ xfs_dqunlock(dqp);
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+ error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
+ 0, 0, XFS_DEFAULT_LOG_COUNT);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ goto out_rele;
+ }
+
+ xfs_dqlock(dqp);
xfs_trans_dqjoin(tp, dqp);
ddq = &dqp->q_core;
@@ -621,9 +626,10 @@ xfs_qm_scall_setqlim(
xfs_trans_log_dquot(tp, dqp);
error = xfs_trans_commit(tp, 0);
- xfs_qm_dqrele(dqp);
- out_unlock:
+out_rele:
+ xfs_qm_dqrele(dqp);
+out_unlock:
mutex_unlock(&q->qi_quotaofflock);
return error;
}
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5f234389327..195a403e152 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -56,16 +56,9 @@ xfs_symlink_blocks(
struct xfs_mount *mp,
int pathlen)
{
- int fsblocks = 0;
- int len = pathlen;
+ int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
- do {
- fsblocks++;
- len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
- } while (len > 0);
-
- ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
- return fsblocks;
+ return (pathlen + buflen - 1) / buflen;
}
static int
@@ -405,7 +398,7 @@ xfs_symlink(
if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
fs_blocks = 0;
else
- fs_blocks = XFS_B_TO_FSB(mp, pathlen);
+ fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
@@ -512,7 +505,7 @@ xfs_symlink(
cur_chunk = target_path;
offset = 0;
for (n = 0; n < nmaps; n++) {
- char *buf;
+ char *buf;
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
@@ -525,9 +518,7 @@ xfs_symlink(
bp->b_ops = &xfs_symlink_buf_ops;
byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
- if (pathlen < byte_cnt) {
- byte_cnt = pathlen;
- }
+ byte_cnt = min(byte_cnt, pathlen);
buf = bp->b_addr;
buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
@@ -542,6 +533,7 @@ xfs_symlink(
xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
(char *)bp->b_addr);
}
+ ASSERT(pathlen == 0);
}
/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1501f4fa51a..0176bb21f09 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1453,7 +1453,7 @@ xfs_free_file_space(
xfs_mount_t *mp;
int nimap;
uint resblks;
- uint rounding;
+ xfs_off_t rounding;
int rt;
xfs_fileoff_t startoffset_fsb;
xfs_trans_t *tp;
@@ -1482,7 +1482,7 @@ xfs_free_file_space(
inode_dio_wait(VFS_I(ip));
}
- rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+ rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);