aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYan, Zheng <zheng.yan@oracle.com>2010-05-16 10:46:25 -0400
committerChris Mason <chris.mason@oracle.com>2010-05-25 10:34:50 -0400
commitf0486c68e4bd9a06a5904d3eeb3a0d73a83befb8 (patch)
tree509428ef400ef45e875a3c448b63b86cbea36aea
parent2ead6ae770d9f9dec9f4286bf0fd9001b4388c4b (diff)
Btrfs: Introduce contexts for metadata reservation
Introducing metadata reseravtion contexts has two major advantages. First, it makes metadata reseravtion more traceable. Second, it can reclaim freed space and re-add them to the itself after transaction committed. Besides add btrfs_block_rsv structure and related helper functions, This patch contains following changes: Move code that decides if freed tree block should be pinned into btrfs_free_tree_block(). Make space accounting more accurate, mainly for handling read only block groups. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/ctree.c104
-rw-r--r--fs/btrfs/ctree.h69
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent-tree.c1029
-rw-r--r--fs/btrfs/relocation.c16
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/transaction.h8
7 files changed, 853 insertions, 385 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6795a713b20..6bee8e5204f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- struct extent_buffer *cow)
+ struct extent_buffer *cow,
+ int *last_ref)
{
u64 refs;
u64 owner;
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
+ *last_ref = 1;
}
return 0;
}
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
int level;
+ int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
@@ -442,7 +445,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
- update_ref_for_cow(trans, root, buf, cow);
+ update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (buf == root->node) {
WARN_ON(parent && parent != buf);
@@ -457,8 +460,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
- btrfs_free_tree_block(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid, level);
+ btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -473,8 +476,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- btrfs_free_tree_block(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid, level);
+ btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -949,6 +952,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot);
}
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+ spin_lock(&root->accounting_lock);
+ btrfs_set_root_used(&root->root_item,
+ btrfs_root_used(&root->root_item) + size);
+ spin_unlock(&root->accounting_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+ spin_lock(&root->accounting_lock);
+ btrfs_set_root_used(&root->root_item,
+ btrfs_root_used(&root->root_item) - size);
+ spin_unlock(&root->accounting_lock);
+}
+
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error.
@@ -1019,7 +1038,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ goto enospc;
+ }
spin_lock(&root->node_lock);
root->node = child;
@@ -1034,11 +1057,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
- ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
- 0, root->root_key.objectid, level);
+
+ root_sub_used(root, mid->len);
+ btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
- return ret;
+ return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1088,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- u64 bytenr = right->start;
- u32 blocksize = right->len;
-
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- free_extent_buffer(right);
- right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot +
1);
if (wret)
ret = wret;
- wret = btrfs_free_tree_block(trans, root,
- bytenr, blocksize, 0,
- root->root_key.objectid,
- level);
- if (wret)
- ret = wret;
+ root_sub_used(root, right->len);
+ btrfs_free_tree_block(trans, root, right, 0, 1);
+ free_extent_buffer(right);
+ right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
@@ -1136,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- /* we've managed to empty the middle node, drop it */
- u64 bytenr = mid->start;
- u32 blocksize = mid->len;
-
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- free_extent_buffer(mid);
- mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
- wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
- 0, root->root_key.objectid, level);
- if (wret)
- ret = wret;
+ root_sub_used(root, mid->len);
+ btrfs_free_tree_block(trans, root, mid, 0, 1);
+ free_extent_buffer(mid);
+ mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
@@ -1740,7 +1751,6 @@ again:
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
- free_extent_buffer(b);
ret = err;
goto done;
}
@@ -2076,6 +2086,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c))
return PTR_ERR(c);
+ root_add_used(root, root->nodesize);
+
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level);
@@ -2134,6 +2146,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems;
BUG_ON(!path->nodes[level]);
+ btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
@@ -2202,6 +2215,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split))
return PTR_ERR(split);
+ root_add_used(root, root->nodesize);
+
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start);
@@ -2415,6 +2430,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
+ else
+ clean_tree_block(trans, root, left);
+
btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0);
@@ -2660,6 +2678,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left);
if (right_nritems)
btrfs_mark_buffer_dirty(right);
+ else
+ clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2669,8 +2689,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
- if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = left;
@@ -2932,10 +2950,10 @@ again:
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
- if (IS_ERR(right)) {
- BUG_ON(1);
+ if (IS_ERR(right))
return PTR_ERR(right);
- }
+
+ root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start);
@@ -3054,7 +3072,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
- BUG_ON(ret);
+ if (ret)
+ goto err;
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
@@ -3796,9 +3815,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
- ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
- 0, root->root_key.objectid, 0);
- return ret;
+ root_sub_used(root, leaf->len);
+
+ btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ return 0;
}
/*
* delete the item at the leaf level in path. If that empties
@@ -3865,6 +3885,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
+ btrfs_set_path_blocking(path);
+ clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 85c7b95dd2f..7d2479694a5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -707,6 +707,20 @@ struct btrfs_space_info {
atomic_t caching_threads;
};
+struct btrfs_block_rsv {
+ u64 size;
+ u64 reserved;
+ u64 freed[2];
+ struct btrfs_space_info *space_info;
+ struct list_head list;
+ spinlock_t lock;
+ atomic_t usage;
+ unsigned int priority:8;
+ unsigned int durable:1;
+ unsigned int refill_used:1;
+ unsigned int full:1;
+};
+
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
@@ -757,6 +771,7 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
+ u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
@@ -822,6 +837,22 @@ struct btrfs_fs_info {
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
+ /* block reservation for extent, checksum and root tree */
+ struct btrfs_block_rsv global_block_rsv;
+ /* block reservation for delay allocation */
+ struct btrfs_block_rsv delalloc_block_rsv;
+ /* block reservation for metadata operations */
+ struct btrfs_block_rsv trans_block_rsv;
+ /* block reservation for chunk tree */
+ struct btrfs_block_rsv chunk_block_rsv;
+
+ struct btrfs_block_rsv empty_block_rsv;
+
+ /* list of block reservations that cross multiple transactions */
+ struct list_head durable_block_rsv_list;
+
+ struct mutex durable_block_rsv_mutex;
+
u64 generation;
u64 last_trans_committed;
@@ -1008,6 +1039,9 @@ struct btrfs_root {
struct completion kobj_unregister;
struct mutex objectid_mutex;
+ spinlock_t accounting_lock;
+ struct btrfs_block_rsv *block_rsv;
+
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
@@ -1980,10 +2014,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
-int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- u64 parent, u64 root_objectid, int level);
+void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@@ -2037,9 +2071,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
-int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
- struct btrfs_block_group_cache *group);
-
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -2058,6 +2089,30 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
+void btrfs_free_block_rsv(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv);
+void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv);
+int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int *retries);
+int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved, int min_factor);
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
+ struct btrfs_block_rsv *dst_rsv,
+ u64 num_bytes);
+void btrfs_block_rsv_release(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes);
+int btrfs_set_block_group_ro(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache);
+int btrfs_set_block_group_rw(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 05f26acfd07..574594cf6b5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -903,6 +903,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
+ root->block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
@@ -910,6 +911,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
spin_lock_init(&root->inode_lock);
+ spin_lock_init(&root->accounting_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
@@ -1620,6 +1622,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree);
+ btrfs_init_block_rsv(&fs_info->global_block_rsv);
+ btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
+ btrfs_init_block_rsv(&fs_info->trans_block_rsv);
+ btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
+ btrfs_init_block_rsv(&fs_info->empty_block_rsv);
+ INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
+ mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f32b1618ee6..3367278ac6a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,10 +35,9 @@
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int alloc,
- int mark_free);
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve);
+ u64 bytenr, u64 num_bytes, int alloc);
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve, int sinfo);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 num_bytes,
- int is_data, int reserved,
- struct extent_buffer **must_clean);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -97,8 +90,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
- if (atomic_dec_and_test(&cache->count))
+ if (atomic_dec_and_test(&cache->count)) {
+ WARN_ON(cache->pinned > 0);
+ WARN_ON(cache->reserved > 0);
+ WARN_ON(cache->reserved_pinned > 0);
kfree(cache);
+ }
}
/*
@@ -325,7 +322,7 @@ static int caching_kthread(void *data)
exclude_super_stripes(extent_root, block_group);
spin_lock(&block_group->space_info->lock);
- block_group->space_info->bytes_super += block_group->bytes_super;
+ block_group->space_info->bytes_readonly += block_group->bytes_super;
spin_unlock(&block_group->space_info->lock);
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -1880,7 +1877,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
return ret;
}
-
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -1900,32 +1896,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
- int mark_free = 0;
- struct extent_buffer *must_clean = NULL;
-
- ret = pin_down_bytes(trans, root, NULL,
- node->bytenr, node->num_bytes,
- head->is_data, 1, &must_clean);
- if (ret > 0)
- mark_free = 1;
-
- if (must_clean) {
- clean_tree_block(NULL, root, must_clean);
- btrfs_tree_unlock(must_clean);
- free_extent_buffer(must_clean);
- }
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- if (mark_free) {
- ret = btrfs_free_reserved_extent(root,
- node->bytenr,
- node->num_bytes);
- BUG_ON(ret);
- }
}
mutex_unlock(&head->mutex);
return 0;
@@ -2356,6 +2334,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
ret = 0;
out:
btrfs_free_path(path);
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ WARN_ON(ret > 0);
return ret;
}
@@ -2706,7 +2686,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
- found->bytes_delalloc = 0;
+ found->bytes_may_use = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -2731,19 +2711,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
-static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
-{
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (!cache->ro) {
- cache->space_info->bytes_readonly += cache->key.offset -
- btrfs_block_group_used(&cache->item);
- cache->ro = 1;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-}
-
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->rw_devices;
@@ -2802,11 +2769,8 @@ static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
- u64 alloc_target;
-
- alloc_target = btrfs_get_alloc_profile(root, 1);
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- alloc_target);
+ BTRFS_BLOCK_GROUP_DATA);
}
static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
@@ -3412,10 +3376,334 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
return reclaimed >= to_reclaim;
}
+static int should_retry_reserve(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int *retries)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+ int ret;
+
+ if ((*retries) > 2)
+ return -ENOSPC;
+
+ ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
+ if (ret)
+ return 1;
+
+ if (trans && trans->transaction->in_commit)
+ return -ENOSPC;
+
+ ret = shrink_delalloc(trans, root, space_info, num_bytes);
+ if (ret)
+ return ret;
+
+ spin_lock(&space_info->lock);
+ if (space_info->bytes_pinned < num_bytes)
+ ret = 1;
+ spin_unlock(&space_info->lock);
+ if (ret)
+ return -ENOSPC;
+
+ (*retries)++;
+
+ if (trans)
+ return -EAGAIN;
+
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+
+ return 1;
+}
+
+static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+ u64 unused;
+ int ret = -ENOSPC;
+
+ spin_lock(&space_info->lock);
+ unused = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly;
+
+ if (unused < space_info->total_bytes)
+ unused = space_info->total_bytes - unused;
+ else
+ unused = 0;
+
+ if (unused >= num_bytes) {
+ if (block_rsv->priority >= 10) {
+ space_info->bytes_reserved += num_bytes;
+ ret = 0;
+ } else {
+ if ((unused + block_rsv->reserved) *
+ block_rsv->priority >=
+ (num_bytes + block_rsv->reserved) * 10) {
+ space_info->bytes_reserved += num_bytes;
+ ret = 0;
+ }
+ }
+ }
+ spin_unlock(&space_info->lock);
+
+ return ret;
+}
+
+static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_block_rsv *block_rsv;
+ if (root->ref_cows)
+ block_rsv = trans->block_rsv;
+ else
+ block_rsv = root->block_rsv;
+
+ if (!block_rsv)
+ block_rsv = &root->fs_info->empty_block_rsv;
+
+ return block_rsv;
+}
+
+static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ int ret = -ENOSPC;
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved >= num_bytes) {
+ block_rsv->reserved -= num_bytes;
+ if (block_rsv->reserved < block_rsv->size)
+ block_rsv->full = 0;
+ ret = 0;
+ }
+ spin_unlock(&block_rsv->lock);
+ return ret;
+}
+
+static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int update_size)
+{
+ spin_lock(&block_rsv->lock);
+ block_rsv->reserved += num_bytes;
+ if (update_size)
+ block_rsv->size += num_bytes;
+ else if (block_rsv->reserved >= block_rsv->size)
+ block_rsv->full = 1;
+ spin_unlock(&block_rsv->lock);
+}
+
+void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
+ struct btrfs_block_rsv *dest, u64 num_bytes)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+
+ spin_lock(&block_rsv->lock);
+ if (num_bytes == (u64)-1)
+ num_bytes = block_rsv->size;
+ block_rsv->size -= num_bytes;
+ if (block_rsv->reserved >= block_rsv->size) {
+ num_bytes = block_rsv->reserved - block_rsv->size;
+ block_rsv->reserved = block_rsv->size;
+ block_rsv->full = 1;
+ } else {
+ num_bytes = 0;
+ }
+ spin_unlock(&block_rsv->lock);
+
+ if (num_bytes > 0) {
+ if (dest) {
+ block_rsv_add_bytes(dest, num_bytes, 0);
+ } else {
+ spin_lock(&space_info->lock);
+ space_info->bytes_reserved -= num_bytes;
+ spin_unlock(&space_info->lock);
+ }
+ }
+}
+
+static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
+ struct btrfs_block_rsv *dst, u64 num_bytes)
+{
+ int ret;
+
+ ret = block_rsv_use_bytes(src, num_bytes);
+ if (ret)
+ return ret;
+
+ block_rsv_add_bytes(dst, num_bytes, 1);
+ return 0;
+}
+
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
+{
+ memset(rsv, 0, sizeof(*rsv));
+ spin_lock_init(&rsv->lock);
+ atomic_set(&rsv->usage, 1);
+ rsv->priority = 6;
+ INIT_LIST_HEAD(&rsv->list);
+}
+
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
+{
+ struct btrfs_block_rsv *block_rsv;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u64 alloc_target;
+
+ block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
+ if (!block_rsv)
+ return NULL;
+
+ btrfs_init_block_rsv(block_rsv);
+
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ block_rsv->space_info = __find_space_info(fs_info,
+ BTRFS_BLOCK_GROUP_METADATA);
+
+ return block_rsv;
+}
+
+void btrfs_free_block_rsv(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv)
+{
+ if (rsv && atomic_dec_and_test(&rsv->usage)) {
+ btrfs_block_rsv_release(root, rsv, (u64)-1);
+ if (!rsv->durable)
+ kfree(rsv);
+ }
+}
+
+/*
+ * make the block_rsv struct be able to capture freed space.
+ * the captured space will re-add to the the block_rsv struct
+ * after transaction commit
+ */
+void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv)
+{
+ block_rsv->durable = 1;
+ mutex_lock(&fs_info->durable_block_rsv_mutex);
+ list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
+ mutex_unlock(&fs_info->durable_block_rsv_mutex);
+}
+
+int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int *retries)
+{
+ int ret;
+
+ if (num_bytes == 0)
+ return 0;
+again:
+ ret = reserve_metadata_bytes(block_rsv, num_bytes);
+ if (!ret) {
+ block_rsv_add_bytes(block_rsv, num_bytes, 1);
+ return 0;
+ }
+
+ ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
+ if (ret > 0)
+ goto again;
+
+ return ret;
+}
+
+int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved, int min_factor)
+{
+ u64 num_bytes = 0;
+ int commit_trans = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+
+ spin_lock(&block_rsv->lock);
+ if (min_factor > 0)
+ num_bytes = div_factor(block_rsv->size, min_factor);
+ if (min_reserved > num_bytes)
+ num_bytes = min_reserved;
+
+ if (block_rsv->reserved >= num_bytes) {
+ ret = 0;
+ } else {
+ num_bytes -= block_rsv->reserved;
+ if (block_rsv->durable &&
+ block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
+ commit_trans = 1;
+ }
+ spin_unlock(&block_rsv->lock);
+ if (!ret)
+ return 0;
+
+ if (block_rsv->refill_used) {
+ ret = reserve_metadata_bytes(block_rsv, num_bytes);
+ if (!ret) {
+ block_rsv_add_bytes(block_rsv, num_bytes, 0);
+ return 0;
+ }
+ }
+
+ if (commit_trans) {
+ if (trans)
+ return -EAGAIN;
+
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
+ ret = btrfs_commit_transaction(trans, root);
+ return 0;
+ }
+
+ WARN_ON(1);
+ printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
+ block_rsv->size, block_rsv->reserved,
+ block_rsv->freed[0], block_rsv->freed[1]);
+
+ return -ENOSPC;
+}
+
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
+ struct btrfs_block_rsv *dst_rsv,
+ u64 num_bytes)
+{
+ return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+}
+
+void btrfs_block_rsv_release(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
+ if (global_rsv->full || global_rsv == block_rsv ||
+ block_rsv->space_info != global_rsv->space_info)
+ global_rsv = NULL;
+ block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
+}
+
+static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_space_info *space_info;
+
+ space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ fs_info->chunk_block_rsv.space_info = space_info;
+ fs_info->chunk_block_rsv.priority = 10;
+
+ space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+ fs_info->trans_block_rsv.space_info = space_info;
+ fs_info->empty_block_rsv.space_info = space_info;
+ fs_info->empty_block_rsv.priority = 10;
+
+ fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
+}
+
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int alloc,
- int mark_free)
+ u64 bytenr, u64 num_bytes, int alloc)
{
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info;
@@ -3459,30 +3747,21 @@ static int update_block_group(struct btrfs_trans_handle *trans,
cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
- if (cache->ro)
- cache->space_info->bytes_readonly -= num_bytes;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
+ cache->pinned += num_bytes;
+ cache->space_info->bytes_pinned += num_bytes;
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
- if (cache->ro)
- cache->space_info->bytes_readonly += num_bytes;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- if (mark_free) {
- int ret;
- ret = btrfs_discard_extent(root, bytenr,
- num_bytes);
- WARN_ON(ret);
-
- ret = btrfs_add_free_space(cache, bytenr,
- num_bytes);
- WARN_ON(ret);
- }
+ set_extent_dirty(info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1,
+ GFP_NOFS | __GFP_NOFAIL);
}
btrfs_put_block_group(cache);
total -= num_bytes;
@@ -3506,18 +3785,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return bytenr;
}
-/*
- * this function must be called within transaction
- */
-int btrfs_pin_extent(struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int reserved)
+static int pin_down_extent(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache,
+ u64 bytenr, u64 num_bytes, int reserved)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_group_cache *cache;
-
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
-
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
@@ -3529,28 +3800,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- btrfs_put_block_group(cache);
+ set_extent_dirty(root->fs_info->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+ return 0;
+}
- set_extent_dirty(fs_info->pinned_extents,
- bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+ BUG_ON(!cache);
+
+ pin_down_extent(root, cache, bytenr, num_bytes, reserved);
+
+ btrfs_put_block_group(cache);
return 0;
}
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve)
+/*
+ * update size of reserved extents. this function may return -EAGAIN
+ * if 'reserve' is true or 'sinfo' is false.
+ */
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve, int sinfo)
{
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (reserve) {
- cache->reserved += num_bytes;
- cache->space_info->bytes_reserved += num_bytes;
+ int ret = 0;
+ if (sinfo) {
+ struct btrfs_space_info *space_info = cache->space_info;
+ spin_lock(&space_info->lock);
+ spin_lock(&cache->lock);
+ if (reserve) {
+ if (cache->ro) {
+ ret = -EAGAIN;