aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c2694
1 files changed, 1592 insertions, 1102 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5c..359a754c782 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
+#include <linux/kthread.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
@@ -31,12 +32,12 @@
#include "locking.h"
#include "free-space-cache.h"
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -56,10 +57,26 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
int level, struct btrfs_key *ins);
-
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
+ struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key);
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+ int dump_block_groups);
+
+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+ smp_mb();
+ return cache->cached == BTRFS_CACHE_FINISHED;
+}
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
@@ -145,21 +162,96 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
return ret;
}
+static int add_excluded_extent(struct btrfs_root *root,
+ u64 start, u64 num_bytes)
+{
+ u64 end = start + num_bytes - 1;
+ set_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ set_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ return 0;
+}
+
+static void free_excluded_extents(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 start, end;
+
+ start = cache->key.objectid;
+ end = start + cache->key.offset - 1;
+
+ clear_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ clear_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+}
+
+static int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 bytenr;
+ u64 *logical;
+ int stripe_len;
+ int i, nr, ret;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr,
+ 0, &logical, &nr, &stripe_len);
+ BUG_ON(ret);
+
+ while (nr--) {
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, logical[nr],
+ stripe_len);
+ BUG_ON(ret);
+ }
+
+ kfree(logical);
+ }
+ return 0;
+}
+
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *ctl;
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_STARTED) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ ctl = cache->caching_ctl;
+ atomic_inc(&ctl->count);
+ spin_unlock(&cache->lock);
+ return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+ if (atomic_dec_and_test(&ctl->count))
+ kfree(ctl);
+}
+
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
{
- u64 extent_start, extent_end, size;
+ u64 extent_start, extent_end, size, total_added = 0;
int ret;
while (start < end) {
- ret = find_first_extent_bit(&info->pinned_extents, start,
+ ret = find_first_extent_bit(info->pinned_extents, start,
&extent_start, &extent_end,
- EXTENT_DIRTY);
+ EXTENT_DIRTY | EXTENT_UPTODATE);
if (ret)
break;
@@ -167,6 +259,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
+ total_added += size;
ret = btrfs_add_free_space(block_group, start,
size);
BUG_ON(ret);
@@ -178,112 +271,183 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
if (start < end) {
size = end - start;
+ total_added += size;
ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret);
}
- return 0;
-}
-
-static int remove_sb_from_cache(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- u64 bytenr;
- u64 *logical;
- int stripe_len;
- int i, nr, ret;
-
- for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
- cache->key.objectid, bytenr, 0,
- &logical, &nr, &stripe_len);
- BUG_ON(ret);
- while (nr--) {
- btrfs_remove_free_space(cache, logical[nr],
- stripe_len);
- }
- kfree(logical);
- }
- return 0;
+ return total_added;
}
-static int cache_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
{
+ struct btrfs_block_group_cache *block_group = data;
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+ struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_path *path;
- int ret = 0;
- struct btrfs_key key;
struct extent_buffer *leaf;
- int slot;
- u64 last;
-
- if (!block_group)
- return 0;
-
- root = root->fs_info->extent_root;
-
- if (block_group->cached)
- return 0;
+ struct btrfs_key key;
+ u64 total_found = 0;
+ u64 last = 0;
+ u32 nritems;
+ int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
+ exclude_super_stripes(extent_root, block_group);
+ spin_lock(&block_group->space_info->lock);
+ block_group->space_info->bytes_super += block_group->bytes_super;
+ spin_unlock(&block_group->space_info->lock);
+
+ last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
/*
- * we get into deadlocks with paths held by callers of this function.
- * since the alloc_mutex is protecting things right now, just
- * skip the locking here
+ * We don't want to deadlock with somebody trying to allocate a new
+ * extent for the extent root while also trying to search the extent
+ * root to add free space. So we skip locking and search the commit
+ * root, since its read-only
*/
path->skip_locking = 1;
- last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ path->search_commit_root = 1;
+ path->reada = 2;
+
key.objectid = last;
key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+again:
+ mutex_lock(&caching_ctl->mutex);
+ /* need to make sure the commit_root doesn't disappear */
+ down_read(&fs_info->extent_commit_sem);
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
while (1) {
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto err;
- if (ret == 0)
- continue;
- else
+ smp_mb();
+ if (fs_info->closing > 1) {
+ last = (u64)-1;
+ break;
+ }
+
+ if (path->slots[0] < nritems) {
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ } else {
+ ret = find_next_key(path, 0, &key);
+ if (ret)
break;
+
+ caching_ctl->progress = last;
+ btrfs_release_path(extent_root, path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
+ if (btrfs_transaction_in_commit(fs_info))
+ schedule_timeout(1);
+ else
+ cond_resched();
+ goto again;
+ }
+
+ if (key.objectid < block_group->key.objectid) {
+ path->slots[0]++;
+ continue;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid < block_group->key.objectid)
- goto next;
if (key.objectid >= block_group->key.objectid +
block_group->key.offset)
break;
- if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
- add_new_free_space(block_group, root->fs_info, last,
- key.objectid);
-
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ total_found += add_new_free_space(block_group,
+ fs_info, last,
+ key.objectid);
last = key.objectid + key.offset;
+
+ if (total_found > (1024 * 1024 * 2)) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
}
-next:
path->slots[0]++;
}
+ ret = 0;
- add_new_free_space(block_group, root->fs_info, last,
- block_group->key.objectid +
- block_group->key.offset);
+ total_found += add_new_free_space(block_group, fs_info, last,
+ block_group->key.objectid +
+ block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
+
+ spin_lock(&block_group->lock);
+ block_group->caching_ctl = NULL;
+ block_group->cached = BTRFS_CACHE_FINISHED;
+ spin_unlock(&block_group->lock);
- block_group->cached = 1;
- remove_sb_from_cache(root, block_group);
- ret = 0;
err:
btrfs_free_path(path);
+ up_read(&fs_info->extent_commit_sem);
+
+ free_excluded_extents(extent_root, block_group);
+
+ mutex_unlock(&caching_ctl->mutex);
+ wake_up(&caching_ctl->wait);
+
+ put_caching_control(caching_ctl);
+ atomic_dec(&block_group->space_info->caching_threads);
+ return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ struct task_struct *tsk;
+ int ret = 0;
+
+ smp_mb();
+ if (cache->cached != BTRFS_CACHE_NO)
+ return 0;
+
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+ BUG_ON(!caching_ctl);
+
+ INIT_LIST_HEAD(&caching_ctl->list);
+ mutex_init(&caching_ctl->mutex);
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ /* one for caching kthread, one for caching block group list */
+ atomic_set(&caching_ctl->count, 2);
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ kfree(caching_ctl);
+ return 0;
+ }
+ cache->caching_ctl = caching_ctl;
+ cache->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&cache->lock);
+
+ down_write(&fs_info->extent_commit_sem);
+ list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+ up_write(&fs_info->extent_commit_sem);
+
+ atomic_inc(&cache->space_info->caching_threads);
+
+ tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+ cache->key.objectid);
+ if (IS_ERR(tsk)) {
+ ret = PTR_ERR(tsk);
+ printk(KERN_ERR "error running thread %d\n", ret);
+ BUG();
+ }
+
return ret;
}
@@ -990,15 +1154,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
return type;
}
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key)
{
- int level;
- BUG_ON(!path->keep_locks);
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
break;
- btrfs_assert_tree_locked(path->nodes[level]);
if (path->slots[level] + 1 >=
btrfs_header_nritems(path->nodes[level]))
continue;
@@ -1158,7 +1320,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
* For simplicity, we just do not add new inline back
* ref if there is any kind of item for this block
*/
- if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+ if (find_next_key(path, 0, &key) == 0 &&
+ key.objectid == bytenr &&
key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
err = -EAGAIN;
goto out;
@@ -1409,7 +1572,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
- blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+ blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
+ DISCARD_FL_BARRIER);
}
#endif
@@ -1554,7 +1718,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
parent, ref_root, flags,
ref->objectid, ref->offset,
&ins, node->ref_mod);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent,
@@ -1680,7 +1843,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
extent_op->flags_to_set,
&extent_op->key,
ref->level, &ins);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
@@ -1715,16 +1877,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
+ int mark_free = 0;
+ struct extent_buffer *must_clean = NULL;
+
+ ret = pin_down_bytes(trans, root, NULL,
+ node->bytenr, node->num_bytes,
+ head->is_data, 1, &must_clean);
+ if (ret > 0)
+ mark_free = 1;
+
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- btrfs_update_pinned_extents(root, node->bytenr,
- node->num_bytes, 1);
- update_reserved_extents(root, node->bytenr,
- node->num_bytes, 0);
+ if (mark_free) {
+ ret = btrfs_free_reserved_extent(root,
+ node->bytenr,
+ node->num_bytes);
+ BUG_ON(ret);
+ }
}
mutex_unlock(&head->mutex);
return 0;
@@ -2388,13 +2566,29 @@ fail:
}
+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ struct rb_node *node;
+ spin_lock(&root->fs_info->block_group_cache_lock);
+ node = rb_next(&cache->cache_node);
+ btrfs_put_block_group(cache);
+ if (node) {
+ cache = rb_entry(node, struct btrfs_block_group_cache,
+ cache_node);
+ atomic_inc(&cache->count);
+ } else
+ cache = NULL;
+ spin_unlock(&root->fs_info->block_group_cache_lock);
+ return cache;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_group_cache *cache, *entry;
- struct rb_node *n;
+ struct btrfs_block_group_cache *cache;
int err = 0;
- int werr = 0;
struct btrfs_path *path;
u64 last = 0;
@@ -2403,39 +2597,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
return -ENOMEM;
while (1) {
- cache = NULL;
- spin_lock(&root->fs_info->block_group_cache_lock);
- for (n = rb_first(&root->fs_info->block_group_cache_tree);
- n; n = rb_next(n)) {
- entry = rb_entry(n, struct btrfs_block_group_cache,
- cache_node);
- if (entry->dirty) {
- cache = entry;
- break;
- }
+ if (last == 0) {
+ err = btrfs_run_delayed_refs(trans, root,
+ (unsigned long)-1);
+ BUG_ON(err);
}
- spin_unlock(&root->fs_info->block_group_cache_lock);
- if (!cache)
- break;
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ if (cache->dirty)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
cache->dirty = 0;
- last += cache->key.offset;
+ last = cache->key.objectid + cache->key.offset;
- err = write_one_cache_group(trans, root,
- path, cache);
- /*
- * if we fail to write the cache group, we want
- * to keep it marked dirty in hopes that a later
- * write will work
- */
- if (err) {
- werr = err;
- continue;
- }
+ err = write_one_cache_group(trans, root, path, cache);
+ BUG_ON(err);
+ btrfs_put_block_group(cache);
}
+
btrfs_free_path(path);
- return werr;
+ return 0;
}
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2485,6 +2675,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->force_alloc = 0;
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
+ atomic_set(&found->caching_threads, 0);
return 0;
}
@@ -2576,60 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
alloc_target);
}
+static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
+{
+ u64 num_bytes;
+ int level;
+
+ level = BTRFS_MAX_LEVEL - 2;
+ /*
+ * NOTE: these calculations are absolutely the worst possible case.
+ * This assumes that _every_ item we insert will require a new leaf, and
+ * that the tree has grown to its maximum level size.
+ */
+
+ /*
+ * for every item we insert we could insert both an extent item and a
+ * extent ref item. Then for ever item we insert, we will need to cow
+ * both the original leaf, plus the leaf to the left and right of it.
+ *
+ * Unless we are talking about the extent root, then we just want the
+ * number of items * 2, since we just need the extent item plus its ref.
+ */
+ if (root == root->fs_info->extent_root)
+ num_bytes = num_items * 2;
+ else
+ num_bytes = (num_items + (2 * num_items)) * 3;
+
+ /*
+ * num_bytes is total number of leaves we could need times the leaf
+ * size, and then for every leaf we could end up cow'ing 2 nodes per
+ * level, down to the leaf level.
+ */
+ num_bytes = (num_bytes * root->leafsize) +
+ (num_bytes * (level * 2)) * root->nodesize;
+
+ return num_bytes;
+}
+
/*
- * for now this just makes sure we have at least 5% of our metadata space free
- * for use.
+ * Unreserve metadata space for delalloc. If we have less reserved credits than
+ * we have extents, this function does nothing.
*/
-int btrfs_check_metadata_free_space(struct btrfs_root *root)
+int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
+ struct inode *inode, int num_items)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
- u64 alloc_target, thresh;
- int committed = 0, ret;
+ u64 num_bytes;
+ u64 alloc_target;
+ bool bug = false;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
-again:
+ num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+ num_items);
+
spin_lock(&meta_sinfo->lock);
- if (!meta_sinfo->full)
- thresh = meta_sinfo->total_bytes * 80;
- else
- thresh = meta_sinfo->total_bytes * 95;
+ if (BTRFS_I(inode)->delalloc_reserved_extents <=
+ BTRFS_I(inode)->delalloc_extents) {
+ spin_unlock(&meta_sinfo->lock);
+ return 0;
+ }
+
+ BTRFS_I(inode)->delalloc_reserved_extents--;
+ BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+
+ if (meta_sinfo->bytes_delalloc < num_bytes) {
+ bug = true;
+ meta_sinfo->bytes_delalloc = 0;
+ } else {
+ meta_sinfo->bytes_delalloc -= num_bytes;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ BUG_ON(bug);
+
+ return 0;
+}
+
+static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
+{
+ u64 thresh;
+
+ thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+ meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+ meta_sinfo->bytes_may_use;
+ thresh = meta_sinfo->total_bytes - thresh;
+ thresh *= 80;
do_div(thresh, 100);
+ if (thresh <= meta_sinfo->bytes_delalloc)
+ meta_sinfo->force_delalloc = 1;
+ else
+ meta_sinfo->force_delalloc = 0;
+}
- if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
- struct btrfs_trans_handle *trans;
- if (!meta_sinfo->full) {
- meta_sinfo->force_alloc = 1;
- spin_unlock(&meta_sinfo->lock);
+static int maybe_allocate_chunk(struct btrfs_root *root,
+ struct btrfs_space_info *info)
+{
+ struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+ struct btrfs_trans_handle *trans;
+ bool wait = false;
+ int ret = 0;
+ u64 min_metadata;
+ u64 free_space;
- trans = btrfs_start_transaction(root, 1);
- if (!trans)
- return -ENOMEM;
+ free_space = btrfs_super_total_bytes(disk_super);
+ /*
+ * we allow the metadata to grow to a max of either 5gb or 5% of the
+ * space in the volume.
+ */
+ min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+ div64_u64(free_space * 5, 100));
+ if (info->total_bytes >= min_metadata) {
+ spin_unlock(&info->lock);
+ return 0;
+ }
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- 2 * 1024 * 1024, alloc_target, 0);
- btrfs_end_transaction(trans, root);
+ if (info->full) {
+ spin_unlock(&info->lock);
+ return 0;
+ }
+
+ if (!info->allocating_chunk) {
+ info->force_alloc = 1;
+ info->allocating_chunk = 1;
+ init_waitqueue_head(&info->wait);
+ } else {
+ wait = true;
+ }
+
+ spin_unlock(&info->lock);
+
+ if (wait) {
+ wait_event(info->wait,
+ !info->allocating_chunk);
+ return 1;
+ }
+
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ 4096 + 2 * 1024 * 1024,
+ info->flags, 0);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ goto out;
+out:
+ spin_lock(&info->lock);
+ info->allocating_chunk = 0;
+ spin_unlock(&info->lock);
+ wake_up(&info->wait);
+
+ if (ret)
+ return 0;
+ return 1;
+}
+
+/*
+ * Reserve metadata space for delalloc.
+ */
+int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
+ struct inode *inode, int num_items)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 num_bytes;
+ u64 used;
+ u64 alloc_target;
+ int flushed = 0;
+ int force_delalloc;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+ num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+ num_items);
+again:
+ spin_lock(&meta_sinfo->lock);
+
+ force_delalloc = meta_sinfo->force_delalloc;
+
+ if (unlikely(!meta_sinfo->bytes_root))
+ meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+ if (!flushed)
+ meta_sinfo->bytes_delalloc += num_bytes;
+
+ used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+ meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+ meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+ if (used > meta_sinfo->total_bytes) {
+ flushed++;
+
+ if (flushed == 1) {
+ if (maybe_allocate_chunk(root, meta_sinfo))
+ goto again;
+ flushed++;
+ } else {
+ spin_unlock(&meta_sinfo->lock);
+ }
+
+ if (flushed == 2) {
+ filemap_flush(inode->i_mapping);
+ goto again;
+ } else if (flushed == 3) {
+ btrfs_start_delalloc_inodes(root);
+ btrfs_wait_ordered_extents(root, 0);
goto again;
}
+ spin_lock(&meta_sinfo->lock);
+ meta_sinfo->bytes_delalloc -= num_bytes;
spin_unlock(&meta_sinfo->lock);
+ printk(KERN_ERR "enospc, has %d, reserved %d\n",
+ BTRFS_I(inode)->delalloc_extents,
+ BTRFS_I(inode)->delalloc_reserved_extents);
+ dump_space_info(meta_sinfo, 0, 0);
+ return -ENOSPC;
+ }
- if (!committed) {
- committed = 1;
- trans = btrfs_join_transaction(root, 1);
- if (!trans)
- return -ENOMEM;
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
+ BTRFS_I(inode)->delalloc_reserved_extents++;
+ check_force_delalloc(meta_sinfo);
+ spin_unlock(&meta_sinfo->lock);
+
+ if (!flushed && force_delalloc)
+ filemap_flush(inode->i_mapping);
+
+ return 0;
+}
+
+/*
+ * unreserve num_items number of items worth of metadata space. This needs to
+ * be paired with btrfs_reserve_metadata_space.
+ *
+ * NOTE: if you have the option, run this _AFTER_ you do a
+ * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
+ * oprations which will result in more used metadata, so we want to make sure we
+ * can do that without issue.
+ */
+int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 num_bytes;
+ u64 alloc_target;
+ bool bug = false;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+ num_bytes = calculate_bytes_needed(root, num_items);
+
+ spin_lock(&meta_sinfo->lock);
+ if (meta_sinfo->bytes_may_use < num_bytes) {
+ bug = true;
+ meta_sinfo->bytes_may_use = 0;
+ } else {
+ meta_sinfo->bytes_may_use -= num_bytes;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ BUG_ON(bug);
+
+ return 0;
+}
+
+/*
+ * Reserve some metadata space for use. We'll calculate the worste case number
+ * of bytes that would be needed to modify num_items number of items. If we
+ * have space, fantastic, if not, you get -ENOSPC. Please call
+ * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
+ * items you reserved, since whatever metadata you needed should have already
+ * been allocated.
+ *
+ * This will commit the transaction to make more space if we don't have enough
+ * metadata space. THe only time we don't do this is if we're reserving space
+ * inside of a transaction, then we will just return -ENOSPC and it is the
+ * callers responsibility to handle it properly.
+ */
+int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 num_bytes;
+ u64 used;
+ u64 alloc_target;
+ int retries = 0;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+ num_bytes = calculate_bytes_needed(root, num_items);
+again:
+ spin_lock(&meta_sinfo->lock);
+
+ if (unlikely(!meta_sinfo->bytes_root))
+ meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+ if (!retries)
+ meta_sinfo->bytes_may_use += num_bytes;
+
+ used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+ meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+ meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+ if (used > meta_sinfo->total_bytes) {
+ retries++;
+ if (retries == 1) {
+ if (maybe_allocate_chunk(root, meta_sinfo))
+ goto again;
+ retries++;
+ } else {
+ spin_unlock(&meta_sinfo->lock);
+ }
+
+ if (retries == 2) {
+ btrfs_start_delalloc_inodes(root);
+ btrfs_wait_ordered_extents(root, 0);
goto again;
}
+ spin_lock(&meta_sinfo->lock);
+ meta_sinfo->bytes_may_use -= num_bytes;
+ spin_unlock(&meta_sinfo->lock);
+
+ dump_space_info(meta_sinfo, 0, 0);
return -ENOSPC;
}
+
+ check_force_delalloc(meta_sinfo);
spin_unlock(&meta_sinfo->lock);
return 0;
@@ -2649,13 +3126,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
data_sinfo = BTRFS_I(inode)->space_info;
+ if (!data_sinfo)
+ goto alloc;
+
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
- data_sinfo->bytes_may_use < bytes) {
+ data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
struct btrfs_trans_handle *trans;
/*
@@ -2667,7 +3147,7 @@ again:
data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);
-
+alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans)
@@ -2679,12 +3159,17 @@ again:
btrfs_end_transaction(trans, root);
if (ret)
return ret;
+
+ if (!data_sinfo) {
+ btrfs_set_inode_space_info(root, inode);
+ data_sinfo = BTRFS_I(inode)->space_info;
+ }
goto again;
}
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
- if (!committed) {
+ if (!committed && !root->fs_info->open_ioctl_trans) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
if (!trans)
@@ -2697,7 +3182,7 @@ again:
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
- "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
"%llu total\n", (unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_delalloc,
(unsigned long long)data_sinfo->bytes_used,
@@ -2712,7 +3197,7 @@ again:
BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);
- return btrfs_check_metadata_free_space(root);
+ return 0;
}
/*
@@ -2811,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
BUG_ON(!space_info);
spin_lock(&space_info->lock);
- if (space_info->force_alloc) {
+ if (space_info->force_alloc)
force = 1;
- space_info->force_alloc = 0;
- }
if (space_info->full) {
spin_unlock(&space_info->lock);
goto out;
}
thresh = space_info->total_bytes - space_info->bytes_readonly;
- thresh = div_factor(thresh, 6);
+ thresh = div_factor(thresh, 8);
if (!force &&
(space_info->bytes_used + space_info->bytes_pinned +
space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -2835,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
*/
- if (flags & BTRFS_BLOCK_GROUP_DATA) {
+ if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
fs_info->data_chunk_allocations++;
if (!(fs_info->data_chunk_allocations %
fs_info->metadata_ratio))
@@ -2843,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
}
ret = btrfs_alloc_chunk(trans, extent_root, flags);
+ spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
+ space_info->force_alloc = 0;
+ spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
@@ -2893,10 +3379,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
num_bytes = min(total, cache->key.offset - byte_in_group);
if (alloc) {
old_val += num_bytes;
+ btrfs_set_block_group_used(&cache->item, old_val);
+ cache->reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
@@ -2941,110 +3429,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return bytenr;
}
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ BUG_ON(!cache);
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned += num_bytes;
+ cache->space_info->bytes_pinned += num_bytes;
+ if (reserved) {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
+ }
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+
+ btrfs_put_block_group(cache);
+
+ set_extent_dirty(fs_info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1, GFP_NOFS);