aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c2255
1 files changed, 1353 insertions, 902 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c6a4f459ad7..b9080d71991 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,10 +35,9 @@
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int alloc,
- int mark_free);
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve);
+ u64 bytenr, u64 num_bytes, int alloc);
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve, int sinfo);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 num_bytes,
- int is_data, int reserved,
- struct extent_buffer **must_clean);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
- if (atomic_dec_and_test(&cache->count))
+ if (atomic_dec_and_test(&cache->count)) {
+ WARN_ON(cache->pinned > 0);
+ WARN_ON(cache->reserved > 0);
+ WARN_ON(cache->reserved_pinned > 0);
kfree(cache);
+ }
}
/*
@@ -319,7 +316,7 @@ static int caching_kthread(void *data)
exclude_super_stripes(extent_root, block_group);
spin_lock(&block_group->space_info->lock);
- block_group->space_info->bytes_super += block_group->bytes_super;
+ block_group->space_info->bytes_readonly += block_group->bytes_super;
spin_unlock(&block_group->space_info->lock);
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
+ flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
+ BTRFS_BLOCK_GROUP_METADATA;
+
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags == flags) {
@@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
}
/*
+ * helper function to lookup reference count and flags of extent.
+ *
+ * the head node for delayed ref is used to store the sum of all the
+ * reference count modifications queued up in the rbtree. the head
+ * node may also store the extent flags to set. This way you can check
+ * to see what the reference count and extent flags would be if all of
+ * the delayed refs are not processed.
+ */
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 *refs, u64 *flags)
+{
+ struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_path *path;
+ struct btrfs_extent_item *ei;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u32 item_size;
+ u64 num_refs;
+ u64 extent_flags;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ if (!trans) {
+ path->skip_locking = 1;
+ path->search_commit_root = 1;
+ }
+again:
+ ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+ &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free;
+
+ if (ret == 0) {
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (item_size >= sizeof(*ei)) {
+ ei = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item);
+ num_refs = btrfs_extent_refs(leaf, ei);
+ extent_flags = btrfs_extent_flags(leaf, ei);
+ } else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+ struct btrfs_extent_item_v0 *ei0;
+ BUG_ON(item_size != sizeof(*ei0));
+ ei0 = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item_v0);
+ num_refs = btrfs_extent_refs_v0(leaf, ei0);
+ /* FIXME: this isn't correct for data */
+ extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+ BUG();
+#endif
+ }
+ BUG_ON(num_refs == 0);
+ } else {
+ num_refs = 0;
+ extent_flags = 0;
+ ret = 0;
+ }
+
+ if (!trans)
+ goto out;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (head) {
+ if (!mutex_trylock(&head->mutex)) {
+ atomic_inc(&head->node.refs);
+ spin_unlock(&delayed_refs->lock);
+
+ btrfs_release_path(root->fs_info->extent_root, path);
+
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(&head->node);
+ goto again;
+ }
+ if (head->extent_op && head->extent_op->update_flags)
+ extent_flags |= head->extent_op->flags_to_set;
+ else
+ BUG_ON(num_refs == 0);
+
+ num_refs += head->node.ref_mod;
+ mutex_unlock(&head->mutex);
+ }
+ spin_unlock(&delayed_refs->lock);
+out:
+ WARN_ON(num_refs == 0);
+ if (refs)
+ *refs = num_refs;
+ if (flags)
+ *flags = extent_flags;
+out_free:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
* Back reference rules. Back refs have three main goals:
*
* 1) differentiate between all holders of references to an extent so that
@@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
return ret;
}
-
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
- int mark_free = 0;
- struct extent_buffer *must_clean = NULL;
-
- ret = pin_down_bytes(trans, root, NULL,
- node->bytenr, node->num_bytes,
- head->is_data, 1, &must_clean);
- if (ret > 0)
- mark_free = 1;
-
- if (must_clean) {
- clean_tree_block(NULL, root, must_clean);
- btrfs_tree_unlock(must_clean);
- free_extent_buffer(must_clean);
- }
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- if (mark_free) {
- ret = btrfs_free_reserved_extent(root,
- node->bytenr,
- node->num_bytes);
- BUG_ON(ret);
- }
}
mutex_unlock(&head->mutex);
return 0;
@@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
ret = 0;
out:
btrfs_free_path(path);
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ WARN_ON(ret > 0);
return ret;
}
@@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
+ int i;
+ int factor;
+
+ if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
found = __find_space_info(info, flags);
if (found) {
spin_lock(&found->lock);
found->total_bytes += total_bytes;
found->bytes_used += bytes_used;
+ found->disk_used += bytes_used * factor;
found->full = 0;
spin_unlock(&found->lock);
*space_info = found;
@@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- INIT_LIST_HEAD(&found->block_groups);
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
- init_waitqueue_head(&found->flush_wait);
- init_waitqueue_head(&found->allocate_wait);
spin_lock_init(&found->lock);
- found->flags = flags;
+ found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
+ BTRFS_BLOCK_GROUP_SYSTEM |
+ BTRFS_BLOCK_GROUP_METADATA);
found->total_bytes = total_bytes;
found->bytes_used = bytes_used;
+ found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
- found->bytes_delalloc = 0;
+ found->bytes_may_use = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
-static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
-{
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (!cache->ro) {
- cache->space_info->bytes_readonly += cache->key.offset -
- btrfs_block_group_used(&cache->item);
- cache->ro = 1;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-}
-
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->rw_devices;
@@ -2752,491 +2840,50 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
-{
- struct btrfs_fs_info *info = root->fs_info;
- u64 alloc_profile;
-
- if (data) {
- alloc_profile = info->avail_data_alloc_bits &
- info->data_alloc_profile;
- data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
- } else if (root == root->fs_info->chunk_root) {
- alloc_profile = info->avail_system_alloc_bits &
- info->system_alloc_profile;
- data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
- } else {
- alloc_profile = info->avail_metadata_alloc_bits &
- info->metadata_alloc_profile;
- data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
- }
-
- return btrfs_reduce_alloc_profile(root, data);
-}
-
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
- u64 alloc_target;
-
- alloc_target = btrfs_get_alloc_profile(root, 1);
- BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- alloc_target);
-}
-
-static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
-{
- u64 num_bytes;
- int level;
-
- level = BTRFS_MAX_LEVEL - 2;
- /*
- * NOTE: these calculations are absolutely the worst possible case.
- * This assumes that _every_ item we insert will require a new leaf, and
- * that the tree has grown to its maximum level size.
- */
-
- /*
- * for every item we insert we could insert both an extent item and a
- * extent ref item. Then for ever item we insert, we will need to cow
- * both the original leaf, plus the leaf to the left and right of it.
- *
- * Unless we are talking about the extent root, then we just want the
- * number of items * 2, since we just need the extent item plus its ref.
- */
- if (root == root->fs_info->extent_root)
- num_bytes = num_items * 2;
- else
- num_bytes = (num_items + (2 * num_items)) * 3;
-
- /*
- * num_bytes is total number of leaves we could need times the leaf
- * size, and then for every leaf we could end up cow'ing 2 nodes per
- * level, down to the leaf level.
- */
- num_bytes = (num_bytes * root->leafsize) +
- (num_bytes * (level * 2)) * root->nodesize;
-
- return num_bytes;
-}
-
-/*
- * Unreserve metadata space for delalloc. If we have less reserved credits than
- * we have extents, this function does nothing.
- */
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 alloc_target;
- bool bug = false;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
- num_items);
-
- spin_lock(&meta_sinfo->lock);
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- if (BTRFS_I(inode)->reserved_extents <=
- BTRFS_I(inode)->outstanding_extents) {
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- spin_unlock(&meta_sinfo->lock);
- return 0;
- }
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
- BTRFS_I(inode)->reserved_extents -= num_items;
- BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
-
- if (meta_sinfo->bytes_delalloc < num_bytes) {
- bug = true;
- meta_sinfo->bytes_delalloc = 0;
- } else {
- meta_sinfo->bytes_delalloc -= num_bytes;
- }
- spin_unlock(&meta_sinfo->lock);
-
- BUG_ON(bug);
-
- return 0;
-}
-
-static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
- u64 thresh;
-
- thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
- meta_sinfo->bytes_super + meta_sinfo->bytes_root +
- meta_sinfo->bytes_may_use;
-
- thresh = meta_sinfo->total_bytes - thresh;
- thresh *= 80;
- do_div(thresh, 100);
- if (thresh <= meta_sinfo->bytes_delalloc)
- meta_sinfo->force_delalloc = 1;
- else
- meta_sinfo->force_delalloc = 0;
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ flags |= root->fs_info->avail_data_alloc_bits &
+ root->fs_info->data_alloc_profile;
+ else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ flags |= root->fs_info->avail_system_alloc_bits &
+ root->fs_info->system_alloc_profile;
+ else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+ flags |= root->fs_info->avail_metadata_alloc_bits &
+ root->fs_info->metadata_alloc_profile;
+ return btrfs_reduce_alloc_profile(root, flags);
}
-struct async_flush {
- struct btrfs_root *root;
- struct btrfs_space_info *info;
- struct btrfs_work work;
-};
-
-static noinline void flush_delalloc_async(struct btrfs_work *work)
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
{
- struct async_flush *async;
- struct btrfs_root *root;
- struct btrfs_space_info *info;
-
- async = container_of(work, struct async_flush, work);
- root = async->root;
- info = async->info;
-
- btrfs_start_delalloc_inodes(root, 0);
- wake_up(&info->flush_wait);
- btrfs_wait_ordered_extents(root, 0, 0);
-
- spin_lock(&info->lock);
- info->flushing = 0;
- spin_unlock(&info->lock);
- wake_up(&info->flush_wait);
-
- kfree(async);
-}
-
-static void wait_on_flush(struct btrfs_space_info *info)
-{
- DEFINE_WAIT(wait);
- u64 used;
-
- while (1) {
- prepare_to_wait(&info->flush_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- spin_lock(&info->lock);
- if (!info->flushing) {
- spin_unlock(&info->lock);
- break;
- }
-
- used = info->bytes_used + info->bytes_reserved +
- info->bytes_pinned + info->bytes_readonly +
- info->bytes_super + info->bytes_root +
- info->bytes_may_use + info->bytes_delalloc;
- if (used < info->total_bytes) {
- spin_unlock(&info->lock);
- break;
- }
- spin_unlock(&info->lock);
- schedule();
- }
- finish_wait(&info->flush_wait, &wait);
-}
-
-static void flush_delalloc(struct btrfs_root *root,
- struct btrfs_space_info *info)
-{
- struct async_flush *async;
- bool wait = false;
-
- spin_lock(&info->lock);
+ u64 flags;
- if (!info->flushing)
- info->flushing = 1;
+ if (data)
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ else if (root == root->fs_info->chunk_root)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
- wait = true;
-
- spin_unlock(&info->lock);
-
- if (wait) {
- wait_on_flush(info);
- return;
- }
-
- async = kzalloc(sizeof(*async), GFP_NOFS);
- if (!async)
- goto flush;
-
- async->root = root;
- async->info = info;
- async->work.func = flush_delalloc_async;
+ flags = BTRFS_BLOCK_GROUP_METADATA;
- btrfs_queue_worker(&root->fs_info->enospc_workers,
- &async->work);
- wait_on_flush(info);
- return;
-
-flush:
- btrfs_start_delalloc_inodes(root, 0);
- btrfs_wait_ordered_extents(root, 0, 0);
-
- spin_lock(&info->lock);
- info->flushing = 0;
- spin_unlock(&info->lock);
- wake_up(&info->flush_wait);
+ return get_alloc_profile(root, flags);
}
-static int maybe_allocate_chunk(struct btrfs_root *root,
- struct btrfs_space_info *info)
-{
- struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
- struct btrfs_trans_handle *trans;
- bool wait = false;
- int ret = 0;
- u64 min_metadata;
- u64 free_space;
-
- free_space = btrfs_super_total_bytes(disk_super);
- /*
- * we allow the metadata to grow to a max of either 10gb or 5% of the
- * space in the volume.
- */
- min_metadata = min((u64)10 * 1024 * 1024 * 1024,
- div64_u64(free_space * 5, 100));
- if (info->total_bytes >= min_metadata) {
- spin_unlock(&info->lock);
- return 0;
- }
-
- if (info->full) {
- spin_unlock(&info->lock);
- return 0;
- }
-
- if (!info->allocating_chunk) {
- info->force_alloc = 1;
- info->allocating_chunk = 1;
- } else {
- wait = true;
- }
-
- spin_unlock(&info->lock);
-
- if (wait) {
- wait_event(info->allocate_wait,
- !info->allocating_chunk);
- return 1;
- }
-
- trans = btrfs_start_transaction(root, 1);
- if (!trans) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- 4096 + 2 * 1024 * 1024,
- info->flags, 0);
- btrfs_end_transaction(trans, root);
- if (ret)
- goto out;
-out:
- spin_lock(&info->lock);
- info->allocating_chunk = 0;
- spin_unlock(&info->lock);
- wake_up(&info->allocate_wait);
-
- if (ret)
- return 0;
- return 1;
-}
-
-/*
- * Reserve metadata space for delalloc.
- */
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 used;
- u64 alloc_target;
- int flushed = 0;
- int force_delalloc;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
- num_items);
-again:
- spin_lock(&meta_sinfo->lock);
-
- force_delalloc = meta_sinfo->force_delalloc;
-
- if (unlikely(!meta_sinfo->bytes_root))
- meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
-
- if (!flushed)
- meta_sinfo->bytes_delalloc += num_bytes;
-
- used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
- meta_sinfo->bytes_super + meta_sinfo->bytes_root +
- meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
-
- if (used > meta_sinfo->total_bytes) {
- flushed++;
-
- if (flushed == 1) {
- if (maybe_allocate_chunk(root, meta_sinfo))
- goto again;
- flushed++;
- } else {
- spin_unlock(&meta_sinfo->lock);
- }
-
- if (flushed == 2) {
- filemap_flush(inode->i_mapping);
- goto again;
- } else if (flushed == 3) {
- flush_delalloc(root, meta_sinfo);
- goto again;
- }
- spin_lock(&meta_sinfo->lock);
- meta_sinfo->bytes_delalloc -= num_bytes;
- spin_unlock(&meta_sinfo->lock);
- printk(KERN_ERR "enospc, has %d, reserved %d\n",
- BTRFS_I(inode)->outstanding_extents,
- BTRFS_I(inode)->reserved_extents);
- dump_space_info(meta_sinfo, 0, 0);
- return -ENOSPC;
- }
-
- BTRFS_I(inode)->reserved_extents += num_items;
- check_force_delalloc(meta_sinfo);
- spin_unlock(&meta_sinfo->lock);
-
- if (!flushed && force_delalloc)
- filemap_flush(inode->i_mapping);
-
- return 0;
-}
-
-/*
- * unreserve num_items number of items worth of metadata space. This needs to
- * be paired with btrfs_reserve_metadata_space.
- *
- * NOTE: if you have the option, run this _AFTER_ you do a
- * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
- * oprations which will result in more used metadata, so we want to make sure we
- * can do that without issue.
- */
-int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 alloc_target;
- bool bug = false;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root, num_items);
-
- spin_lock(&meta_sinfo->lock);
- if (meta_sinfo->bytes_may_use < num_bytes) {
- bug = true;
- meta_sinfo->bytes_may_use = 0;
- } else {
- meta_sinfo->bytes_may_use -= num_bytes;
- }
- spin_unlock(&meta_sinfo->lock);
-
- BUG_ON(bug);
-
- return 0;
-}
-
-/*
- * Reserve some metadata space for use. We'll calculate the worste case number
- * of bytes that would be needed to modify num_items number of items. If we
- * have space, fantastic, if not, you get -ENOSPC. Please call
- * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
- * items you reserved, since whatever metadata you needed should have already
- * been allocated.
- *
- * This will commit the transaction to make more space if we don't have enough
- * metadata space. THe only time we don't do this is if we're reserving space
- * inside of a transaction, then we will just return -ENOSPC and it is the
- * callers responsibility to handle it properly.
- */
-int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 used;
- u64 alloc_target;
- int retries = 0;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root, num_items);
-again:
- spin_lock(&meta_sinfo->lock);
-
- if (unlikely(!meta_sinfo->bytes_root))
- meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
-
- if (!retries)
- meta_sinfo->bytes_may_use += num_bytes;
-
- used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
- meta_sinfo->bytes_super + meta_sinfo->bytes_root +
- meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
-
- if (used > meta_sinfo->total_bytes) {
- retries++;
- if (retries == 1) {
- if (maybe_allocate_chunk(root, meta_sinfo))
- goto again;
- retries++;
- } else {
- spin_unlock(&meta_sinfo->lock);
- }
-
- if (retries == 2) {
- flush_delalloc(root, meta_sinfo);
- goto again;
- }
- spin_lock(&meta_sinfo->lock);
- meta_sinfo->bytes_may_use -= num_bytes;
- spin_unlock(&meta_sinfo->lock);
-
- dump_space_info(meta_sinfo, 0, 0);
- return -ENOSPC;
- }
-
- check_force_delalloc(meta_sinfo);
- spin_unlock(&meta_sinfo->lock);
-
- return 0;
+ BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+ BTRFS_BLOCK_GROUP_DATA);
}
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
- int ret = 0, committed = 0, flushed = 0;
+ int ret = 0, committed = 0;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3248,21 +2895,13 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
- used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
- data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
- data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
- data_sinfo->bytes_super;
+ used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
+ data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
+ data_sinfo->bytes_may_use;
if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans;
- if (!flushed) {
- spin_unlock(&data_sinfo->lock);
- flush_delalloc(root, data_sinfo);
- flushed = 1;
- goto again;
- }
-
/*
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
@@ -3274,15 +2913,15 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
- trans = btrfs_start_transaction(root, 1);
- if (!trans)
- return -ENOMEM;
+ trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
- if (ret)
+ if (ret < 0)
return ret;
if (!data_sinfo) {
@@ -3297,25 +2936,26 @@ alloc:
if (!committed && !root->fs_info->open_ioctl_trans) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
- if (!trans)
- return -ENOMEM;
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
goto again;
}
- printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
- ", %llu bytes_used, %llu bytes_reserved, "
- "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
- "%llu total\n", (unsigned long long)bytes,
- (unsigned long long)data_sinfo->bytes_delalloc,
+#if 0 /* I hope we never need this code again, just in case */
+ printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
+ "%llu bytes_reserved, " "%llu bytes_pinned, "
+ "%llu bytes_readonly, %llu may use %llu total\n",
+ (unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_used,
(unsigned long long)data_sinfo->bytes_reserved,
(unsigned long long)data_sinfo->bytes_pinned,
(unsigned long long)data_sinfo->bytes_readonly,
(unsigned long long)data_sinfo->bytes_may_use,
(unsigned long long)data_sinfo->total_bytes);
+#endif
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -3326,12 +2966,13 @@ alloc:
}
/*
- * if there was an error for whatever reason after calling
- * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ * called when we are clearing an delalloc extent from the
+ * inode's io_tree or there was an error for whatever reason
+ * after calling btrfs_check_data_free_space
*/
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
- struct inode *inode, u64 bytes)
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_space_info *data_sinfo;
/* make sure bytes are sectorsize aligned */
@@ -3344,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root,
spin_unlock(&data_sinfo->lock);
}
-/* called when we are adding a delalloc extent to the inode's io_tree */
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
-{
- struct btrfs_space_info *data_sinfo;
-
- /* get the space info for where this inode will be storing its data */
- data_sinfo = BTRFS_I(inode)->space_info;
-
- /* make sure we have enough space to handle the data first */
- spin_lock(&data_sinfo->lock);
- data_sinfo->bytes_delalloc += bytes;
-
- /*
- * we are adding a delalloc extent without calling
- * btrfs_check_data_free_space first. This happens on a weird
- * writepage condition, but shouldn't hurt our accounting
- */
- if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
- data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
- BTRFS_I(inode)->reserved_bytes = 0;
- } else {
- data_sinfo->bytes_may_use -= bytes;
- BTRFS_I(inode)->reserved_bytes -= bytes;
- }
-
- spin_unlock(&data_sinfo->lock);
-}
-
-/* called when we are clearing an delalloc extent from the inode's io_tree */
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
-{
- struct btrfs_space_info *info;
-
- info = BTRFS_I(inode)->space_info;
-
- spin_lock(&info->lock);
- info->bytes_delalloc -= bytes;
- spin_unlock(&info->lock);
-}
-
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
@@ -3399,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+ u64 alloc_bytes)
+{
+ u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+
+ if (sinfo->bytes_used + sinfo->bytes_reserved +
+ alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+ return 0;
+
+ if (sinfo->bytes_used + sinfo->bytes_reserved +
+ alloc_bytes < div_factor(num_bytes, 8))
+ return 0;
+
+ return 1;
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
{
struct btrfs_space_info *space_info;
struct btrfs_fs_info *fs_info = extent_root->fs_info;
- u64 thresh;
int ret = 0;
mutex_lock(&fs_info->chunk_mutex);
@@ -3428,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
- thresh = space_info->total_bytes - space_info->bytes_readonly;
- thresh = div_factor(thresh, 8);
- if (!force &&
- (space_info->bytes_used + space_info->bytes_pinned +
- space_info->bytes_reserved + alloc_bytes) < thresh) {
+ if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
@@ -3454,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
+ else
+ ret = 1;
space_info->force_alloc = 0;
spin_unlock(&space_info->lock);
out:
@@ -3461,13 +3073,713 @@ out:
return ret;
}
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 num_bytes)
+{
+ int ret;
+ int end_trans = 0;
+
+ if (sinfo->full)
+ return 0;
+
+ spin_lock(&sinfo->lock);
+ ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
+ spin_unlock(&sinfo->lock);
+ if (!ret)
+ return 0;
+
+ if (!trans) {
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
+ end_trans = 1;
+ }
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ num_bytes + 2 * 1024 * 1024,
+ get_alloc_profile(root, sinfo->flags), 0);
+
+ if (end_trans)
+ btrfs_end_transaction(trans, root);
+
+ return ret == 1 ? 1 : 0;
+}
+
+/*
+ * shrink metadata reservation for delalloc
+ */
+static int shrink_delalloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 to_reclaim)
+{
+ struct btrfs_block_rsv *block_rsv;
+ u64 reserved;
+ u64 max_reclaim;
+ u64 reclaimed = 0;
+ int pause = 1;
+ int ret;
+
+ block_rsv = &root->fs_info->delalloc_block_rsv;
+ spin_lock(&block_rsv->lock);
+ reserved = block_rsv->reserved;
+ spin_unlock(&block_rsv->lock);
+
+ if (reserved == 0)
+ return 0;
+
+ max_reclaim = min(reserved, to_reclaim);
+
+ while (1) {
+ ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
+ if (!ret) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(pause);
+ pause <<= 1;
+ if (pause > HZ / 10)
+ pause = HZ / 10;
+ } else {
+ pause = 1;
+ }
+
+ spin_lock(&block_rsv->lock);
+ if (reserved > block_rsv->reserved)
+ reclaimed = reserved - block_rsv->reserved;
+ reserved = block_rsv->reserved;
+ spin_unlock(&block_rsv->lock);
+
+ if (reserved == 0 || reclaimed >= max_reclaim)
+ break;
+
+ if (trans && trans->transaction->blocked)
+ return -EAGAIN;
+ }
+ return reclaimed >= to_reclaim;
+}
+
+static int should_retry_reserve(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int *retries)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+ int ret;
+
+ if ((*retries) > 2)
+ return -ENOSPC;
+
+ ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
+ if (ret)
+ return 1;
+
+ if (trans && trans->transaction->in_commit)
+ return -ENOSPC;
+
+ ret = shrink_delalloc(trans, root, num_bytes);
+ if (ret)
+ return ret;
+
+ spin_lock(&space_info->lock);
+ if (space_info->bytes_pinned < num_bytes)
+ ret = 1;
+ spin_unlock(&space_info->lock);
+ if (ret)
+ return -ENOSPC;
+
+ (*retries)++;
+
+ if (trans)
+ return -EAGAIN;
+
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+
+ return 1;
+}
+
+static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ struct btrfs_space_info *space_info = block_rsv->space_info;
+ u64 unused;
+ int ret = -ENOSPC;
+
+ spin_lock(&space_info->lock);
+ unused = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + spac