Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 2207 |
1 file changed, 1173 insertions(+), 1034 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104..e238a0cdac6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -32,12 +32,12 @@ #include "locking.h" #include "free-space-cache.h" -static int update_reserved_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int reserve); static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, int level, struct btrfs_key *ins); - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force); +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, + int is_data, int reserved, + struct extent_buffer **must_clean); +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key); +static void dump_space_info(struct btrfs_space_info *info, u64 bytes, + int dump_block_groups); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, return ret; } -/* - * We always set EXTENT_LOCKED for the super mirror extents so we don't - * overwrite them, so those bits need to be unset. Also, if we are unmounting - * with pinned extents still sitting there because we had a block group caching, - * we need to clear those now, since we are done. 
- */ -void btrfs_free_pinned_extents(struct btrfs_fs_info *info) +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) { - u64 start, end, last = 0; - int ret; + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + set_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); + return 0; +} - while (1) { - ret = find_first_extent_bit(&info->pinned_extents, last, - &start, &end, - EXTENT_LOCKED|EXTENT_DIRTY); - if (ret) - break; +static void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; - clear_extent_bits(&info->pinned_extents, start, end, - EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); - last = end+1; - } + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + clear_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); } -static int remove_sb_from_cache(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) +static int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - struct btrfs_fs_info *fs_info = root->fs_info; u64 bytenr; u64 *logical; int stripe_len; @@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); BUG_ON(ret); + while (nr--) { - try_lock_extent(&fs_info->pinned_extents, - logical[nr], - logical[nr] + stripe_len - 1, GFP_NOFS); + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, logical[nr], + stripe_len); + BUG_ON(ret); } + kfree(logical); } - return 0; } +static struct btrfs_caching_control * +get_caching_control(struct btrfs_block_group_cache *cache) +{ + struct btrfs_caching_control *ctl; + + spin_lock(&cache->lock); + if (cache->cached != BTRFS_CACHE_STARTED) { + spin_unlock(&cache->lock); + return NULL; + } + + ctl = cache->caching_ctl; + atomic_inc(&ctl->count); + spin_unlock(&cache->lock); + return ctl; +} + +static void put_caching_control(struct btrfs_caching_control *ctl) +{ + if (atomic_dec_and_test(&ctl->count)) + kfree(ctl); +} + /* * this is only called by cache_block_group, since we could have freed extents * we need to check the pinned_extents for any extents that can't be used yet @@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, int ret; while (start < end) { - ret = find_first_extent_bit(&info->pinned_extents, start, + ret = find_first_extent_bit(info->pinned_extents, start, &extent_start, &extent_end, - EXTENT_DIRTY|EXTENT_LOCKED); + EXTENT_DIRTY | EXTENT_UPTODATE); if (ret) break; @@ -249,22 +283,27 @@ static int caching_kthread(void *data) { struct btrfs_block_group_cache *block_group = data; struct btrfs_fs_info *fs_info = block_group->fs_info; - u64 last = 0; + struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_path *path; - int ret = 0; - struct btrfs_key key; struct extent_buffer *leaf; - int slot; + struct btrfs_key key; u64 total_found = 0; - - BUG_ON(!fs_info); + u64 last = 0; + u32 nritems; + int ret = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - atomic_inc(&block_group->space_info->caching_threads); + exclude_super_stripes(extent_root, block_group); + spin_lock(&block_group->space_info->lock); + 
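
For illustration, a minimal userspace model of the acquire/release discipline behind get_caching_control()/put_caching_control() above: a reference is handed out only while caching is in flight, and the last put frees the control. The type and function names below are stand-ins, not btrfs code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

enum cache_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct ctl_model {
	atomic_int count;                 /* list ref + kthread ref + readers */
};

struct block_group_model {
	pthread_mutex_t lock;
	enum cache_state cached;
	struct ctl_model *ctl;
};

/* hand out a reference only while the group is being cached */
static struct ctl_model *get_ctl(struct block_group_model *bg)
{
	struct ctl_model *ctl = NULL;

	pthread_mutex_lock(&bg->lock);
	if (bg->cached == CACHE_STARTED) {
		ctl = bg->ctl;
		atomic_fetch_add(&ctl->count, 1);
	}
	pthread_mutex_unlock(&bg->lock);
	return ctl;
}

/* the last holder frees the control */
static void put_ctl(struct ctl_model *ctl)
{
	if (atomic_fetch_sub(&ctl->count, 1) == 1)
		free(ctl);
}

int main(void)
{
	struct block_group_model bg = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cached = CACHE_STARTED,
		.ctl = calloc(1, sizeof(struct ctl_model)),
	};

	atomic_init(&bg.ctl->count, 1);   /* the caching_block_groups list ref */
	struct ctl_model *ref = get_ctl(&bg);
	if (ref)
		put_ctl(ref);             /* reader done */
	put_ctl(bg.ctl);                  /* list reference dropped last */
	return 0;
}
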
block_group->space_info->bytes_super += block_group->bytes_super; + spin_unlock(&block_group->space_info->lock); + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + /* * We don't want to deadlock with somebody trying to allocate a new * extent for the extent root while also trying to search the extent @@ -277,74 +316,64 @@ static int caching_kthread(void *data) key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = BTRFS_EXTENT_ITEM_KEY; again: + mutex_lock(&caching_ctl->mutex); /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); - ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + while (1) { smp_mb(); - if (block_group->fs_info->closing > 1) { + if (fs_info->closing > 1) { last = (u64)-1; break; } - leaf = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(fs_info->extent_root, path); - if (ret < 0) - goto err; - else if (ret) + if (path->slots[0] < nritems) { + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + } else { + ret = find_next_key(path, 0, &key); + if (ret) break; - if (need_resched() || - btrfs_transaction_in_commit(fs_info)) { - leaf = path->nodes[0]; - - /* this shouldn't happen, but if the - * leaf is empty just move on. - */ - if (btrfs_header_nritems(leaf) == 0) - break; - /* - * we need to copy the key out so that - * we are sure the next search advances - * us forward in the btree. - */ - btrfs_item_key_to_cpu(leaf, &key, 0); - btrfs_release_path(fs_info->extent_root, path); - up_read(&fs_info->extent_commit_sem); + caching_ctl->progress = last; + btrfs_release_path(extent_root, path); + up_read(&fs_info->extent_commit_sem); + mutex_unlock(&caching_ctl->mutex); + if (btrfs_transaction_in_commit(fs_info)) schedule_timeout(1); - goto again; - } + else + cond_resched(); + goto again; + } + if (key.objectid < block_group->key.objectid) { + path->slots[0]++; continue; } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) - goto next; if (key.objectid >= block_group->key.objectid + block_group->key.offset) break; - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY) { total_found += add_new_free_space(block_group, fs_info, last, key.objectid); last = key.objectid + key.offset; - } - if (total_found > (1024 * 1024 * 2)) { - total_found = 0; - wake_up(&block_group->caching_q); + if (total_found > (1024 * 1024 * 2)) { + total_found = 0; + wake_up(&caching_ctl->wait); + } } -next: path->slots[0]++; } ret = 0; @@ -352,33 +381,65 @@ next: total_found += add_new_free_space(block_group, fs_info, last, block_group->key.objectid + block_group->key.offset); + caching_ctl->progress = (u64)-1; spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; block_group->cached = BTRFS_CACHE_FINISHED; spin_unlock(&block_group->lock); err: btrfs_free_path(path); up_read(&fs_info->extent_commit_sem); - atomic_dec(&block_group->space_info->caching_threads); - wake_up(&block_group->caching_q); + free_excluded_extents(extent_root, block_group); + + mutex_unlock(&caching_ctl->mutex); + wake_up(&caching_ctl->wait); + + put_caching_control(caching_ctl); + atomic_dec(&block_group->space_info->caching_threads); return 0; } static int cache_block_group(struct 
btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl; struct task_struct *tsk; int ret = 0; + smp_mb(); + if (cache->cached != BTRFS_CACHE_NO) + return 0; + + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); + BUG_ON(!caching_ctl); + + INIT_LIST_HEAD(&caching_ctl->list); + mutex_init(&caching_ctl->mutex); + init_waitqueue_head(&caching_ctl->wait); + caching_ctl->block_group = cache; + caching_ctl->progress = cache->key.objectid; + /* one for caching kthread, one for caching block group list */ + atomic_set(&caching_ctl->count, 2); + spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); - return ret; + kfree(caching_ctl); + return 0; } + cache->caching_ctl = caching_ctl; cache->cached = BTRFS_CACHE_STARTED; spin_unlock(&cache->lock); + down_write(&fs_info->extent_commit_sem); + list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); + up_write(&fs_info->extent_commit_sem); + + atomic_inc(&cache->space_info->caching_threads); + tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", cache->key.objectid); if (IS_ERR(tsk)) { @@ -1507,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -#ifdef BIO_RW_DISCARD static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, DISCARD_FL_BARRIER); } -#endif static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { -#ifdef BIO_RW_DISCARD int ret; u64 map_length = num_bytes; struct btrfs_multi_bio *multi = NULL; + if (!btrfs_test_opt(root, DISCARD)) + return 0; + /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, bytenr, &map_length, &multi, 0); @@ -1543,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } return ret; -#else - return 0; -#endif } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -1657,7 +1715,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, parent, ref_root, flags, ref->objectid, ref->offset, &ins, node->ref_mod); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, @@ -1783,7 +1840,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, extent_op->flags_to_set, &extent_op->key, ref->level, &ins); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, ref_root, @@ -1818,16 +1874,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); if (insert_reserved) { + int mark_free = 0; + struct extent_buffer *must_clean = NULL; + + ret = pin_down_bytes(trans, root, NULL, + node->bytenr, node->num_bytes, + head->is_data, 1, &must_clean); + if (ret > 0) + mark_free = 1; + + if (must_clean) { + clean_tree_block(NULL, root, must_clean); + btrfs_tree_unlock(must_clean); + free_extent_buffer(must_clean); + } if (head->is_data) { ret = btrfs_del_csums(trans, root, node->bytenr, node->num_bytes); BUG_ON(ret); } - btrfs_update_pinned_extents(root, node->bytenr, - node->num_bytes, 1); - update_reserved_extents(root, node->bytenr, - node->num_bytes, 0); + if (mark_free) { 
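
btrfs_issue_discard() above hands blkdev_issue_discard() byte offsets shifted right by 9, i.e. converted to 512-byte sectors, and btrfs_discard_extent() now only bothers when the DISCARD mount option is set. A quick standalone check of the byte-to-sector conversion (plain userspace C, arbitrary example values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t start = (uint64_t)1 << 30;   /* 1 GiB physical offset */
	uint64_t len = (uint64_t)1 << 20;     /* 1 MiB stripe to discard */

	/* start >> 9 and len >> 9 mirror the shifts in btrfs_issue_discard() */
	printf("sector=%llu nr_sects=%llu\n",
	       (unsigned long long)(start >> 9),
	       (unsigned long long)(len >> 9));
	return 0;
}
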
+ ret = btrfs_free_reserved_extent(root, + node->bytenr, + node->num_bytes); + BUG_ON(ret); + } } mutex_unlock(&head->mutex); return 0; @@ -2692,60 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) alloc_target); } +static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) +{ + u64 num_bytes; + int level; + + level = BTRFS_MAX_LEVEL - 2; + /* + * NOTE: these calculations are absolutely the worst possible case. + * This assumes that _every_ item we insert will require a new leaf, and + * that the tree has grown to its maximum level size. + */ + + /* + * for every item we insert we could insert both an extent item and a + * extent ref item. Then for ever item we insert, we will need to cow + * both the original leaf, plus the leaf to the left and right of it. + * + * Unless we are talking about the extent root, then we just want the + * number of items * 2, since we just need the extent item plus its ref. + */ + if (root == root->fs_info->extent_root) + num_bytes = num_items * 2; + else + num_bytes = (num_items + (2 * num_items)) * 3; + + /* + * num_bytes is total number of leaves we could need times the leaf + * size, and then for every leaf we could end up cow'ing 2 nodes per + * level, down to the leaf level. + */ + num_bytes = (num_bytes * root->leafsize) + + (num_bytes * (level * 2)) * root->nodesize; + + return num_bytes; +} + /* - * for now this just makes sure we have at least 5% of our metadata space free - * for use. + * Unreserve metadata space for delalloc. If we have less reserved credits than + * we have extents, this function does nothing. */ -int btrfs_check_metadata_free_space(struct btrfs_root *root) +int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items) { struct btrfs_fs_info *info = root->fs_info; struct btrfs_space_info *meta_sinfo; - u64 alloc_target, thresh; - int committed = 0, ret; + u64 num_bytes; + u64 alloc_target; + bool bug = false; /* get the space info for where the metadata will live */ alloc_target = btrfs_get_alloc_profile(root, 0); meta_sinfo = __find_space_info(info, alloc_target); -again: + num_bytes = calculate_bytes_needed(root->fs_info->extent_root, + num_items); + spin_lock(&meta_sinfo->lock); - if (!meta_sinfo->full) - thresh = meta_sinfo->total_bytes * 80; - else - thresh = meta_sinfo->total_bytes * 95; + spin_lock(&BTRFS_I(inode)->accounting_lock); + if (BTRFS_I(inode)->reserved_extents <= + BTRFS_I(inode)->outstanding_extents) { + spin_unlock(&BTRFS_I(inode)->accounting_lock); + spin_unlock(&meta_sinfo->lock); + return 0; + } + spin_unlock(&BTRFS_I(inode)->accounting_lock); + + BTRFS_I(inode)->reserved_extents--; + BUG_ON(BTRFS_I(inode)->reserved_extents < 0); + + if (meta_sinfo->bytes_delalloc < num_bytes) { + bug = true; + meta_sinfo->bytes_delalloc = 0; + } else { + meta_sinfo->bytes_delalloc -= num_bytes; + } + spin_unlock(&meta_sinfo->lock); + + BUG_ON(bug); + + return 0; +} + +static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) +{ + u64 thresh; + + thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use; + thresh = meta_sinfo->total_bytes - thresh; + thresh *= 80; do_div(thresh, 100); + if (thresh <= meta_sinfo->bytes_delalloc) + meta_sinfo->force_delalloc = 1; + else + meta_sinfo->force_delalloc = 0; +} - if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + - 
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { - struct btrfs_trans_handle *trans; - if (!meta_sinfo->full) { - meta_sinfo->force_alloc = 1; - spin_unlock(&meta_sinfo->lock); +struct async_flush { + struct btrfs_root *root; + struct btrfs_space_info *info; + struct btrfs_work work; +}; - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; +static noinline void flush_delalloc_async(struct btrfs_work *work) +{ + struct async_flush *async; + struct btrfs_root *root; + struct btrfs_space_info *info; - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, alloc_target, 0); - btrfs_end_transaction(trans, root); + async = container_of(work, struct async_flush, work); + root = async->root; + info = async->info; + + btrfs_start_delalloc_inodes(root); + wake_up(&info->flush_wait); + btrfs_wait_ordered_extents(root, 0); + + spin_lock(&info->lock); + info->flushing = 0; + spin_unlock(&info->lock); + wake_up(&info->flush_wait); + + kfree(async); +} + +static void wait_on_flush(struct btrfs_space_info *info) +{ + DEFINE_WAIT(wait); + u64 used; + + while (1) { + prepare_to_wait(&info->flush_wait, &wait, + TASK_UNINTERRUPTIBLE); + spin_lock(&info->lock); + if (!info->flushing) { + spin_unlock(&info->lock); + break; + } + + used = info->bytes_used + info->bytes_reserved + + info->bytes_pinned + info->bytes_readonly + + info->bytes_super + info->bytes_root + + info->bytes_may_use + info->bytes_delalloc; + if (used < info->total_bytes) { + spin_unlock(&info->lock); + break; + } + spin_unlock(&info->lock); + schedule(); + } + finish_wait(&info->flush_wait, &wait); +} + +static void flush_delalloc(struct btrfs_root *root, + struct btrfs_space_info *info) +{ + struct async_flush *async; + bool wait = false; + + spin_lock(&info->lock); + + if (!info->flushing) { + info->flushing = 1; + init_waitqueue_head(&info->flush_wait); + } else { + wait = true; + } + + spin_unlock(&info->lock); + + if (wait) { + wait_on_flush(info); + return; + } + + async = kzalloc(sizeof(*async), GFP_NOFS); + if (!async) + goto flush; + + async->root = root; + async->info = info; + async->work.func = flush_delalloc_async; + + btrfs_queue_worker(&root->fs_info->enospc_workers, + &async->work); + wait_on_flush(info); + return; + +flush: + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(root, 0); + + spin_lock(&info->lock); + info->flushing = 0; + spin_unlock(&info->lock); + wake_up(&info->flush_wait); +} + +static int maybe_allocate_chunk(struct btrfs_root *root, + struct btrfs_space_info *info) +{ + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_trans_handle *trans; + bool wait = false; + int ret = 0; + u64 min_metadata; + u64 free_space; + + free_space = btrfs_super_total_bytes(disk_super); + /* + * we allow the metadata to grow to a max of either 5gb or 5% of the + * space in the volume. 
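
calculate_bytes_needed() above prices a reservation at the absolute worst case: every inserted item gets a fresh leaf, and every leaf costs two COW'd nodes per level of a maximally tall tree. The standalone recomputation below assumes BTRFS_MAX_LEVEL is 8 and 4 KiB leaf/node sizes; both are assumptions for illustration, not values taken from this diff.

#include <stdint.h>
#include <stdio.h>

#define MAX_LEVEL 8                       /* assumed BTRFS_MAX_LEVEL */

static uint64_t bytes_needed(uint64_t num_items, int is_extent_root,
			     uint64_t leafsize, uint64_t nodesize)
{
	int level = MAX_LEVEL - 2;
	uint64_t leaves;

	/* extent root: just the extent item plus its ref per insert;
	 * other roots: three items per insert, each cow'ing the target
	 * leaf and its left and right neighbours */
	if (is_extent_root)
		leaves = num_items * 2;
	else
		leaves = (num_items + (2 * num_items)) * 3;

	/* every leaf, plus two cow'd nodes per level down to the leaves */
	return (leaves * leafsize) +
	       (leaves * (uint64_t)(level * 2)) * nodesize;
}

int main(void)
{
	/* one item on a non-extent root: 9 leaves ->
	 * 9*4096 + 9*12*4096 = 479232 bytes (~468 KiB) reserved */
	printf("%llu\n", (unsigned long long)bytes_needed(1, 0, 4096, 4096));
	return 0;
}
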
+ */ + min_metadata = min((u64)5 * 1024 * 1024 * 1024, + div64_u64(free_space * 5, 100)); + if (info->total_bytes >= min_metadata) { + spin_unlock(&info->lock); + return 0; + } + + if (info->full) { + spin_unlock(&info->lock); + return 0; + } + + if (!info->allocating_chunk) { + info->force_alloc = 1; + info->allocating_chunk = 1; + init_waitqueue_head(&info->allocate_wait); + } else { + wait = true; + } + + spin_unlock(&info->lock); + + if (wait) { + wait_event(info->allocate_wait, + !info->allocating_chunk); + return 1; + } + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + 4096 + 2 * 1024 * 1024, + info->flags, 0); + btrfs_end_transaction(trans, root); + if (ret) + goto out; +out: + spin_lock(&info->lock); + info->allocating_chunk = 0; + spin_unlock(&info->lock); + wake_up(&info->allocate_wait); + + if (ret) + return 0; + return 1; +} + +/* + * Reserve metadata space for delalloc. + */ +int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 used; + u64 alloc_target; + int flushed = 0; + int force_delalloc; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root->fs_info->extent_root, + num_items); +again: + spin_lock(&meta_sinfo->lock); + + force_delalloc = meta_sinfo->force_delalloc; + + if (unlikely(!meta_sinfo->bytes_root)) + meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); + + if (!flushed) + meta_sinfo->bytes_delalloc += num_bytes; + + used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; + + if (used > meta_sinfo->total_bytes) { + flushed++; + + if (flushed == 1) { + if (maybe_allocate_chunk(root, meta_sinfo)) + goto again; + flushed++; + } else { + spin_unlock(&meta_sinfo->lock); + } + + if (flushed == 2) { + filemap_flush(inode->i_mapping); + goto again; + } else if (flushed == 3) { + flush_delalloc(root, meta_sinfo); goto again; } + spin_lock(&meta_sinfo->lock); + meta_sinfo->bytes_delalloc -= num_bytes; spin_unlock(&meta_sinfo->lock); + printk(KERN_ERR "enospc, has %d, reserved %d\n", + BTRFS_I(inode)->outstanding_extents, + BTRFS_I(inode)->reserved_extents); + dump_space_info(meta_sinfo, 0, 0); + return -ENOSPC; + } - if (!committed) { - committed = 1; - trans = btrfs_join_transaction(root, 1); - if (!trans) - return -ENOMEM; - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; + BTRFS_I(inode)->reserved_extents++; + check_force_delalloc(meta_sinfo); + spin_unlock(&meta_sinfo->lock); + + if (!flushed && force_delalloc) + filemap_flush(inode->i_mapping); + + return 0; +} + +/* + * unreserve num_items number of items worth of metadata space. This needs to + * be paired with btrfs_reserve_metadata_space. + * + * NOTE: if you have the option, run this _AFTER_ you do a + * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref + * oprations which will result in more used metadata, so we want to make sure we + * can do that without issue. 
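
maybe_allocate_chunk() above refuses to grow metadata space once it holds the smaller of 5 GiB or 5% of the filesystem, so small volumes are capped by the percentage and large ones by the fixed ceiling. A short standalone illustration with two hypothetical volume sizes:

#include <stdint.h>
#include <stdio.h>

static uint64_t min_metadata(uint64_t total_bytes)
{
	uint64_t fixed_cap = (uint64_t)5 * 1024 * 1024 * 1024;  /* 5 GiB */
	uint64_t pct_cap = total_bytes * 5 / 100;                /* 5% of fs */

	return fixed_cap < pct_cap ? fixed_cap : pct_cap;
}

int main(void)
{
	/* 40 GiB volume: the 5% cap (2 GiB) is the limit */
	printf("%llu\n", (unsigned long long)min_metadata((uint64_t)40 << 30));
	/* 1 TiB volume: the fixed 5 GiB ceiling wins */
	printf("%llu\n", (unsigned long long)min_metadata((uint64_t)1 << 40));
	return 0;
}
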
+ */ +int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 alloc_target; + bool bug = false; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root, num_items); + + spin_lock(&meta_sinfo->lock); + if (meta_sinfo->bytes_may_use < num_bytes) { + bug = true; + meta_sinfo->bytes_may_use = 0; + } else { + meta_sinfo->bytes_may_use -= num_bytes; + } + spin_unlock(&meta_sinfo->lock); + + BUG_ON(bug); + + return 0; +} + +/* + * Reserve some metadata space for use. We'll calculate the worste case number + * of bytes that would be needed to modify num_items number of items. If we + * have space, fantastic, if not, you get -ENOSPC. Please call + * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of + * items you reserved, since whatever metadata you needed should have already + * been allocated. + * + * This will commit the transaction to make more space if we don't have enough + * metadata space. THe only time we don't do this is if we're reserving space + * inside of a transaction, then we will just return -ENOSPC and it is the + * callers responsibility to handle it properly. + */ +int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 used; + u64 alloc_target; + int retries = 0; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root, num_items); +again: + spin_lock(&meta_sinfo->lock); + + if (unlikely(!meta_sinfo->bytes_root)) + meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); + + if (!retries) + meta_sinfo->bytes_may_use += num_bytes; + + used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; + + if (used > meta_sinfo->total_bytes) { + retries++; + if (retries == 1) { + if (maybe_allocate_chunk(root, meta_sinfo)) + goto again; + retries++; + } else { + spin_unlock(&meta_sinfo->lock); + } + + if (retries == 2) { + flush_delalloc(root, meta_sinfo); goto again; } + spin_lock(&meta_sinfo->lock); + meta_sinfo->bytes_may_use -= num_bytes; + spin_unlock(&meta_sinfo->lock); + + dump_space_info(meta_sinfo, 0, 0); return -ENOSPC; } + + check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); return 0; @@ -2765,13 +3225,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); data_sinfo = BTRFS_I(inode)->space_info; + if (!data_sinfo) + goto alloc; + again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); if (data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - - data_sinfo->bytes_may_use < bytes) { + data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { struct btrfs_trans_handle *trans; /* @@ -2783,7 +3246,7 @@ again: data_sinfo->force_alloc = 1; 
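
The comment on btrfs_unreserve_metadata_space() above asks callers to pair it with btrfs_reserve_metadata_space() for the same item count and, when they can, to unreserve only after btrfs_end_transaction() has run the delayed refs. A sketch of that calling pattern follows; the surrounding function and its error handling are invented for illustration, and only the reserve/unreserve/transaction calls come from this patch or existing btrfs code.

/* hypothetical caller: reserve worst-case room for two items, modify
 * them inside a transaction, then hand the reservation back */
static int example_update_two_items(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	ret = btrfs_reserve_metadata_space(root, 2);
	if (ret)                          /* -ENOSPC, nothing was reserved */
		return ret;

	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		btrfs_unreserve_metadata_space(root, 2);
		return -ENOMEM;
	}

	/* ... insert or update the two items here ... */

	btrfs_end_transaction(trans, root);

	/* matched unreserve, after end_transaction as the comment advises */
	return btrfs_unreserve_metadata_space(root, 2);
}
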
spin_unlock(&data_sinfo->lock); - +alloc: alloc_target = btrfs_get_alloc_profile(root, 1); trans = btrfs_start_transaction(root, 1); if (!trans) @@ -2795,12 +3258,17 @@ again: btrfs_end_transaction(trans, root); if (ret) return ret; + + if (!data_sinfo) { + btrfs_set_inode_space_info(root, inode); + data_sinfo = BTRFS_I(inode)->space_info; + } goto again; } spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ - if (!committed) { + if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; trans = btrfs_join_transaction(root, 1); if (!trans) @@ -2828,7 +3296,7 @@ again: BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); - return btrfs_check_metadata_free_space(root); + return 0; } /* @@ -2927,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(!space_info); spin_lock(&space_info->lock); - if (space_info->force_alloc) { + if (space_info->force_alloc) force = 1; - space_info->force_alloc = 0; - } if (space_info->full) { spin_unlock(&space_info->lock); goto out; } thresh = space_info->total_bytes - space_info->bytes_readonly; - thresh = div_factor(thresh, 6); + thresh = div_factor(thresh, 8); if (!force && (space_info->bytes_used + space_info->bytes_pinned + space_info->bytes_reserved + alloc_bytes) < thresh) { @@ -2951,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, * we keep a reasonable number of metadata chunks allocated in the * FS as well. */ - if (flags & BTRFS_BLOCK_GROUP_DATA) { + if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { fs_info->data_chunk_allocations++; if (!(fs_info->data_chunk_allocations % fs_info->metadata_ratio)) @@ -2959,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } ret = btrfs_alloc_chunk(trans, extent_root, flags); + spin_lock(&space_info->lock); if (ret) space_info->full = 1; + space_info->force_alloc = 0; + spin_unlock(&space_info->lock); out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; @@ -3009,10 +3478,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + cache->reserved -= num_bytes; cache->space_info->bytes_used += num_bytes; + cache->space_info->bytes_reserved -= num_bytes; if (cache->ro) cache->space_info->bytes_readonly -= num_bytes; - btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } else { @@ -3057,127 +3528,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) return bytenr; } -int btrfs_update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin) +/* + * this function must be called within transaction + */ +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int reserved) { - u64 len; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache; - if (pin) - set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); - - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - BUG_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - if (pin) { - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - cache->pinned += len; - cache->space_info->bytes_pinned += len; - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); - 
fs_info->total_pinned += len; - } else { - int unpin = 0; + cache = btrfs_lookup_block_group(fs_info, bytenr); + BUG_ON(!cache); - /* - * in order to not race with the block group caching, we - * only want to unpin the extent if we are cached. If - * we aren't cached, we want to start async caching this - * block group so we can free the extent the next time - * around. - */ - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - unpin = (cache->cached == BTRFS_CACHE_FINISHED); - if (likely(unpin)) { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - fs_info->total_pinned -= len; - } - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + cache->pinned += num_bytes; + cache->space_info->bytes_pinned += num_bytes; + if (reserved) { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; + } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); - if (likely(unpin)) - clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + len -1, - GFP_NOFS); - else - cache_block_group(cache); + btrfs_put_block_group(cache); - if (unpin) - btrfs_add_free_space(cache, bytenr, len); - } - btrfs_put_block_group(cache); - bytenr += len; - num -= len; + set_extent_dirty(fs_info->pinned_extents, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); + return 0; +} + +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve) +{ + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + if (reserve) { + cache->reserved += num_bytes; + cache->space_info->bytes_reserved += num_bytes; + } else { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); return 0; } -static int update_reserved_extents(struct btrfs_ |
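
With this patch the reservation accounting follows the block group: update_reserved_extents() moves bytes in and out of cache->reserved, update_block_group() converts reserved bytes into bytes_used when an allocation is committed, and btrfs_pin_extent() with reserved set converts them into bytes_pinned instead. A toy userspace model of those counter transitions (field and function names are illustrative only):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct space_counters {
	uint64_t reserved;   /* handed out in memory, not yet on disk */
	uint64_t used;       /* committed allocations */
	uint64_t pinned;     /* freed this transaction, reusable after commit */
};

/* update_reserved_extents(cache, n, 1) analogue */
static void reserve(struct space_counters *s, uint64_t n)
{
	s->reserved += n;
}

/* update_block_group(..., alloc=1) analogue: reservation becomes usage */
static void commit_alloc(struct space_counters *s, uint64_t n)
{
	assert(s->reserved >= n);
	s->reserved -= n;
	s->used += n;
}

/* btrfs_pin_extent(..., reserved=1) analogue: reservation becomes pinned */
static void pin_reserved(struct space_counters *s, uint64_t n)
{
	assert(s->reserved >= n);
	s->reserved -= n;
	s->pinned += n;
}

int main(void)
{
	struct space_counters s = { 0, 0, 0 };

	reserve(&s, 8192);
	commit_alloc(&s, 4096);   /* first extent reaches disk */
	pin_reserved(&s, 4096);   /* second is freed before it ever does */
	printf("reserved=%llu used=%llu pinned=%llu\n",
	       (unsigned long long)s.reserved,
	       (unsigned long long)s.used,
	       (unsigned long long)s.pinned);
	return 0;
}
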