diff options
author | David Sterba <dsterba@suse.cz> | 2011-10-24 14:47:57 +0200 |
---|---|---|
committer | David Sterba <dsterba@suse.cz> | 2011-10-24 14:47:57 +0200 |
commit | afd582ac8f10382002a72b4d17d9c2db328ed8b8 (patch) | |
tree | 91246c1296c06cc0d5add8d10452e7fb110ed920 /fs | |
parent | c3b92c8787367a8bb53d57d9789b558f1295cc96 (diff) | |
parent | 016fc6a63e465d5b94e4028f6d05d9703e195428 (diff) |
Merge remote-tracking branch 'remotes/josef/for-chris' into btrfs-next-stable
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 17 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 56 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 8 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 668 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 194 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 3 | ||||
-rw-r--r-- | fs/btrfs/file.c | 25 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 902 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 6 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 286 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 20 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 19 | ||||
-rw-r--r-- | fs/btrfs/super.c | 245 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 116 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 39 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 11 |
16 files changed, 1557 insertions, 1058 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d9f99a16edd..5a5d325a393 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -103,11 +103,6 @@ struct btrfs_inode { */ u64 delalloc_bytes; - /* total number of bytes that may be used for this inode for - * delalloc - */ - u64 reserved_bytes; - /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk @@ -115,9 +110,6 @@ struct btrfs_inode { */ u64 disk_i_size; - /* flags field from the on disk inode */ - u32 flags; - /* * if this is a directory then index_cnt is the counter for the index * number for new files that are created @@ -132,6 +124,15 @@ struct btrfs_inode { u64 last_unlink_trans; /* + * Number of bytes outstanding that are going to need csums. This is + * used in ENOSPC accounting. + */ + u64 csum_bytes; + + /* flags field from the on disk inode */ + u32 flags; + + /* * Counters to keep track of the number of extent item's we may use due * to delalloc and such. outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03912c5c6f4..227620993bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -30,6 +30,7 @@ #include <linux/kobject.h> #include <trace/events/btrfs.h> #include <asm/kmap_types.h> +#include <linux/pagemap.h> #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -772,14 +773,8 @@ struct btrfs_space_info { struct btrfs_block_rsv { u64 size; u64 reserved; - u64 freed[2]; struct btrfs_space_info *space_info; - struct list_head list; spinlock_t lock; - atomic_t usage; - unsigned int priority:8; - unsigned int durable:1; - unsigned int refill_used:1; unsigned int full:1; }; @@ -840,10 +835,10 @@ struct btrfs_block_group_cache { spinlock_t lock; u64 pinned; u64 reserved; - u64 reserved_pinned; u64 bytes_super; u64 flags; u64 sectorsize; + u64 cache_generation; unsigned int ro:1; unsigned int dirty:1; unsigned int iref:1; @@ -899,6 +894,10 @@ struct btrfs_fs_info { spinlock_t block_group_cache_lock; struct rb_root block_group_cache_tree; + /* keep track of unallocated space */ + spinlock_t free_chunk_lock; + u64 free_chunk_space; + struct extent_io_tree freed_extents[2]; struct extent_io_tree *pinned_extents; @@ -919,11 +918,6 @@ struct btrfs_fs_info { struct btrfs_block_rsv empty_block_rsv; - /* list of block reservations that cross multiple transactions */ - struct list_head durable_block_rsv_list; - - struct mutex durable_block_rsv_mutex; - u64 generation; u64 last_trans_committed; @@ -2129,6 +2123,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); } +static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) +{ + return mapping_gfp_mask(mapping) & ~__GFP_FS; +} + /* extent-tree.c */ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) @@ -2137,6 +2136,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 3 * num_items; } +/* + * Doing a truncate won't result in new nodes or leaves, just what we need for + * COW. + */ +static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, + unsigned num_items) +{ + return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + num_items; +} + void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); @@ -2196,8 +2206,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -2240,25 +2248,20 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv); -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv); -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor); +int btrfs_block_rsv_refill(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor); + u64 min_reserved); int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, struct btrfs_block_rsv *dst_rsv, u64 num_bytes); void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv); int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, @@ -2579,11 +2582,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve); -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 07b3ac662e1..51372a52116 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1648,6 +1648,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); + spin_lock_init(&fs_info->free_chunk_lock); mutex_init(&fs_info->reloc_mutex); init_completion(&fs_info->kobj_unregister); @@ -1665,8 +1666,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_block_rsv(&fs_info->trans_block_rsv); btrfs_init_block_rsv(&fs_info->chunk_block_rsv); btrfs_init_block_rsv(&fs_info->empty_block_rsv); - INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); - mutex_init(&fs_info->durable_block_rsv_mutex); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); @@ -1677,6 +1676,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->trans_no_join = 0; + fs_info->free_chunk_space = 0; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ -2545,8 +2545,6 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(root->fs_info); - btrfs_put_block_group_cache(fs_info); - /* * Here come 2 situations when btrfs is broken to flip readonly: * @@ -2572,6 +2570,8 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + btrfs_put_block_group_cache(fs_info); + kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f5be06a2462..4eb7d2ba38f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -52,6 +52,21 @@ enum { CHUNK_ALLOC_LIMITED = 2, }; +/* + * Control how reservations are dealt with. + * + * RESERVE_FREE - freeing a reservation. + * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for + * ENOSPC accounting + * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update + * bytes_may_use as the ENOSPC accounting is done elsewhere + */ +enum { + RESERVE_FREE = 0, + RESERVE_ALLOC = 1, + RESERVE_ALLOC_NO_ACCOUNT = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -81,6 +96,8 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); +static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -104,7 +121,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) if (atomic_dec_and_test(&cache->count)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); - WARN_ON(cache->reserved_pinned > 0); kfree(cache->free_space_ctl); kfree(cache); } @@ -465,7 +481,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * we likely hold important locks. */ if (trans && (!trans->transaction->in_commit) && - (root && root != root->fs_info->tree_root)) { + (root && root != root->fs_info->tree_root) && + btrfs_test_opt(root, SPACE_CACHE)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); @@ -2700,6 +2717,13 @@ again: goto again; } + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -2749,12 +2773,15 @@ again: if (!ret) dcs = BTRFS_DC_SETUP; btrfs_free_reserved_data_space(inode, num_pages); + out_put: iput(inode); out_free: btrfs_release_path(path); out: spin_lock(&block_group->lock); + if (!ret) + block_group->cache_generation = trans->transid; block_group->disk_cache_state = dcs; spin_unlock(&block_group->lock); @@ -3122,16 +3149,13 @@ commit_trans: return -ENOSPC; } data_sinfo->bytes_may_use += bytes; - BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); return 0; } /* - * called when we are clearing an delalloc extent from the - * inode's io_tree or there was an error for whatever reason - * after calling btrfs_check_data_free_space + * Called if we need to clear a data reservation for this inode. */ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) { @@ -3144,7 +3168,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) data_sinfo = BTRFS_I(inode)->space_info; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; - BTRFS_I(inode)->reserved_bytes -= bytes; spin_unlock(&data_sinfo->lock); } @@ -3165,6 +3188,7 @@ static int should_alloc_chunk(struct btrfs_root *root, struct btrfs_space_info *sinfo, u64 alloc_bytes, int force) { + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; @@ -3173,6 +3197,13 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; /* + * We need to take into account the global rsv because for all intents + * and purposes it's used space. Don't worry about locking the + * global_rsv, it doesn't change except when the transaction commits. + */ + num_allocated += global_rsv->size; + + /* * in limited mode, we want to have some free space up to * about 1% of the FS size. */ @@ -3303,7 +3334,8 @@ out: * shrink metadata reservation for delalloc */ static int shrink_delalloc(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 to_reclaim, int sync) + struct btrfs_root *root, u64 to_reclaim, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; @@ -3311,7 +3343,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, u64 max_reclaim; u64 reclaimed = 0; long time_left; - int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; unsigned long progress; @@ -3319,7 +3351,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, space_info = block_rsv->space_info; smp_mb(); - reserved = space_info->bytes_reserved; + reserved = space_info->bytes_may_use; progress = space_info->reservation_progress; if (reserved == 0) @@ -3334,7 +3366,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } max_reclaim = min(reserved, to_reclaim); - + nr_pages = max_t(unsigned long, nr_pages, + max_reclaim >> PAGE_CACHE_SHIFT); while (loops < 1024) { /* have the flusher threads jump in and do some IO */ smp_mb(); @@ -3343,9 +3376,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_reserved) - reclaimed += reserved - space_info->bytes_reserved; - reserved = space_info->bytes_reserved; + if (reserved > space_info->bytes_may_use) + reclaimed += reserved - space_info->bytes_may_use; + reserved = space_info->bytes_may_use; spin_unlock(&space_info->lock); loops++; @@ -3356,11 +3389,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (trans && trans->transaction->blocked) return -EAGAIN; - time_left = schedule_timeout_interruptible(1); + if (wait_ordered && !trans) { + btrfs_wait_ordered_extents(root, 0, 0); + } else { + time_left = schedule_timeout_interruptible(1); - /* We were interrupted, exit */ - if (time_left) - break; + /* We were interrupted, exit */ + if (time_left) + break; + } /* we've kicked the IO a few times, if anything has been freed, * exit. There is no sense in looping here for a long time @@ -3375,35 +3412,39 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } } - if (reclaimed >= to_reclaim && !trans) - btrfs_wait_ordered_extents(root, 0, 0); + return reclaimed >= to_reclaim; } -/* - * Retries tells us how many times we've called reserve_metadata_bytes. The - * idea is if this is the first call (retries == 0) then we will add to our - * reserved count if we can't make the allocation in order to hold our place - * while we go and try and free up space. That way for retries > 1 we don't try - * and add space, we just check to see if the amount of unused space is >= the - * total space, meaning that our reservation is valid. +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @block_rsv - the block_rsv we're allocating for + * @orig_bytes - the number of bytes we want + * @flush - wether or not we can flush to make our reservation * - * However if we don't intend to retry this reservation, pass -1 as retries so - * that it short circuits this logic. + * This will reserve orgi_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. */ -static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 orig_bytes, int flush) { struct btrfs_space_info *space_info = block_rsv->space_info; - u64 unused; + struct btrfs_trans_handle *trans; + u64 used; u64 num_bytes = orig_bytes; int retries = 0; int ret = 0; bool committed = false; bool flushing = false; + bool wait_ordered = false; + trans = (struct btrfs_trans_handle *)current->journal_info; again: ret = 0; spin_lock(&space_info->lock); @@ -3431,9 +3472,9 @@ again: } ret = -ENOSPC; - unused = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use; + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; /* * The idea here is that we've not already over-reserved the block group @@ -3442,10 +3483,9 @@ again: * lets start flushing stuff first and then come back and try to make * our reservation. */ - if (unused <= space_info->total_bytes) { - unused = space_info->total_bytes - unused; - if (unused >= num_bytes) { - space_info->bytes_reserved += orig_bytes; + if (used <= space_info->total_bytes) { + if (used + orig_bytes <= space_info->total_bytes) { + space_info->bytes_may_use += orig_bytes; ret = 0; } else { /* @@ -3461,10 +3501,60 @@ again: * amount plus the amount of bytes that we need for this * reservation. */ - num_bytes = unused - space_info->total_bytes + + wait_ordered = true; + num_bytes = used - space_info->total_bytes + (orig_bytes * (retries + 1)); } + if (ret) { + u64 profile = btrfs_get_alloc_profile(root, 0); + u64 avail; + + /* + * If we have a lot of space that's pinned, don't bother doing + * the overcommit dance yet and just commit the transaction. + */ + avail = (space_info->total_bytes - space_info->bytes_used) * 8; + do_div(avail, 10); + if (space_info->bytes_pinned >= avail && flush && !trans && + !committed) { + space_info->flush = 1; + flushing = true; + spin_unlock(&space_info->lock); + goto commit; + } + + spin_lock(&root->fs_info->free_chunk_lock); + avail = root->fs_info->free_chunk_space; + + /* + * If we have dup, raid1 or raid10 then only half of the free + * space is actually useable. + */ + if (profile & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + avail >>= 1; + + /* + * If we aren't flushing don't let us overcommit too much, say + * 1/8th of the space. If we can flush, let it overcommit up to + * 1/2 of the space. + */ + if (flush) + avail >>= 3; + else + avail >>= 1; + spin_unlock(&root->fs_info->free_chunk_lock); + + if (used + num_bytes < space_info->total_bytes + avail) { + space_info->bytes_may_use += orig_bytes; + ret = 0; + } else { + wait_ordered = true; + } + } + /* * Couldn't make our reservation, save our place so while we're trying * to reclaim space we can actually use it instead of somebody else @@ -3484,7 +3574,7 @@ again: * We do synchronous shrinking since we don't actually unreserve * metadata until after the IO is completed. */ - ret = shrink_delalloc(trans, root, num_bytes, 1); + ret = shrink_delalloc(trans, root, num_bytes, wait_ordered); if (ret < 0) goto out; @@ -3496,25 +3586,16 @@ again: * so go back around and try again. */ if (retries < 2) { + wait_ordered = true; retries++; goto again; } - /* - * Not enough space to be reclaimed, don't bother committing the - * transaction. - */ - spin_lock(&space_info->lock); - if (space_info->bytes_pinned < orig_bytes) - ret = -ENOSPC; - spin_unlock(&space_info->lock); - if (ret) - goto out; - ret = -EAGAIN; if (trans) goto out; +commit: ret = -ENOSPC; if (committed) goto out; @@ -3542,10 +3623,12 @@ out: static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_rsv *block_rsv; - if (root->ref_cows) + struct btrfs_block_rsv *block_rsv = NULL; + + if (root->ref_cows || root == root->fs_info->csum_root) block_rsv = trans->block_rsv; - else + + if (!block_rsv) block_rsv = root->block_rsv; if (!block_rsv) @@ -3616,7 +3699,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, } if (num_bytes) { spin_lock(&space_info->lock); - space_info->bytes_reserved -= num_bytes; + space_info->bytes_may_use -= num_bytes; space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -3640,9 +3723,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) { memset(rsv, 0, sizeof(*rsv)); spin_lock_init(&rsv->lock); - atomic_set(&rsv->usage, 1); - rsv->priority = 6; - INIT_LIST_HEAD(&rsv->list); } struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) @@ -3663,29 +3743,11 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv) { - if (rsv && atomic_dec_and_test(&rsv->usage)) { - btrfs_block_rsv_release(root, rsv, (u64)-1); - if (!rsv->durable) - kfree(rsv); - } -} - -/* - * make the block_rsv struct be able to capture freed space. - * the captured space will re-add to the the block_rsv struct - * after transaction commit - */ -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv) -{ - block_rsv->durable = 1; - mutex_lock(&fs_info->durable_block_rsv_mutex); - list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); - mutex_unlock(&fs_info->durable_block_rsv_mutex); + btrfs_block_rsv_release(root, rsv, (u64)-1); + kfree(rsv); } -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes) { @@ -3694,7 +3756,7 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, if (num_bytes == 0) return 0; - ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 1); return 0; @@ -3703,55 +3765,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, return ret; } -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor) +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor) { u64 num_bytes = 0; - int commit_trans = 0; int ret = -ENOSPC; if (!block_rsv) return 0; spin_lock(&block_rsv->lock); - if (min_factor > 0) - num_bytes = div_factor(block_rsv->size, min_factor); - if (min_reserved > num_bytes) - num_bytes = min_reserved; + num_bytes = div_factor(block_rsv->size, min_factor); + if (block_rsv->reserved >= num_bytes) + ret = 0; + spin_unlock(&block_rsv->lock); - if (block_rsv->reserved >= num_bytes) { + return ret; +} + +int btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved) +{ + u64 num_bytes = 0; + int ret = -ENOSPC; + + if (!block_rsv) + return 0; + + spin_lock(&block_rsv->lock); + num_bytes = min_reserved; + if (block_rsv->reserved >= num_bytes) ret = 0; - } else { + else num_bytes -= block_rsv->reserved; - if (block_rsv->durable && - block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) - commit_trans = 1; - } spin_unlock(&block_rsv->lock); + if (!ret) return 0; - if (block_rsv->refill_used) { - ret = reserve_metadata_bytes(trans, root, block_rsv, - num_bytes, 0); - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 0); - return 0; - } - } - - if (commit_trans) { - if (trans) - return -EAGAIN; - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - ret = btrfs_commit_transaction(trans, root); + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); + if (!ret) { + block_rsv_add_bytes(block_rsv, num_bytes, 0); return 0; } - return -ENOSPC; + return ret; } int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, @@ -3827,12 +3886,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) if (sinfo->total_bytes > num_bytes) { num_bytes = sinfo->total_bytes - num_bytes; block_rsv->reserved += num_bytes; - sinfo->bytes_reserved += num_bytes; + sinfo->bytes_may_use += num_bytes; } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; - sinfo->bytes_reserved -= num_bytes; + sinfo->bytes_may_use -= num_bytes; sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -3848,16 +3907,12 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); fs_info->chunk_block_rsv.space_info = space_info; - fs_info->chunk_block_rsv.priority = 10; space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); fs_info->global_block_rsv.space_info = space_info; - fs_info->global_block_rsv.priority = 10; - fs_info->global_block_rsv.refill_used = 1; fs_info->delalloc_block_rsv.space_info = space_info; fs_info->trans_block_rsv.space_info = space_info; fs_info->empty_block_rsv.space_info = space_info; - fs_info->empty_block_rsv.priority = 10; fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; @@ -3865,10 +3920,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; - btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); - - btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); - update_global_block_rsv(fs_info); } @@ -3883,46 +3934,13 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) WARN_ON(fs_info->chunk_block_rsv.reserved > 0); } -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; - u64 num_bytes; - int ret; - - /* - * Truncate should be freeing data, but give us 2 items just in case it - * needs to use some space. We may want to be smarter about this in the - * future. - */ - num_bytes = btrfs_calc_trans_metadata_size(root, 2); - - /* We already have enough bytes, just return */ - if (rsv->reserved >= num_bytes) - return 0; - - num_bytes -= rsv->reserved; - - /* - * You should have reserved enough space before hand to do this, so this - * should not fail. - */ - ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); - BUG_ON(ret); - - return 0; -} - void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (!trans->bytes_reserved) return; - BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); - btrfs_block_rsv_release(root, trans->block_rsv, - trans->bytes_reserved); + btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; } @@ -3964,11 +3982,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } +/** + * drop_outstanding_extent - drop an outstanding extent + * @inode: the inode we're dropping the extent for + * + * This is called when we are freeing up an outstanding extent, either called + * after an error or after an extent is written. This will return the number of + * reserved extents that need to be freed. This must be called with + * BTRFS_I(inode)->lock held. + */ static unsigned drop_outstanding_extent(struct inode *inode) { unsigned dropped_extents = 0; - spin_lock(&BTRFS_I(inode)->lock); BUG_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; @@ -3978,19 +4004,70 @@ static unsigned drop_outstanding_extent(struct inode *inode) */ if (BTRFS_I(inode)->outstanding_extents >= BTRFS_I(inode)->reserved_extents) - goto out; + return 0; dropped_extents = BTRFS_I(inode)->reserved_extents - BTRFS_I(inode)->outstanding_extents; BTRFS_I(inode)->reserved_extents -= dropped_extents; -out: - spin_unlock(&BTRFS_I(inode)->lock); return dropped_extents; } -static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) +/** + * calc_csum_metadata_size - return the amount of metada space that must be + * reserved/free'd for the given bytes. + * @inode: the inode we're manipulating + * @num_bytes: the number of bytes in question + * @reserve: 1 if we are reserving space, 0 if we are freeing space + * + * This adjusts the number of csum_bytes in the inode and then returns the + * correct amount of metadata that must either be reserved or freed. We + * calculate how many checksums we can fit into one leaf and then divide the + * number of bytes that will need to be checksumed by this value to figure out + * how many checksums will be required. If we are adding bytes then the number + * may go up and we will return the number of additional bytes that must be + * reserved. If it is going down we will return the number of bytes that must + * be freed. + * + * This must be called with BTRFS_I(inode)->lock held. + */ +static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, + int reserve) { - return num_bytes >>= 3; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 csum_size; + int num_csums_per_leaf; + int num_csums; + int old_csums; + + if (BTRFS_I(inode)->flags & BTRFS_INODE |