aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDavid Sterba <dsterba@suse.cz>2011-10-24 14:47:57 +0200
committerDavid Sterba <dsterba@suse.cz>2011-10-24 14:47:57 +0200
commitafd582ac8f10382002a72b4d17d9c2db328ed8b8 (patch)
tree91246c1296c06cc0d5add8d10452e7fb110ed920 /fs
parentc3b92c8787367a8bb53d57d9789b558f1295cc96 (diff)
parent016fc6a63e465d5b94e4028f6d05d9703e195428 (diff)
Merge remote-tracking branch 'remotes/josef/for-chris' into btrfs-next-stable
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/btrfs_inode.h17
-rw-r--r--fs/btrfs/ctree.h56
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c668
-rw-r--r--fs/btrfs/extent_io.c194
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/file.c25
-rw-r--r--fs/btrfs/free-space-cache.c902
-rw-r--r--fs/btrfs/inode-map.c6
-rw-r--r--fs/btrfs/inode.c286
-rw-r--r--fs/btrfs/ioctl.c20
-rw-r--r--fs/btrfs/relocation.c19
-rw-r--r--fs/btrfs/super.c245
-rw-r--r--fs/btrfs/transaction.c116
-rw-r--r--fs/btrfs/volumes.c39
-rw-r--r--fs/btrfs/xattr.c11
16 files changed, 1557 insertions, 1058 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9f99a16edd..5a5d325a393 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -103,11 +103,6 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;
- /* total number of bytes that may be used for this inode for
- * delalloc
- */
- u64 reserved_bytes;
-
/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
@@ -115,9 +110,6 @@ struct btrfs_inode {
*/
u64 disk_i_size;
- /* flags field from the on disk inode */
- u32 flags;
-
/*
* if this is a directory then index_cnt is the counter for the index
* number for new files that are created
@@ -132,6 +124,15 @@ struct btrfs_inode {
u64 last_unlink_trans;
/*
+ * Number of bytes outstanding that are going to need csums. This is
+ * used in ENOSPC accounting.
+ */
+ u64 csum_bytes;
+
+ /* flags field from the on disk inode */
+ u32 flags;
+
+ /*
* Counters to keep track of the number of extent item's we may use due
* to delalloc and such. outstanding_extents is the number of extent
* items we think we'll end up using, and reserved_extents is the number
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03912c5c6f4..227620993bc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -30,6 +30,7 @@
#include <linux/kobject.h>
#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
+#include <linux/pagemap.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -772,14 +773,8 @@ struct btrfs_space_info {
struct btrfs_block_rsv {
u64 size;
u64 reserved;
- u64 freed[2];
struct btrfs_space_info *space_info;
- struct list_head list;
spinlock_t lock;
- atomic_t usage;
- unsigned int priority:8;
- unsigned int durable:1;
- unsigned int refill_used:1;
unsigned int full:1;
};
@@ -840,10 +835,10 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
- u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
+ u64 cache_generation;
unsigned int ro:1;
unsigned int dirty:1;
unsigned int iref:1;
@@ -899,6 +894,10 @@ struct btrfs_fs_info {
spinlock_t block_group_cache_lock;
struct rb_root block_group_cache_tree;
+ /* keep track of unallocated space */
+ spinlock_t free_chunk_lock;
+ u64 free_chunk_space;
+
struct extent_io_tree freed_extents[2];
struct extent_io_tree *pinned_extents;
@@ -919,11 +918,6 @@ struct btrfs_fs_info {
struct btrfs_block_rsv empty_block_rsv;
- /* list of block reservations that cross multiple transactions */
- struct list_head durable_block_rsv_list;
-
- struct mutex durable_block_rsv_mutex;
-
u64 generation;
u64 last_trans_committed;
@@ -2129,6 +2123,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
}
+static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
+{
+ return mapping_gfp_mask(mapping) & ~__GFP_FS;
+}
+
/* extent-tree.c */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
@@ -2137,6 +2136,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3 * num_items;
}
+/*
+ * Doing a truncate won't result in new nodes or leaves, just what we need for
+ * COW.
+ */
+static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
+ unsigned num_items)
+{
+ return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
+ num_items;
+}
+
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
@@ -2196,8 +2206,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int sinfo);
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2240,25 +2248,20 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
-void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv);
-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_block_rsv_check(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, int min_factor);
+int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 min_reserved, int min_factor);
+ u64 min_reserved);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2579,11 +2582,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending,
- u64 *bytes_to_reserve);
-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07b3ac662e1..51372a52116 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1648,6 +1648,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ spin_lock_init(&fs_info->free_chunk_lock);
mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
@@ -1665,8 +1666,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
- INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
- mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
@@ -1677,6 +1676,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
fs_info->trans_no_join = 0;
+ fs_info->free_chunk_space = 0;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
@@ -2545,8 +2545,6 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_run_defrag_inodes(root->fs_info);
- btrfs_put_block_group_cache(fs_info);
-
/*
* Here come 2 situations when btrfs is broken to flip readonly:
*
@@ -2572,6 +2570,8 @@ int close_ctree(struct btrfs_root *root)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
+ btrfs_put_block_group_cache(fs_info);
+
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f5be06a2462..4eb7d2ba38f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,6 +52,21 @@ enum {
CHUNK_ALLOC_LIMITED = 2,
};
+/*
+ * Control how reservations are dealt with.
+ *
+ * RESERVE_FREE - freeing a reservation.
+ * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
+ * ENOSPC accounting
+ * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
+ * bytes_may_use as the ENOSPC accounting is done elsewhere
+ */
+enum {
+ RESERVE_FREE = 0,
+ RESERVE_ALLOC = 1,
+ RESERVE_ALLOC_NO_ACCOUNT = 2,
+};
+
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
@@ -81,6 +96,8 @@ static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
+static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -104,7 +121,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
- WARN_ON(cache->reserved_pinned > 0);
kfree(cache->free_space_ctl);
kfree(cache);
}
@@ -465,7 +481,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
* we likely hold important locks.
*/
if (trans && (!trans->transaction->in_commit) &&
- (root && root != root->fs_info->tree_root)) {
+ (root && root != root->fs_info->tree_root) &&
+ btrfs_test_opt(root, SPACE_CACHE)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
@@ -2700,6 +2717,13 @@ again:
goto again;
}
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
@@ -2749,12 +2773,15 @@ again:
if (!ret)
dcs = BTRFS_DC_SETUP;
btrfs_free_reserved_data_space(inode, num_pages);
+
out_put:
iput(inode);
out_free:
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
+ if (!ret)
+ block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
@@ -3122,16 +3149,13 @@ commit_trans:
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
- BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);
return 0;
}
/*
- * called when we are clearing an delalloc extent from the
- * inode's io_tree or there was an error for whatever reason
- * after calling btrfs_check_data_free_space
+ * Called if we need to clear a data reservation for this inode.
*/
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
@@ -3144,7 +3168,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
- BTRFS_I(inode)->reserved_bytes -= bytes;
spin_unlock(&data_sinfo->lock);
}
@@ -3165,6 +3188,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 alloc_bytes,
int force)
{
+ struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
u64 thresh;
@@ -3173,6 +3197,13 @@ static int should_alloc_chunk(struct btrfs_root *root,
return 1;
/*
+ * We need to take into account the global rsv because for all intents
+ * and purposes it's used space. Don't worry about locking the
+ * global_rsv, it doesn't change except when the transaction commits.
+ */
+ num_allocated += global_rsv->size;
+
+ /*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
@@ -3303,7 +3334,8 @@ out:
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+ struct btrfs_root *root, u64 to_reclaim,
+ bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
@@ -3311,7 +3343,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
unsigned long progress;
@@ -3319,7 +3351,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
space_info = block_rsv->space_info;
smp_mb();
- reserved = space_info->bytes_reserved;
+ reserved = space_info->bytes_may_use;
progress = space_info->reservation_progress;
if (reserved == 0)
@@ -3334,7 +3366,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
max_reclaim = min(reserved, to_reclaim);
-
+ nr_pages = max_t(unsigned long, nr_pages,
+ max_reclaim >> PAGE_CACHE_SHIFT);
while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
@@ -3343,9 +3376,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved)
- reclaimed += reserved - space_info->bytes_reserved;
- reserved = space_info->bytes_reserved;
+ if (reserved > space_info->bytes_may_use)
+ reclaimed += reserved - space_info->bytes_may_use;
+ reserved = space_info->bytes_may_use;
spin_unlock(&space_info->lock);
loops++;
@@ -3356,11 +3389,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (trans && trans->transaction->blocked)
return -EAGAIN;
- time_left = schedule_timeout_interruptible(1);
+ if (wait_ordered && !trans) {
+ btrfs_wait_ordered_extents(root, 0, 0);
+ } else {
+ time_left = schedule_timeout_interruptible(1);
- /* We were interrupted, exit */
- if (time_left)
- break;
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+ }
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
@@ -3375,35 +3412,39 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
}
- if (reclaimed >= to_reclaim && !trans)
- btrfs_wait_ordered_extents(root, 0, 0);
+
return reclaimed >= to_reclaim;
}
-/*
- * Retries tells us how many times we've called reserve_metadata_bytes. The
- * idea is if this is the first call (retries == 0) then we will add to our
- * reserved count if we can't make the allocation in order to hold our place
- * while we go and try and free up space. That way for retries > 1 we don't try
- * and add space, we just check to see if the amount of unused space is >= the
- * total space, meaning that our reservation is valid.
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - wether or not we can flush to make our reservation
*
- * However if we don't intend to retry this reservation, pass -1 as retries so
- * that it short circuits this logic.
+ * This will reserve orgi_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
*/
-static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 unused;
+ struct btrfs_trans_handle *trans;
+ u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
bool committed = false;
bool flushing = false;
+ bool wait_ordered = false;
+ trans = (struct btrfs_trans_handle *)current->journal_info;
again:
ret = 0;
spin_lock(&space_info->lock);
@@ -3431,9 +3472,9 @@ again:
}
ret = -ENOSPC;
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
/*
* The idea here is that we've not already over-reserved the block group
@@ -3442,10 +3483,9 @@ again:
* lets start flushing stuff first and then come back and try to make
* our reservation.
*/
- if (unused <= space_info->total_bytes) {
- unused = space_info->total_bytes - unused;
- if (unused >= num_bytes) {
- space_info->bytes_reserved += orig_bytes;
+ if (used <= space_info->total_bytes) {
+ if (used + orig_bytes <= space_info->total_bytes) {
+ space_info->bytes_may_use += orig_bytes;
ret = 0;
} else {
/*
@@ -3461,10 +3501,60 @@ again:
* amount plus the amount of bytes that we need for this
* reservation.
*/
- num_bytes = unused - space_info->total_bytes +
+ wait_ordered = true;
+ num_bytes = used - space_info->total_bytes +
(orig_bytes * (retries + 1));
}
+ if (ret) {
+ u64 profile = btrfs_get_alloc_profile(root, 0);
+ u64 avail;
+
+ /*
+ * If we have a lot of space that's pinned, don't bother doing
+ * the overcommit dance yet and just commit the transaction.
+ */
+ avail = (space_info->total_bytes - space_info->bytes_used) * 8;
+ do_div(avail, 10);
+ if (space_info->bytes_pinned >= avail && flush && !trans &&
+ !committed) {
+ space_info->flush = 1;
+ flushing = true;
+ spin_unlock(&space_info->lock);
+ goto commit;
+ }
+
+ spin_lock(&root->fs_info->free_chunk_lock);
+ avail = root->fs_info->free_chunk_space;
+
+ /*
+ * If we have dup, raid1 or raid10 then only half of the free
+ * space is actually useable.
+ */
+ if (profile & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ avail >>= 1;
+
+ /*
+ * If we aren't flushing don't let us overcommit too much, say
+ * 1/8th of the space. If we can flush, let it overcommit up to
+ * 1/2 of the space.
+ */
+ if (flush)
+ avail >>= 3;
+ else
+ avail >>= 1;
+ spin_unlock(&root->fs_info->free_chunk_lock);
+
+ if (used + num_bytes < space_info->total_bytes + avail) {
+ space_info->bytes_may_use += orig_bytes;
+ ret = 0;
+ } else {
+ wait_ordered = true;
+ }
+ }
+
/*
* Couldn't make our reservation, save our place so while we're trying
* to reclaim space we can actually use it instead of somebody else
@@ -3484,7 +3574,7 @@ again:
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret = shrink_delalloc(trans, root, num_bytes, 1);
+ ret = shrink_delalloc(trans, root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
@@ -3496,25 +3586,16 @@ again:
* so go back around and try again.
*/
if (retries < 2) {
+ wait_ordered = true;
retries++;
goto again;
}
- /*
- * Not enough space to be reclaimed, don't bother committing the
- * transaction.
- */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < orig_bytes)
- ret = -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
- goto out;
-
ret = -EAGAIN;
if (trans)
goto out;
+commit:
ret = -ENOSPC;
if (committed)
goto out;
@@ -3542,10 +3623,12 @@ out:
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_rsv *block_rsv;
- if (root->ref_cows)
+ struct btrfs_block_rsv *block_rsv = NULL;
+
+ if (root->ref_cows || root == root->fs_info->csum_root)
block_rsv = trans->block_rsv;
- else
+
+ if (!block_rsv)
block_rsv = root->block_rsv;
if (!block_rsv)
@@ -3616,7 +3699,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
}
if (num_bytes) {
spin_lock(&space_info->lock);
- space_info->bytes_reserved -= num_bytes;
+ space_info->bytes_may_use -= num_bytes;
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
@@ -3640,9 +3723,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
{
memset(rsv, 0, sizeof(*rsv));
spin_lock_init(&rsv->lock);
- atomic_set(&rsv->usage, 1);
- rsv->priority = 6;
- INIT_LIST_HEAD(&rsv->list);
}
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
@@ -3663,29 +3743,11 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
- if (rsv && atomic_dec_and_test(&rsv->usage)) {
- btrfs_block_rsv_release(root, rsv, (u64)-1);
- if (!rsv->durable)
- kfree(rsv);
- }
-}
-
-/*
- * make the block_rsv struct be able to capture freed space.
- * the captured space will re-add to the the block_rsv struct
- * after transaction commit
- */
-void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv)
-{
- block_rsv->durable = 1;
- mutex_lock(&fs_info->durable_block_rsv_mutex);
- list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
- mutex_unlock(&fs_info->durable_block_rsv_mutex);
+ btrfs_block_rsv_release(root, rsv, (u64)-1);
+ kfree(rsv);
}
-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
@@ -3694,7 +3756,7 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
if (num_bytes == 0)
return 0;
- ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
@@ -3703,55 +3765,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved, int min_factor)
+int btrfs_block_rsv_check(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, int min_factor)
{
u64 num_bytes = 0;
- int commit_trans = 0;
int ret = -ENOSPC;
if (!block_rsv)
return 0;
spin_lock(&block_rsv->lock);
- if (min_factor > 0)
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (min_reserved > num_bytes)
- num_bytes = min_reserved;
+ num_bytes = div_factor(block_rsv->size, min_factor);
+ if (block_rsv->reserved >= num_bytes)
+ ret = 0;
+ spin_unlock(&block_rsv->lock);
- if (block_rsv->reserved >= num_bytes) {
+ return ret;
+}
+
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved)
+{
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+
+ spin_lock(&block_rsv->lock);
+ num_bytes = min_reserved;
+ if (block_rsv->reserved >= num_bytes)
ret = 0;
- } else {
+ else
num_bytes -= block_rsv->reserved;
- if (block_rsv->durable &&
- block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
- commit_trans = 1;
- }
spin_unlock(&block_rsv->lock);
+
if (!ret)
return 0;
- if (block_rsv->refill_used) {
- ret = reserve_metadata_bytes(trans, root, block_rsv,
- num_bytes, 0);
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, 0);
- return 0;
- }
- }
-
- if (commit_trans) {
- if (trans)
- return -EAGAIN;
- trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
- ret = btrfs_commit_transaction(trans, root);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+ if (!ret) {
+ block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
}
- return -ENOSPC;
+ return ret;
}
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
@@ -3827,12 +3886,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
block_rsv->reserved += num_bytes;
- sinfo->bytes_reserved += num_bytes;
+ sinfo->bytes_may_use += num_bytes;
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
- sinfo->bytes_reserved -= num_bytes;
+ sinfo->bytes_may_use -= num_bytes;
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
@@ -3848,16 +3907,12 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
fs_info->chunk_block_rsv.space_info = space_info;
- fs_info->chunk_block_rsv.priority = 10;
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
fs_info->global_block_rsv.space_info = space_info;
- fs_info->global_block_rsv.priority = 10;
- fs_info->global_block_rsv.refill_used = 1;
fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
- fs_info->empty_block_rsv.priority = 10;
fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3865,10 +3920,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
- btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
-
- btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
-
update_global_block_rsv(fs_info);
}
@@ -3883,46 +3934,13 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
-int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
- u64 num_bytes;
- int ret;
-
- /*
- * Truncate should be freeing data, but give us 2 items just in case it
- * needs to use some space. We may want to be smarter about this in the
- * future.
- */
- num_bytes = btrfs_calc_trans_metadata_size(root, 2);
-
- /* We already have enough bytes, just return */
- if (rsv->reserved >= num_bytes)
- return 0;
-
- num_bytes -= rsv->reserved;
-
- /*
- * You should have reserved enough space before hand to do this, so this
- * should not fail.
- */
- ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
- BUG_ON(ret);
-
- return 0;
-}
-
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (!trans->bytes_reserved)
return;
- BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
- btrfs_block_rsv_release(root, trans->block_rsv,
- trans->bytes_reserved);
+ btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
@@ -3964,11 +3982,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
+/**
+ * drop_outstanding_extent - drop an outstanding extent
+ * @inode: the inode we're dropping the extent for
+ *
+ * This is called when we are freeing up an outstanding extent, either called
+ * after an error or after an extent is written. This will return the number of
+ * reserved extents that need to be freed. This must be called with
+ * BTRFS_I(inode)->lock held.
+ */
static unsigned drop_outstanding_extent(struct inode *inode)
{
unsigned dropped_extents = 0;
- spin_lock(&BTRFS_I(inode)->lock);
BUG_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
@@ -3978,19 +4004,70 @@ static unsigned drop_outstanding_extent(struct inode *inode)
*/
if (BTRFS_I(inode)->outstanding_extents >=
BTRFS_I(inode)->reserved_extents)
- goto out;
+ return 0;
dropped_extents = BTRFS_I(inode)->reserved_extents -
BTRFS_I(inode)->outstanding_extents;
BTRFS_I(inode)->reserved_extents -= dropped_extents;
-out:
- spin_unlock(&BTRFS_I(inode)->lock);
return dropped_extents;
}
-static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
+/**
+ * calc_csum_metadata_size - return the amount of metada space that must be
+ * reserved/free'd for the given bytes.
+ * @inode: the inode we're manipulating
+ * @num_bytes: the number of bytes in question
+ * @reserve: 1 if we are reserving space, 0 if we are freeing space
+ *
+ * This adjusts the number of csum_bytes in the inode and then returns the
+ * correct amount of metadata that must either be reserved or freed. We
+ * calculate how many checksums we can fit into one leaf and then divide the
+ * number of bytes that will need to be checksumed by this value to figure out
+ * how many checksums will be required. If we are adding bytes then the number
+ * may go up and we will return the number of additional bytes that must be
+ * reserved. If it is going down we will return the number of bytes that must
+ * be freed.
+ *
+ * This must be called with BTRFS_I(inode)->lock held.
+ */
+static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
+ int reserve)
{
- return num_bytes >>= 3;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 csum_size;
+ int num_csums_per_leaf;
+ int num_csums;
+ int old_csums;
+
+ if (BTRFS_I(inode)->flags & BTRFS_INODE