aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h8
-rw-r--r--fs/btrfs/ctree.c71
-rw-r--r--fs/btrfs/ctree.h60
-rw-r--r--fs/btrfs/disk-io.c50
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/extent-tree.c384
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c24
-rw-r--r--fs/btrfs/inode-map.c1
-rw-r--r--fs/btrfs/inode.c66
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/locking.c39
-rw-r--r--fs/btrfs/locking.h4
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c14
17 files changed, 542 insertions, 206 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a8c9693b75a..72677ce2b74 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,9 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;
+ /* the space_info for where this inode's data allocations are done */
+ struct btrfs_space_info *space_info;
+
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
@@ -94,6 +97,11 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;
+ /* total number of bytes that may be used for this inode for
+ * delalloc
+ */
+ u64 reserved_bytes;
+
/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 551177c0011..37f31b5529a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
-inline void btrfs_init_path(struct btrfs_path *p)
-{
- memset(p, 0, sizeof(*p));
-}
-
struct btrfs_path *btrfs_alloc_path(void)
{
struct btrfs_path *path;
- path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
- if (path) {
- btrfs_init_path(path);
+ path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
+ if (path)
path->reada = 1;
- }
return path;
}
@@ -69,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
/*
* reset all the locked nodes in the patch to spinning locks.
+ *
+ * held is used to keep lockdep happy, when lockdep is enabled
+ * we set held to a blocking lock before we go around and
+ * retake all the spinlocks in the path. You can safely use NULL
+ * for held
*/
-noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
+noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
+ struct extent_buffer *held)
{
int i;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /* lockdep really cares that we take all of these spinlocks
+ * in the right order. If any of the locks in the path are not
+ * currently blocking, it is going to complain. So, make really
+ * really sure by forcing the path to blocking before we clear
+ * the path blocking.
+ */
+ if (held)
+ btrfs_set_lock_blocking(held);
+ btrfs_set_path_blocking(p);
+#endif
+
+ for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
if (p->nodes[i] && p->locks[i])
btrfs_clear_lock_blocking(p->nodes[i]);
}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (held)
+ btrfs_clear_lock_blocking(held);
+#endif
}
/* this also releases the path */
@@ -260,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (*cow_ret == buf)
unlock_orig = 1;
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (parent)
parent_start = parent->start;
@@ -286,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, level, &ins);
BUG_ON(ret);
cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
- buf->len);
+ buf->len, level);
} else {
cow = btrfs_alloc_free_block(trans, root, buf->len,
parent_start,
@@ -917,9 +934,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
+ BUG_ON(!child);
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
- BUG_ON(!child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
BUG_ON(ret);
@@ -1530,8 +1547,7 @@ again:
* for higher level blocks, try not to allocate blocks
* with the block and the parent locks held.
*/
- if (level > 0 && !prealloc_block.objectid &&
- btrfs_path_lock_waiting(p, level)) {
+ if (level > 0 && !prealloc_block.objectid) {
u32 size = b->len;
u64 hint = b->start;
@@ -1567,7 +1583,7 @@ cow_done:
if (!p->skip_locking)
p->locks[level] = 1;
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
/*
* we have a lock on b and as long as we aren't changing
@@ -1606,7 +1622,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0);
if (sret) {
@@ -1626,7 +1642,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
if (sret) {
ret = sret;
@@ -1689,13 +1705,13 @@ cow_done:
if (!p->skip_locking) {
int lret;
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
lret = btrfs_try_spin_lock(b);
if (!lret) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, b);
}
}
} else {
@@ -1707,7 +1723,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = split_leaf(trans, root, key,
p, ins_len, ret == 0);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0);
if (sret) {
@@ -2349,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (slot >= btrfs_header_nritems(upper) - 1)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
btrfs_tree_lock(right);
@@ -2546,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems == 0)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
btrfs_tree_lock(left);
@@ -3927,7 +3943,6 @@ find_next_key:
btrfs_release_path(root, path);
goto again;
} else {
- btrfs_clear_path_blocking(path);
goto out;
}
}
@@ -3947,7 +3962,7 @@ find_next_key:
path->locks[level - 1] = 1;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
- btrfs_clear_path_blocking(path);
+ btrfs_clear_path_blocking(path, NULL);
}
out:
if (ret == 0)
@@ -4086,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
next = read_node_slot(root, c, slot);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(c));
+ btrfs_assert_tree_locked(c);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
@@ -4111,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
reada_for_search(root, path, level, slot, 0);
next = read_node_slot(root, next, 0);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+ btrfs_assert_tree_locked(path->nodes[level]);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 531db112c8b..5e1d4e30e9d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -43,11 +43,7 @@ struct btrfs_ordered_sum;
#define BTRFS_ACL_NOT_CACHED ((void *)-1)
-#ifdef CONFIG_LOCKDEP
-# define BTRFS_MAX_LEVEL 7
-#else
-# define BTRFS_MAX_LEVEL 8
-#endif
+#define BTRFS_MAX_LEVEL 8
/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
@@ -600,13 +596,27 @@ struct btrfs_block_group_item {
struct btrfs_space_info {
u64 flags;
- u64 total_bytes;
- u64 bytes_used;
- u64 bytes_pinned;
- u64 bytes_reserved;
- u64 bytes_readonly;
- int full;
- int force_alloc;
+
+ u64 total_bytes; /* total bytes in the space */
+ u64 bytes_used; /* total bytes used on disk */
+ u64 bytes_pinned; /* total bytes pinned, will be freed when the
+ transaction finishes */
+ u64 bytes_reserved; /* total bytes the allocator has reserved for
+ current allocations */
+ u64 bytes_readonly; /* total bytes that are read only */
+
+ /* delalloc accounting */
+ u64 bytes_delalloc; /* number of bytes reserved for allocation,
+ this space is not necessarily reserved yet
+ by the allocator */
+ u64 bytes_may_use; /* number of bytes that may be used for
+ delalloc */
+
+ int full; /* indicates that we cannot allocate any more
+ chunks for this space */
+ int force_alloc; /* set if we need to force a chunk alloc for
+ this space */
+
struct list_head list;
/* for block groups in our same type */
@@ -774,7 +784,14 @@ struct btrfs_fs_info {
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * the space_info list is almost entirely read only. It only changes
+ * when we add a new raid type to the FS, and that happens
+ * very rarely. RCU is used to protect it.
+ */
struct list_head space_info;
+
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
@@ -1715,7 +1732,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 empty_size);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u32 blocksize);
+ u64 bytenr, u32 blocksize,
+ int level);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 parent, u64 min_bytes,
@@ -1785,6 +1803,18 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
+int btrfs_check_metadata_free_space(struct btrfs_root *root);
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes);
+void btrfs_free_reserved_data_space(struct btrfs_root *root,
+ struct inode *inode, u64 bytes);
+void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes);
+void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
@@ -1834,9 +1864,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
-void btrfs_init_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
-void btrfs_clear_path_blocking(struct btrfs_path *p);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2032,8 +2060,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
unsigned long btrfs_force_ra(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
pgoff_t offset, pgoff_t last_index);
-int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
- int for_del);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_delete_inode(struct inode *inode);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5aebddd7119..3e18175248e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -75,6 +75,40 @@ struct async_submit_bio {
struct btrfs_work work;
};
+/* These are used to set the lockdep class on the extent buffer locks.
+ * The class is set by the readpage_end_io_hook after the buffer has
+ * passed csum validation but before the pages are unlocked.
+ *
+ * The lockdep class is also set by btrfs_init_new_buffer on freshly
+ * allocated blocks.
+ *
+ * The class is based on the level in the tree block, which allows lockdep
+ * to know that lower nodes nest inside the locks of higher nodes.
+ *
+ * We also add a check to make sure the highest level of the tree is
+ * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
+ * code needs update as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# if BTRFS_MAX_LEVEL != 8
+# error
+# endif
+static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
+static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
+ /* leaf */
+ "btrfs-extent-00",
+ "btrfs-extent-01",
+ "btrfs-extent-02",
+ "btrfs-extent-03",
+ "btrfs-extent-04",
+ "btrfs-extent-05",
+ "btrfs-extent-06",
+ "btrfs-extent-07",
+ /* highest possible level */
+ "btrfs-extent-08",
+};
+#endif
+
/*
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
@@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
return ret;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
+{
+ lockdep_set_class_and_name(&eb->lock,
+ &btrfs_eb_class[level],
+ btrfs_eb_name[level]);
+}
+#endif
+
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
+ btrfs_set_buffer_lockdep_class(eb, found_level);
+
ret = csum_tree_block(root, eb, 1);
if (ret)
ret = -EIO;
@@ -812,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
/* ugh, clear_extent_buffer_dirty can be expensive */
btrfs_set_lock_blocking(buf);
@@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_DEV_TREE_OBJECTID, dev_root);
dev_root->track_dirty = 1;
-
if (ret)
goto fail_extent_root;
@@ -2317,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
btrfs_set_lock_blocking(buf);
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 494a56eb298..95029db227b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btree_lock_page_hook(struct page *page);
+
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+#else
+static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
+ int level)
+{
+}
+#endif
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7527523c2d2..fefe83ad205 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
@@ -60,6 +61,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root, u64 alloc_bytes,
+ u64 flags, int force);
+
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
@@ -326,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
- list_for_each_entry(found, head, list) {
- if (found->flags == flags)
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags == flags) {
+ rcu_read_unlock();
return found;
+ }
}
+ rcu_read_unlock();
return NULL;
}
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
+}
+
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
@@ -1323,8 +1348,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- finish_current_insert(trans, root->fs_info->extent_root, 1);
- del_pending_extents(trans, root->fs_info->extent_root, 1);
+ u64 start;
+ u64 end;
+ int ret;
+
+ while(1) {
+ finish_current_insert(trans, root->fs_info->extent_root, 1);
+ del_pending_extents(trans, root->fs_info->extent_root, 1);
+
+ /* is there more work to do? */
+ ret = find_first_extent_bit(&root->fs_info->pending_del,
+ 0, &start, &end, EXTENT_WRITEBACK);
+ if (!ret)
+ continue;
+ ret = find_first_extent_bit(&root->fs_info->extent_ins,
+ 0, &start, &end, EXTENT_WRITEBACK);
+ if (!ret)
+ continue;
+ break;
+ }
return 0;
}
@@ -1882,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- list_add(&found->list, &info->space_info);
INIT_LIST_HEAD(&found->block_groups);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
@@ -1892,9 +1933,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
+ found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
+ list_add_rcu(&found->list, &info->space_info);
return 0;
}
@@ -1955,6 +1998,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ u64 alloc_profile;
+
+ if (data) {
+ alloc_profile = info->avail_data_alloc_bits &
+ info->data_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+ } else if (root == root->fs_info->chunk_root) {
+ alloc_profile = info->avail_system_alloc_bits &
+ info->system_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+ } else {
+ alloc_profile = info->avail_metadata_alloc_bits &
+ info->metadata_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+ }
+
+ return btrfs_reduce_alloc_profile(root, data);
+}
+
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+{
+ u64 alloc_target;
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+ alloc_target);
+}
+
+/*
+ * for now this just makes sure we have at least 5% of our metadata space free
+ * for use.
+ */
+int btrfs_check_metadata_free_space(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 alloc_target, thresh;
+ int committed = 0, ret;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+again:
+ spin_lock(&meta_sinfo->lock);
+ if (!meta_sinfo->full)
+ thresh = meta_sinfo->total_bytes * 80;
+ else
+ thresh = meta_sinfo->total_bytes * 95;
+
+ do_div(thresh, 100);
+
+ if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+ struct btrfs_trans_handle *trans;
+ if (!meta_sinfo->full) {
+ meta_sinfo->force_alloc = 1;
+ spin_unlock(&meta_sinfo->lock);
+
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ 2 * 1024 * 1024, alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ goto again;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ if (!committed) {
+ committed = 1;
+ trans = btrfs_join_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+ return -ENOSPC;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ return 0;
+}
+
+/*
+ * This will check the space that the inode allocates from to make sure we have
+ * enough space for bytes.
+ */
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+ int ret = 0, committed = 0;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+again:
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ if (data_sinfo->total_bytes - data_sinfo->bytes_used -
+ data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
+ data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
+ data_sinfo->bytes_may_use < bytes) {
+ struct btrfs_trans_handle *trans;
+
+ /*
+ * if we don't have enough free bytes in this space then we need
+ * to alloc a new chunk.
+ */
+ if (!data_sinfo->full) {
+ u64 alloc_target;
+
+ data_sinfo->force_alloc = 1;
+ spin_unlock(&data_sinfo->lock);
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ bytes + 2 * 1024 * 1024,
+ alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+ spin_unlock(&data_sinfo->lock);
+
+ /* commit the current transaction and try again */
+ if (!committed) {
+ committed = 1;
+ trans = btrfs_join_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+
+ printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
+ ", %llu bytes_used, %llu bytes_reserved, "
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu total\n", bytes, data_sinfo->bytes_delalloc,
+ data_sinfo->bytes_used, data_sinfo->bytes_reserved,
+ data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
+ data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+ return -ENOSPC;
+ }
+ data_sinfo->bytes_may_use += bytes;
+ BTRFS_I(inode)->reserved_bytes += bytes;
+ spin_unlock(&data_sinfo->lock);
+
+ return btrfs_check_metadata_free_space(root);
+}
+
+/*
+ * if there was an error for whatever reason after calling
+ * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ */
+void btrfs_free_reserved_data_space(struct btrfs_root *root,
+ struct inode *inode, u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are adding a delalloc extent to the inode's io_tree */
+void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* get the space info for where this inode will be storing its data */
+ data_sinfo = BTRFS_I(inode)->space_info;
+
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_delalloc += bytes;
+
+ /*
+ * we are adding a delalloc extent without calling
+ * btrfs_check_data_free_space first. This happens on a weird
+ * writepage condition, but shouldn't hurt our accounting
+ */
+ if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
+ data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
+ BTRFS_I(inode)->reserved_bytes = 0;
+ } else {
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ }
+
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are clearing an delalloc extent from the inode's io_tree */
+void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *info;
+
+ info = BTRFS_I(inode)->space_info;
+
+ spin_lock(&info->lock);
+ info->bytes_delalloc -= bytes;
+ spin_unlock(&info->lock);
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
@@ -2211,13 +2481,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
u64 end;
u64 priv;
u64 search = 0;
- u64 skipped = 0;
struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_path *path;
struct pending_extent_op *extent_op, *tmp;
struct list_head insert_list, update_list;
int ret;
- int num_inserts = 0, max_inserts;
+ int num_inserts = 0, max_inserts, restart = 0;
path = btrfs_alloc_path();
INIT_LIST_HEAD(&insert_list);
@@ -2233,19 +2502,19 @@ again:
ret = find_first_extent_bit(&info->extent_ins, search, &start,
&end, EXTENT_WRITEBACK);
if (ret) {
- if (skipped && all && !num_inserts &&
+ if (restart && !num_inserts &&
list_empty(&update_list)) {
- skipped = 0;
+ restart = 0;
search = 0;
continue;
}
- mutex_unlock(&info->extent_ins_mutex);
break;
}
ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
if (!ret) {
- skipped = 1;
+ if (all)
+ restart = 1;
search = end + 1;
if (need_resched()) {
mutex_unlock(&info->extent_ins_mutex);
@@ -2264,7 +2533,7 @@ again:
list_add_tail(&extent_op->list, &insert_list);
search = end + 1;
if (num_inserts == max_inserts) {
- mutex_unlock(&info->extent_ins_mutex);
+ restart = 1;
break;
}
} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
@@ -2280,7 +2549,6 @@ again:
* somebody marked this thing for deletion then just unlock it and be
* done, the free_extents will handle it
*/
- mutex_lock(&info->extent_ins_mutex);
list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
clear_extent_bits(&info->extent_ins, extent_op->bytenr,
extent_op->bytenr + extent_op->num_bytes - 1,
@@ -2302,6 +2570,10 @@ again:
if (!list_empty(&update_list)) {
ret = update_backrefs(trans, extent_root, path, &update_list);
BUG_ON(ret);
+
+ /* we may have COW'ed new blocks, so lets start over */
+ if (all)
+ restart = 1;
}
/*
@@ -2309,9 +2581,9 @@ again:
* need to make sure everything is cleaned then reset everything and
* go back to the beginning
*/
- if (!num_inserts && all && skipped) {
+ if (!num_inserts && restart) {
search = 0;
- skipped = 0;
+ restart = 0;
INIT_LIST_HEAD(&update_list);
INIT_LIST_HEAD(&insert_list);
goto again;
@@ -2368,27 +2640,19 @@ again:
BUG_ON(ret);
/*
- * if we broke out of the loop in order to insert stuff because we hit
- * the maximum number of inserts at a time we can handle, then loop
- * back and pick up where we left off
+ * if restart is set for whatever reason we need to go back and start
+ * searching through the pending list again.
+ *
+ * We just inserted some extents, which could have resulted in new
+ * blocks being allocated, which would result in new blocks needing
+ * updates, so if all is set we _must_ restart to get the updated
+ * blocks.
*/
- if (num_inserts == max_inserts) {
- INIT_LIST_HEAD(&insert_list);
- INIT_LIST_HEAD(&update_list);
- num_inserts = 0;
- goto again;
- }
-
- /*
- * again, if we need to make absolutely sure there are no more pending
- * extent operations left and we know that we skipped some, go back to
- * the beginning and do it all again
- */
- if (all && skipped) {
+ if (restart || all) {
INIT_LIST_HEAD(&insert_list);
INIT_LIST_HEAD(&update_list);
search = 0;
- skipped = 0;
+ restart = 0;
num_inserts = 0;
goto again;
}
@@ -2709,6 +2973,8 @@ again:
goto again;
}
+ if (!err)
+ finish_current_insert(trans, extent_root, 0);
return err;
}
@@ -2859,7 +3125,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (data & BTRFS_BLOCK_GROUP_METADATA) {
last_ptr = &root->fs_info->last_alloc;
- empty_cluster = 64 * 1024;
+ if (!btrfs_test_opt(root, SSD))
+ empty_cluster = 64 * 1024;
}
if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
@@ -3091,6 +3358,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
+ printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
+ " may_use=%llu, used=%llu\n", info->total_bytes,
+ info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
+ info->bytes_used);
down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
@@ -3117,24 +3388,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
- u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;
- if (data) {
- alloc_profile = info->avail_data_alloc_bits &
- info->data_alloc_profile;
- data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
- } else if (root == root->fs_info->chunk_root) {
- alloc_profile = info->avail_system_alloc_bits &
- info->system_alloc_profile;
- data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
- } else {
- alloc_profile = info->avail_metadata_alloc_bits &
- info->metadata_alloc_profile;
- data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
- }
+ data = btrfs_get_alloc_profile(root, data);
a