diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 8 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 71 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 60 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 50 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 10 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 384 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 2 | ||||
-rw-r--r-- | fs/btrfs/file.c | 24 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 1 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 66 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 6 | ||||
-rw-r--r-- | fs/btrfs/locking.c | 39 | ||||
-rw-r--r-- | fs/btrfs/locking.h | 4 | ||||
-rw-r--r-- | fs/btrfs/super.c | 5 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 2 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 2 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 14 |
17 files changed, 542 insertions, 206 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a8c9693b75a..72677ce2b74 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -66,6 +66,9 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; + /* the space_info for where this inode's data allocations are done */ + struct btrfs_space_info *space_info; + /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ @@ -94,6 +97,11 @@ struct btrfs_inode { */ u64 delalloc_bytes; + /* total number of bytes that may be used for this inode for + * delalloc + */ + u64 reserved_bytes; + /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 551177c0011..37f31b5529a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans, static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -inline void btrfs_init_path(struct btrfs_path *p) -{ - memset(p, 0, sizeof(*p)); -} - struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; - path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); - if (path) { - btrfs_init_path(path); + path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); + if (path) path->reada = 1; - } return path; } @@ -69,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) /* * reset all the locked nodes in the patch to spinning locks. + * + * held is used to keep lockdep happy, when lockdep is enabled + * we set held to a blocking lock before we go around and + * retake all the spinlocks in the path. You can safely use NULL + * for held */ -noinline void btrfs_clear_path_blocking(struct btrfs_path *p) +noinline void btrfs_clear_path_blocking(struct btrfs_path *p, + struct extent_buffer *held) { int i; - for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* lockdep really cares that we take all of these spinlocks + * in the right order. If any of the locks in the path are not + * currently blocking, it is going to complain. So, make really + * really sure by forcing the path to blocking before we clear + * the path blocking. + */ + if (held) + btrfs_set_lock_blocking(held); + btrfs_set_path_blocking(p); +#endif + + for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) btrfs_clear_lock_blocking(p->nodes[i]); } + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (held) + btrfs_clear_lock_blocking(held); +#endif } /* this also releases the path */ @@ -260,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (*cow_ret == buf) unlock_orig = 1; - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (parent) parent_start = parent->start; @@ -286,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, level, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, - buf->len); + buf->len, level); } else { cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, @@ -917,9 +934,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); + BUG_ON(!child); btrfs_tree_lock(child); btrfs_set_lock_blocking(child); - BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); @@ -1530,8 +1547,7 @@ again: * for higher level blocks, try not to allocate blocks * with the block and the parent locks held. */ - if (level > 0 && !prealloc_block.objectid && - btrfs_path_lock_waiting(p, level)) { + if (level > 0 && !prealloc_block.objectid) { u32 size = b->len; u64 hint = b->start; @@ -1567,7 +1583,7 @@ cow_done: if (!p->skip_locking) p->locks[level] = 1; - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); /* * we have a lock on b and as long as we aren't changing @@ -1606,7 +1622,7 @@ cow_done: btrfs_set_path_blocking(p); sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); BUG_ON(sret > 0); if (sret) { @@ -1626,7 +1642,7 @@ cow_done: btrfs_set_path_blocking(p); sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); if (sret) { ret = sret; @@ -1689,13 +1705,13 @@ cow_done: if (!p->skip_locking) { int lret; - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); lret = btrfs_try_spin_lock(b); if (!lret) { btrfs_set_path_blocking(p); btrfs_tree_lock(b); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, b); } } } else { @@ -1707,7 +1723,7 @@ cow_done: btrfs_set_path_blocking(p); sret = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p); + btrfs_clear_path_blocking(p, NULL); BUG_ON(sret > 0); if (sret) { @@ -2349,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); @@ -2546,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems == 0) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); @@ -3927,7 +3943,6 @@ find_next_key: btrfs_release_path(root, path); goto again; } else { - btrfs_clear_path_blocking(path); goto out; } } @@ -3947,7 +3962,7 @@ find_next_key: path->locks[level - 1] = 1; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path); + btrfs_clear_path_blocking(path, NULL); } out: if (ret == 0) @@ -4086,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) next = read_node_slot(root, c, slot); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(c)); + btrfs_assert_tree_locked(c); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } @@ -4111,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_assert_tree_locked(path->nodes[level]); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 531db112c8b..5e1d4e30e9d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -43,11 +43,7 @@ struct btrfs_ordered_sum; #define BTRFS_ACL_NOT_CACHED ((void *)-1) -#ifdef CONFIG_LOCKDEP -# define BTRFS_MAX_LEVEL 7 -#else -# define BTRFS_MAX_LEVEL 8 -#endif +#define BTRFS_MAX_LEVEL 8 /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL @@ -600,13 +596,27 @@ struct btrfs_block_group_item { struct btrfs_space_info { u64 flags; - u64 total_bytes; - u64 bytes_used; - u64 bytes_pinned; - u64 bytes_reserved; - u64 bytes_readonly; - int full; - int force_alloc; + + u64 total_bytes; /* total bytes in the space */ + u64 bytes_used; /* total bytes used on disk */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + /* delalloc accounting */ + u64 bytes_delalloc; /* number of bytes reserved for allocation, + this space is not necessarily reserved yet + by the allocator */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc */ + + int full; /* indicates that we cannot allocate any more + chunks for this space */ + int force_alloc; /* set if we need to force a chunk alloc for + this space */ + struct list_head list; /* for block groups in our same type */ @@ -774,7 +784,14 @@ struct btrfs_fs_info { struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; + + /* + * the space_info list is almost entirely read only. It only changes + * when we add a new raid type to the FS, and that happens + * very rarely. RCU is used to protect it. + */ struct list_head space_info; + spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; @@ -1715,7 +1732,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, u64 empty_size); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u32 blocksize); + u64 bytenr, u32 blocksize, + int level); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_bytes, @@ -1785,6 +1803,18 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +void btrfs_clear_space_info_full(struct btrfs_fs_info *info); + +int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes); +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -1834,9 +1864,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); -void btrfs_init_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); -void btrfs_clear_path_blocking(struct btrfs_path *p); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2032,8 +2060,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5aebddd7119..3e18175248e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -75,6 +75,40 @@ struct async_submit_bio { struct btrfs_work work; }; +/* These are used to set the lockdep class on the extent buffer locks. + * The class is set by the readpage_end_io_hook after the buffer has + * passed csum validation but before the pages are unlocked. + * + * The lockdep class is also set by btrfs_init_new_buffer on freshly + * allocated blocks. + * + * The class is based on the level in the tree block, which allows lockdep + * to know that lower nodes nest inside the locks of higher nodes. + * + * We also add a check to make sure the highest level of the tree is + * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this + * code needs update as well. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# if BTRFS_MAX_LEVEL != 8 +# error +# endif +static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; +static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { + /* leaf */ + "btrfs-extent-00", + "btrfs-extent-01", + "btrfs-extent-02", + "btrfs-extent-03", + "btrfs-extent-04", + "btrfs-extent-05", + "btrfs-extent-06", + "btrfs-extent-07", + /* highest possible level */ + "btrfs-extent-08", +}; +#endif + /* * extents on the btree inode are pretty simple, there's one extent * that covers the entire device @@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root, return ret; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) +{ + lockdep_set_class_and_name(&eb->lock, + &btrfs_eb_class[level], + btrfs_eb_name[level]); +} +#endif + static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } found_level = btrfs_header_level(eb); + btrfs_set_buffer_lockdep_class(eb, found_level); + ret = csum_tree_block(root, eb, 1); if (ret) ret = -EIO; @@ -812,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); /* ugh, clear_extent_buffer_dirty can be expensive */ btrfs_set_lock_blocking(buf); @@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); dev_root->track_dirty = 1; - if (ret) goto fail_extent_root; @@ -2317,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) btrfs_set_lock_blocking(buf); - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 494a56eb298..95029db227b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btree_lock_page_hook(struct page *page); + + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); +#else +static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, + int level) +{ +} +#endif #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7527523c2d2..fefe83ad205 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -20,6 +20,7 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/sort.h> +#include <linux/rcupdate.h> #include "compat.h" #include "hash.h" #include "crc32c.h" @@ -60,6 +61,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags, int force); + static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { return (cache->flags & bits) == bits; @@ -326,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, { struct list_head *head = &info->space_info; struct btrfs_space_info *found; - list_for_each_entry(found, head, list) { - if (found->flags == flags) + + rcu_read_lock(); + list_for_each_entry_rcu(found, head, list) { + if (found->flags == flags) { + rcu_read_unlock(); return found; + } } + rcu_read_unlock(); return NULL; } +/* + * after adding space to the filesystem, we need to clear the full flags + * on all the space infos. + */ +void btrfs_clear_space_info_full(struct btrfs_fs_info *info) +{ + struct list_head *head = &info->space_info; + struct btrfs_space_info *found; + + rcu_read_lock(); + list_for_each_entry_rcu(found, head, list) + found->full = 0; + rcu_read_unlock(); +} + static u64 div_factor(u64 num, int factor) { if (factor == 10) @@ -1323,8 +1348,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - finish_current_insert(trans, root->fs_info->extent_root, 1); - del_pending_extents(trans, root->fs_info->extent_root, 1); + u64 start; + u64 end; + int ret; + + while(1) { + finish_current_insert(trans, root->fs_info->extent_root, 1); + del_pending_extents(trans, root->fs_info->extent_root, 1); + + /* is there more work to do? */ + ret = find_first_extent_bit(&root->fs_info->pending_del, + 0, &start, &end, EXTENT_WRITEBACK); + if (!ret) + continue; + ret = find_first_extent_bit(&root->fs_info->extent_ins, + 0, &start, &end, EXTENT_WRITEBACK); + if (!ret) + continue; + break; + } return 0; } @@ -1882,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - list_add(&found->list, &info->space_info); INIT_LIST_HEAD(&found->block_groups); init_rwsem(&found->groups_sem); spin_lock_init(&found->lock); @@ -1892,9 +1933,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_pinned = 0; found->bytes_reserved = 0; found->bytes_readonly = 0; + found->bytes_delalloc = 0; found->full = 0; found->force_alloc = 0; *space_info = found; + list_add_rcu(&found->list, &info->space_info); return 0; } @@ -1955,6 +1998,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return flags; } +static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) +{ + struct btrfs_fs_info *info = root->fs_info; + u64 alloc_profile; + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (root == root->fs_info->chunk_root) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + return btrfs_reduce_alloc_profile(root, data); +} + +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) +{ + u64 alloc_target; + + alloc_target = btrfs_get_alloc_profile(root, 1); + BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, + alloc_target); +} + +/* + * for now this just makes sure we have at least 5% of our metadata space free + * for use. + */ +int btrfs_check_metadata_free_space(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 alloc_target, thresh; + int committed = 0, ret; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + +again: + spin_lock(&meta_sinfo->lock); + if (!meta_sinfo->full) + thresh = meta_sinfo->total_bytes * 80; + else + thresh = meta_sinfo->total_bytes * 95; + + do_div(thresh, 100); + + if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + struct btrfs_trans_handle *trans; + if (!meta_sinfo->full) { + meta_sinfo->force_alloc = 1; + spin_unlock(&meta_sinfo->lock); + + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + 2 * 1024 * 1024, alloc_target, 0); + btrfs_end_transaction(trans, root); + goto again; + } + spin_unlock(&meta_sinfo->lock); + + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } + return -ENOSPC; + } + spin_unlock(&meta_sinfo->lock); + + return 0; +} + +/* + * This will check the space that the inode allocates from to make sure we have + * enough space for bytes. + */ +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + int ret = 0, committed = 0; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; +again: + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + if (data_sinfo->total_bytes - data_sinfo->bytes_used - + data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - + data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - + data_sinfo->bytes_may_use < bytes) { + struct btrfs_trans_handle *trans; + + /* + * if we don't have enough free bytes in this space then we need + * to alloc a new chunk. + */ + if (!data_sinfo->full) { + u64 alloc_target; + + data_sinfo->force_alloc = 1; + spin_unlock(&data_sinfo->lock); + + alloc_target = btrfs_get_alloc_profile(root, 1); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + bytes + 2 * 1024 * 1024, + alloc_target, 0); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + goto again; + } + spin_unlock(&data_sinfo->lock); + + /* commit the current transaction and try again */ + if (!committed) { + committed = 1; + trans = btrfs_join_transaction(root, 1); + if (!trans) + return -ENOMEM; + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + goto again; + } + + printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" + ", %llu bytes_used, %llu bytes_reserved, " + "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu total\n", bytes, data_sinfo->bytes_delalloc, + data_sinfo->bytes_used, data_sinfo->bytes_reserved, + data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, + data_sinfo->bytes_may_use, data_sinfo->total_bytes); + return -ENOSPC; + } + data_sinfo->bytes_may_use += bytes; + BTRFS_I(inode)->reserved_bytes += bytes; + spin_unlock(&data_sinfo->lock); + + return btrfs_check_metadata_free_space(root); +} + +/* + * if there was an error for whatever reason after calling + * btrfs_check_data_free_space, call this so we can cleanup the counters. + */ +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + spin_unlock(&data_sinfo->lock); +} + +/* called when we are adding a delalloc extent to the inode's io_tree */ +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* get the space info for where this inode will be storing its data */ + data_sinfo = BTRFS_I(inode)->space_info; + + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_delalloc += bytes; + + /* + * we are adding a delalloc extent without calling + * btrfs_check_data_free_space first. This happens on a weird + * writepage condition, but shouldn't hurt our accounting + */ + if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { + data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; + BTRFS_I(inode)->reserved_bytes = 0; + } else { + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + } + + spin_unlock(&data_sinfo->lock); +} + +/* called when we are clearing an delalloc extent from the inode's io_tree */ +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *info; + + info = BTRFS_I(inode)->space_info; + + spin_lock(&info->lock); + info->bytes_delalloc -= bytes; + spin_unlock(&info->lock); +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -2211,13 +2481,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, u64 end; u64 priv; u64 search = 0; - u64 skipped = 0; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_path *path; struct pending_extent_op *extent_op, *tmp; struct list_head insert_list, update_list; int ret; - int num_inserts = 0, max_inserts; + int num_inserts = 0, max_inserts, restart = 0; path = btrfs_alloc_path(); INIT_LIST_HEAD(&insert_list); @@ -2233,19 +2502,19 @@ again: ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts && + if (restart && !num_inserts && list_empty(&update_list)) { - skipped = 0; + restart = 0; search = 0; continue; } - mutex_unlock(&info->extent_ins_mutex); break; } ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); if (!ret) { - skipped = 1; + if (all) + restart = 1; search = end + 1; if (need_resched()) { mutex_unlock(&info->extent_ins_mutex); @@ -2264,7 +2533,7 @@ again: list_add_tail(&extent_op->list, &insert_list); search = end + 1; if (num_inserts == max_inserts) { - mutex_unlock(&info->extent_ins_mutex); + restart = 1; break; } } else if (extent_op->type == PENDING_BACKREF_UPDATE) { @@ -2280,7 +2549,6 @@ again: * somebody marked this thing for deletion then just unlock it and be * done, the free_extents will handle it */ - mutex_lock(&info->extent_ins_mutex); list_for_each_entry_safe(extent_op, tmp, &update_list, list) { clear_extent_bits(&info->extent_ins, extent_op->bytenr, extent_op->bytenr + extent_op->num_bytes - 1, @@ -2302,6 +2570,10 @@ again: if (!list_empty(&update_list)) { ret = update_backrefs(trans, extent_root, path, &update_list); BUG_ON(ret); + + /* we may have COW'ed new blocks, so lets start over */ + if (all) + restart = 1; } /* @@ -2309,9 +2581,9 @@ again: * need to make sure everything is cleaned then reset everything and * go back to the beginning */ - if (!num_inserts && all && skipped) { + if (!num_inserts && restart) { search = 0; - skipped = 0; + restart = 0; INIT_LIST_HEAD(&update_list); INIT_LIST_HEAD(&insert_list); goto again; @@ -2368,27 +2640,19 @@ again: BUG_ON(ret); /* - * if we broke out of the loop in order to insert stuff because we hit - * the maximum number of inserts at a time we can handle, then loop - * back and pick up where we left off + * if restart is set for whatever reason we need to go back and start + * searching through the pending list again. + * + * We just inserted some extents, which could have resulted in new + * blocks being allocated, which would result in new blocks needing + * updates, so if all is set we _must_ restart to get the updated + * blocks. */ - if (num_inserts == max_inserts) { - INIT_LIST_HEAD(&insert_list); - INIT_LIST_HEAD(&update_list); - num_inserts = 0; - goto again; - } - - /* - * again, if we need to make absolutely sure there are no more pending - * extent operations left and we know that we skipped some, go back to - * the beginning and do it all again - */ - if (all && skipped) { + if (restart || all) { INIT_LIST_HEAD(&insert_list); INIT_LIST_HEAD(&update_list); search = 0; - skipped = 0; + restart = 0; num_inserts = 0; goto again; } @@ -2709,6 +2973,8 @@ again: goto again; } + if (!err) + finish_current_insert(trans, extent_root, 0); return err; } @@ -2859,7 +3125,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; - empty_cluster = 64 * 1024; + if (!btrfs_test_opt(root, SSD)) + empty_cluster = 64 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) @@ -3091,6 +3358,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved), (info->full) ? "" : "not "); + printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," + " may_use=%llu, used=%llu\n", info->total_bytes, + info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, + info->bytes_used); down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { @@ -3117,24 +3388,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, { int ret; u64 search_start = 0; - u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; - if (data) { - alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if (root == root->fs_info->chunk_root) { - alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; - } else { - alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; - } + data = btrfs_get_alloc_profile(root, data); a |