diff options
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 46 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 869 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 4 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 415 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 3 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 11 |
9 files changed, 925 insertions, 435 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b7addbfd8c2..eb36ae981bd 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o tree-log.o acl.o + ref-cache.o export.o tree-log.o acl.o free-space-cache.o else # Normal Makefile diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 18e84472abb..6f467901246 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2725,9 +2725,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); - if (ret == 0) { + if (ret == 0) return -EEXIST; - } if (ret < 0) goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eb65fd80888..730aae3bc18 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,7 +483,6 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DUP (1 << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) - struct btrfs_block_group_item { __le64 used; __le64 chunk_objectid; @@ -498,17 +497,40 @@ struct btrfs_space_info { int full; int force_alloc; struct list_head list; + + /* for block groups in our same type */ + struct list_head block_groups; + spinlock_t lock; +}; + +struct btrfs_free_space { + struct rb_node bytes_index; + struct rb_node offset_index; + u64 offset; + u64 bytes; }; struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct btrfs_space_info *space_info; spinlock_t lock; u64 pinned; u64 flags; int cached; int ro; + int dirty; + + struct btrfs_space_info *space_info; + + /* free space cache stuff */ + struct rb_root free_space_bytes; + struct rb_root free_space_offset; + + /* block group cache stuff */ + struct rb_node cache_node; + + /* for block groups in the same raid type */ + struct list_head list; }; struct btrfs_device; @@ -525,8 +547,10 @@ struct btrfs_fs_info { struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; - struct extent_io_tree free_space_cache; - struct extent_io_tree block_group_cache; + /* block group cache stuff */ + spinlock_t block_group_cache_lock; + struct rb_root block_group_cache_tree; + struct extent_io_tree pinned_extents; struct extent_io_tree pending_del; struct extent_io_tree extent_ins; @@ -1814,4 +1838,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait); int btrfs_check_acl(struct inode *inode, int mask); int btrfs_init_acl(struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); + +/* free-space-cache.c */ +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache + *block_group); +struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes); +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes); +u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6f7821d43a..535bd0fe1a7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1410,10 +1410,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_io_tree_init(&fs_info->free_space_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->block_group_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); + spin_lock_init(&fs_info->block_group_cache_lock); + fs_info->block_group_cache_tree.rb_node = NULL; + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_io_tree_init(&fs_info->pending_del, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1c10ffc837c..813566acc5d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,12 +29,6 @@ #include "locking.h" #include "ref-cache.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK -#define BLOCK_GROUP_METADATA EXTENT_UPTODATE -#define BLOCK_GROUP_SYSTEM EXTENT_NEW - -#define BLOCK_GROUP_DIRTY EXTENT_DIRTY - static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -62,6 +56,127 @@ void maybe_unlock_mutex(struct btrfs_root *root) } } +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + return (cache->flags & bits) == bits; +} + +/* + * this adds the block group to the fs_info rb tree for the block group + * cache + */ +int btrfs_add_block_group_cache(struct btrfs_fs_info *info, + struct btrfs_block_group_cache *block_group) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct btrfs_block_group_cache *cache; + + spin_lock(&info->block_group_cache_lock); + p = &info->block_group_cache_tree.rb_node; + + while (*p) { + parent = *p; + cache = rb_entry(parent, struct btrfs_block_group_cache, + cache_node); + if (block_group->key.objectid < cache->key.objectid) { + p = &(*p)->rb_left; + } else if (block_group->key.objectid > cache->key.objectid) { + p = &(*p)->rb_right; + } else { + spin_unlock(&info->block_group_cache_lock); + return -EEXIST; + } + } + + rb_link_node(&block_group->cache_node, parent, p); + rb_insert_color(&block_group->cache_node, + &info->block_group_cache_tree); + spin_unlock(&info->block_group_cache_lock); + + return 0; +} + +/* + * This will return the block group at or after bytenr if contains is 0, else + * it will return the block group that contains the bytenr + */ +static struct btrfs_block_group_cache * +block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, + int contains) +{ + struct btrfs_block_group_cache *cache, *ret = NULL; + struct rb_node *n; + u64 end, start; + + spin_lock(&info->block_group_cache_lock); + n = info->block_group_cache_tree.rb_node; + + while (n) { + cache = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + end = cache->key.objectid + cache->key.offset - 1; + start = cache->key.objectid; + + if (bytenr < start) { + if (!contains && (!ret || start < ret->key.objectid)) + ret = cache; + n = n->rb_left; + } else if (bytenr > start) { + if (contains && bytenr <= end) { + ret = cache; + break; + } + n = n->rb_right; + } else { + ret = cache; + break; + } + } + spin_unlock(&info->block_group_cache_lock); + + return ret; +} + +/* + * this is only called by cache_block_group, since we could have freed extents + * we need to check the pinned_extents for any extents that can't be used yet + * since their free space will be released as soon as the transaction commits. + */ +static int add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY); + if (ret) + break; + + if (extent_start == start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + } + + return 0; +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -69,10 +184,8 @@ static int cache_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_io_tree *free_space_cache; int slot; u64 last = 0; - u64 hole_size; u64 first_free; int found = 0; @@ -80,7 +193,6 @@ static int cache_block_group(struct btrfs_root *root, return 0; root = root->fs_info->extent_root; - free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; @@ -96,7 +208,8 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - first_free = block_group->key.objectid; + first_free = max_t(u64, block_group->key.objectid, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); key.objectid = block_group->key.objectid; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -119,32 +232,28 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; - if (ret == 0) { + if (ret == 0) continue; - } else { + else break; - } } btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) { + if (key.objectid < block_group->key.objectid) goto next; - } + if (key.objectid >= block_group->key.objectid + - block_group->key.offset) { + block_group->key.offset) break; - } if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { if (!found) { last = first_free; found = 1; } - if (key.objectid > last) { - hole_size = key.objectid - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, - GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + key.objectid); + last = key.objectid + key.offset; } next: @@ -153,13 +262,11 @@ next: if (!found) last = first_free; - if (block_group->key.objectid + - block_group->key.offset > last) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + block_group->key.objectid + + block_group->key.offset); + block_group->cached = 1; ret = 0; err: @@ -167,166 +274,79 @@ err: return ret; } +/* + * return the block group that starts at or after bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 0); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - return block_group; + return cache; } +/* + * return the block group that contains teh given bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 1); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - if (block_group->key.objectid <= bytenr && bytenr < - block_group->key.objectid + block_group->key.offset) - return block_group; - return NULL; + return cache; } -static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) -{ - return (cache->flags & bits) == bits; -} - -static int noinline find_search_start(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, u64 num, int data) +static int noinline find_free_space(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - struct extent_io_tree *free_space_cache; - struct extent_state *state; + struct btrfs_free_space *info = NULL; u64 last; - u64 start = 0; - u64 cache_miss = 0; u64 total_fs_bytes; u64 search_start = *start_ret; - int wrapped = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - free_space_cache = &root->fs_info->free_space_cache; if (!cache) goto out; + last = max(search_start, cache->key.objectid); + again: ret = cache_block_group(root, cache); - if (ret) { + if (ret) goto out; - } - last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data) || cache->ro) + if (cache->ro || !block_group_bits(cache, data)) goto new_group; - spin_lock_irq(&free_space_cache->lock); - state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); - while(1) { - if (!state) { - if (!cache_miss) - cache_miss = last; - spin_unlock_irq(&free_space_cache->lock); - goto new_group; - } - - start = max(last, state->start); - last = state->end + 1; - if (last - start < num) { - do { - state = extent_state_next(state); - } while(state && !(state->state & EXTENT_DIRTY)); - continue; - } - spin_unlock_irq(&free_space_cache->lock); - if (cache->ro) { - goto new_group; - } - if (start + num > cache->key.objectid + cache->key.offset) - goto new_group; - if (!block_group_bits(cache, data)) { - printk("block group bits don't match %Lu %d\n", cache->flags, data); - } - *start_ret = start; + info = btrfs_find_free_space(cache, last, num); + if (info) { + *start_ret = info->offset; return 0; } -out: - cache = btrfs_lookup_block_group(root->fs_info, search_start); - if (!cache) { - printk("Unable to find block group for %Lu\n", search_start); - WARN_ON(1); - } - return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; -wrapped: + cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache || cache->key.objectid >= total_fs_bytes) { -no_cache: - if (!wrapped) { - wrapped = 1; - last = search_start; - goto wrapped; - } + if (!cache || cache->key.objectid >= total_fs_bytes) goto out; - } - if (cache_miss && !cache->cached) { - cache_block_group(root, cache); - last = cache_miss; - cache = btrfs_lookup_first_block_group(root->fs_info, last); - } - cache_miss = 0; - cache = btrfs_find_block_group(root, cache, last, data, 0); - if (!cache) - goto no_cache; + *cache_ret = cache; goto again; + +out: + return -ENOSPC; } static u64 div_factor(u64 num, int factor) @@ -338,16 +358,19 @@ static u64 div_factor(u64 num, int factor) return num; } -static int block_group_state_bits(u64 flags) +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) { - int bits = 0; - if (flags & BTRFS_BLOCK_GROUP_DATA) - bits |= BLOCK_GROUP_DATA; - if (flags & BTRFS_BLOCK_GROUP_METADATA) - bits |= BLOCK_GROUP_METADATA; - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - bits |= BLOCK_GROUP_SYSTEM; - return bits; + struct list_head *head = &info->space_info; + struct list_head *cur; + struct btrfs_space_info *found; + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); + if (found->flags == flags) + return found; + } + return NULL; + } static struct btrfs_block_group_cache * @@ -356,28 +379,19 @@ __btrfs_find_block_group(struct btrfs_root *root, u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *sinfo; u64 used; u64 last = 0; - u64 start; - u64 end; u64 free_check; - u64 ptr; - int bit; - int ret; int full_search = 0; int factor = 10; int wrapped = 0; - block_group_cache = &info->block_group_cache; - if (data & BTRFS_BLOCK_GROUP_METADATA) factor = 9; - bit = block_group_state_bits(data); - if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); @@ -408,20 +422,30 @@ __btrfs_find_block_group(struct btrfs_root *root, else last = search_start; } + sinfo = __find_space_info(root->fs_info, data); + if (!sinfo) + goto found; again: while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, bit); - if (ret) - break; + struct list_head *l; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) { - last = end + 1; - continue; + cache = NULL; + + spin_lock(&sinfo->lock); + list_for_each(l, &sinfo->block_groups) { + struct btrfs_block_group_cache *entry; + entry = list_entry(l, struct btrfs_block_group_cache, + list); + if ((entry->key.objectid >= last) && + (!cache || (entry->key.objectid < + cache->key.objectid))) + cache = entry; } + spin_unlock(&sinfo->lock); + + if (!cache) + break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; spin_lock(&cache->lock); last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -462,6 +486,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, ret = __btrfs_find_block_group(root, hint, search_start, data, owner); return ret; } + static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { @@ -1175,34 +1200,37 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *cache; - int ret; + struct btrfs_block_group_cache *cache, *entry; + struct rb_node *n; int err = 0; int werr = 0; struct btrfs_path *path; u64 last = 0; - u64 start; - u64 end; - u64 ptr; - block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; mutex_lock(&root->fs_info->alloc_mutex); while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, BLOCK_GROUP_DIRTY); - if (ret) - break; + cache = NULL; + spin_lock(&root->fs_info->block_group_cache_lock); + for (n = rb_first(&root->fs_info->block_group_cache_tree); + n; n = rb_next(n)) { + entry = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + if (entry->dirty) { + cache = entry; + break; + } + } + spin_unlock(&root->fs_info->block_group_cache_lock); - last = end + 1; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) + if (!cache) break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + + last += cache->key.offset; + err = write_one_cache_group(trans, root, path, cache); /* @@ -1214,29 +1242,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, werr = err; continue; } - clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + + cache->dirty = 0; } btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); return werr; } -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, - u64 flags) -{ - struct list_head *head = &info->space_info; - struct list_head *cur; - struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); - if (found->flags == flags) - return found; - } - return NULL; - -} - static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1256,6 +1269,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); + INIT_LIST_HEAD(&found->block_groups); + spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; found->bytes_used = bytes_used; @@ -1318,7 +1333,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret; + int ret = 0; flags = reduce_alloc_profile(extent_root, flags); @@ -1355,10 +1370,11 @@ printk("space info full %Lu\n", flags); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); + out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); out: - return 0; + return ret; } static int update_block_group(struct btrfs_trans_handle *trans, @@ -1371,8 +1387,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num_bytes; u64 old_val; u64 byte_in_group; - u64 start; - u64 end; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { @@ -1382,12 +1396,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, } byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); - start = cache->key.objectid; - end = start + cache->key.offset - 1; - set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); spin_lock(&cache->lock); + cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { @@ -1401,9 +1412,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); if (mark_free) { - set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + int ret; + ret = btrfs_add_free_space(cache, bytenr, + num_bytes); + if (ret) + return -1; } } total -= num_bytes; @@ -1414,16 +1427,13 @@ static int update_block_group(struct btrfs_trans_handle *trans, static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) { - u64 start; - u64 end; - int ret; - ret = find_first_extent_bit(&root->fs_info->block_group_cache, - search_start, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_first_block_group(root->fs_info, search_start); + if (!cache) return 0; - return start; + + return cache->key.objectid; } @@ -1501,8 +1511,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + struct btrfs_block_group_cache *cache; mutex_lock(&root->fs_info->alloc_mutex); while(1) { @@ -1512,7 +1521,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, break; btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, start); + if (cache->cached) + btrfs_add_free_space(cache, start, end - start + 1); if (need_resched()) { mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); @@ -1875,9 +1886,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + struct btrfs_block_group_cache *cache; + /* btrfs_free_reserved_extent */ - set_extent_dirty(&root->fs_info->free_space_cache, - bytenr, bytenr + num_bytes - 1, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + btrfs_add_free_space(cache, bytenr, num_bytes); return 0; } pin = 1; @@ -1942,8 +1956,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 total_needed = num_bytes; u64 *last_ptr = NULL; struct btrfs_block_group_cache *block_group; - int full_scan = 0; - int wrapped = 0; int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; @@ -1959,9 +1971,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, empty_cluster = 256 * 1024; } - if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; - } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { last_ptr = &root->fs_info->last_log_alloc; if (!last_ptr == 0 && root->fs_info->last_alloc) { @@ -1972,9 +1984,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (last_ptr) { if (*last_ptr) hint_byte = *last_ptr; - else { + else empty_size += empty_cluster; - } } search_start = max(search_start, first_logical_byte(root, 0)); @@ -1983,145 +1994,172 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); - if (hint_byte) { - block_group = btrfs_lookup_first_block_group(info, hint_byte); - if (!block_group) - hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, - hint_byte, data, 1); - if (last_ptr && *last_ptr == 0 && block_group) - hint_byte = block_group->key.objectid; - } else { - block_group = btrfs_find_block_group(root, - trans->block_group, - search_start, data, 1); - } search_start = max(search_start, hint_byte); - total_needed += empty_size; -check_failed: - if (!block_group) { - block_group = btrfs_lookup_first_block_group(info, - search_start); - if (!block_group) - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - } - if (full_scan && !chunk_alloc_done) { - if (allowed_chunk_alloc) { - do_chunk_alloc(trans, root, - num_bytes + 2 * 1024 * 1024, data, 1); - allowed_chunk_alloc = 0; - } else if (block_group && block_group_bits(block_group, data)) { - block_group->space_info->force_alloc = 1; +new_group: + block_group = btrfs_lookup_block_group(info, search_start); + + /* + * Ok this looks a little tricky, buts its really simple. First if we + * didn't find a block group obviously we want to start over. + * Secondly, if the block group we found does not match the type we + * need, and we have a last_ptr and its not 0, chances are the last + * allocation we made was at the end of the block group, so lets go + * ahead and skip the looking through the rest of the block groups and + * start at the beginning. This helps with metadata allocations, + * since you are likely to have a bunch of data block groups to search + * through first before you realize that you need to start over, so go + * ahead and start over and save the time. + */ + if (!block_group || (!block_group_bits(block_group, data) && + last_ptr && *last_ptr)) { + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) + *last_ptr = 0; + search_start = orig_search_start; + goto new_group; + } else if (!chunk_alloc_done && allowed_chunk_alloc) { + ret = do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) { + struct btrfs_space_info *info; + + info = __find_space_info(root->fs_info, data); + goto error; + } + BUG_ON(ret); + chunk_alloc_done = 1; + search_start = orig_search_start; + goto new_group; + } else { + ret = -ENOSPC; + goto error; } - chunk_alloc_done = 1; - } - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); - if (ret == -ENOSPC && last_ptr && *last_ptr) { - *last_ptr = 0; - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); } - if (ret == -ENOSPC) - goto enospc; - if (ret) - goto error; - if (last_ptr && *last_ptr && search_start != *last_ptr) { - *last_ptr = 0; - if (!empty_size) { - empty_size += empty_cluster; - total_needed += empty_size; + /* + * this is going to seach through all of the existing block groups it + * can find, so if we don't find something we need to see if we can + * allocate what we need. + */ + ret = find_free_space(root, &block_group, &search_start, + total_needed, data); + if (ret == -ENOSPC) { + /* + * instead of allocating, start at the original search start + * and see if there is something to be found, if not then we + * allocate + */ + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) { + *last_ptr = 0; + total_needed += empty_cluster; + } + search_start = orig_search_start; + goto new_group; } - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, - &search_start, total_needed, data); - if (ret == -ENOSPC) - goto enospc; - if (ret) + + /* + * we've already allocated, we're pretty screwed + */ + if (chunk_alloc_done) { goto error; + } else if (!allowed_chunk_alloc && block_group && + block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + goto error; + } else if (!allowed_chunk_alloc) { + goto error; + } + + ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) + goto error; + + BUG_ON(ret); + chunk_alloc_done = 1; + if (block_group) + search_start = block_group->key.objectid + + block_group->key.offset; + else + search_start = orig_search_start; + goto new_group; } + if (ret) + goto error; + search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; - if (ins->objectid + num_bytes >= search_end) - goto enospc; + if (ins->objectid + num_bytes >= search_end) { + search_start = orig_search_start; + if (chunk_alloc_done) { + ret = -ENOSPC; + goto error; + } + goto new_group; + } if (ins->objectid + num_bytes > block_group->key.objectid + block_group->key.offset) { + if (search_start == orig_search_start && chunk_alloc_ |