diff options
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 3221 |
1 files changed, 2171 insertions, 1050 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a0390657451..2b0a627cb5f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -20,22 +20,25 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/math64.h> +#include <linux/ratelimit.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" #include "disk-io.h" +#include "extent_io.h" +#include "inode-map.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) -static void recalculate_thresholds(struct btrfs_block_group_cache - *block_group); -static int link_free_space(struct btrfs_block_group_cache *block_group, +static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info); +static void unlink_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info); -struct inode *lookup_free_space_inode(struct btrfs_root *root, - struct btrfs_block_group_cache - *block_group, struct btrfs_path *path) +static struct inode *__lookup_free_space_inode(struct btrfs_root *root, + struct btrfs_path *path, + u64 offset) { struct btrfs_key key; struct btrfs_key location; @@ -45,22 +48,15 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, struct inode *inode = NULL; int ret; - spin_lock(&block_group->lock); - if (block_group->inode) - inode = igrab(block_group->inode); - spin_unlock(&block_group->lock); - if (inode) - return inode; - key.objectid = BTRFS_FREE_SPACE_OBJECTID; - key.offset = block_group->key.objectid; + key.offset = offset; key.type = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ERR_PTR(ret); if (ret > 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); return ERR_PTR(-ENOENT); } @@ -69,7 +65,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header); btrfs_free_space_key(leaf, header, &disk_key); btrfs_disk_key_to_cpu(&location, &disk_key); - btrfs_release_path(root, path); + btrfs_release_path(path); inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); if (!inode) @@ -81,8 +77,41 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return ERR_PTR(-ENOENT); } + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + + return inode; +} + +struct inode *lookup_free_space_inode(struct btrfs_root *root, + struct btrfs_block_group_cache + *block_group, struct btrfs_path *path) +{ + struct inode *inode = NULL; + u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; + + spin_lock(&block_group->lock); + if (block_group->inode) + inode = igrab(block_group->inode); + spin_unlock(&block_group->lock); + if (inode) + return inode; + + inode = __lookup_free_space_inode(root, path, + block_group->key.objectid); + if (IS_ERR(inode)) + return inode; + spin_lock(&block_group->lock); - if (!root->fs_info->closing) { + if (!((BTRFS_I(inode)->flags & flags) == flags)) { + btrfs_info(root->fs_info, + "Old style space inode found, converting."); + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | + BTRFS_INODE_NODATACOW; + block_group->disk_cache_state = BTRFS_DC_CLEAR; + } + + if (!block_group->iref) { block_group->inode = igrab(inode); block_group->iref = 1; } @@ -91,27 +120,27 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; } -int create_free_space_inode(struct btrfs_root *root, - struct btrfs_trans_handle *trans, - struct btrfs_block_group_cache *block_group, - struct btrfs_path *path) +static int __create_free_space_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + u64 ino, u64 offset) { struct btrfs_key key; struct btrfs_disk_key disk_key; struct btrfs_free_space_header *header; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - u64 objectid; + u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; int ret; - ret = btrfs_find_free_objectid(trans, root, 0, &objectid); - if (ret < 0) - return ret; - - ret = btrfs_insert_empty_inode(trans, root, path, objectid); + ret = btrfs_insert_empty_inode(trans, root, path, ino); if (ret) return ret; + /* We inline crc's for the free disk space cache */ + if (ino != BTRFS_FREE_INO_OBJECTID) + flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; + leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -124,23 +153,21 @@ int create_free_space_inode(struct btrfs_root *root, btrfs_set_inode_uid(leaf, inode_item, 0); btrfs_set_inode_gid(leaf, inode_item, 0); btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | - BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); + btrfs_set_inode_flags(leaf, inode_item, flags); btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_transid(leaf, inode_item, trans->transid); - btrfs_set_inode_block_group(leaf, inode_item, - block_group->key.objectid); + btrfs_set_inode_block_group(leaf, inode_item, offset); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); key.objectid = BTRFS_FREE_SPACE_OBJECTID; - key.offset = block_group->key.objectid; + key.offset = offset; key.type = 0; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(struct btrfs_free_space_header)); if (ret < 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } leaf = path->nodes[0]; @@ -149,29 +176,54 @@ int create_free_space_inode(struct btrfs_root *root, memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); btrfs_set_free_space_key(leaf, header, &disk_key); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); return 0; } +int create_free_space_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + int ret; + u64 ino; + + ret = btrfs_find_free_objectid(root, &ino); + if (ret < 0) + return ret; + + return __create_free_space_inode(root, trans, path, ino, + block_group->key.objectid); +} + +int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, + struct btrfs_block_rsv *rsv) +{ + u64 needed_bytes; + int ret; + + /* 1 for slack space, 1 for updating the inode */ + needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + + btrfs_calc_trans_metadata_size(root, 1); + + spin_lock(&rsv->lock); + if (rsv->reserved < needed_bytes) + ret = -ENOSPC; + else + ret = 0; + spin_unlock(&rsv->lock); + return ret; +} + int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path, struct inode *inode) { - loff_t oldsize; int ret = 0; - trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, - 0, 5); - if (ret) - return ret; - - oldsize = i_size_read(inode); btrfs_i_size_write(inode, 0); - truncate_pagecache(inode, oldsize, 0); + truncate_pagecache(inode, 0); /* * We don't need an orphan item because truncating the free space cache @@ -180,11 +232,15 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); if (ret) { - WARN_ON(1); + btrfs_abort_transaction(trans, root, ret); return ret; } - return btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + if (ret) + btrfs_abort_transaction(trans, root, ret); + + return ret; } static int readahead_cache(struct inode *inode) @@ -206,604 +262,1017 @@ static int readahead_cache(struct inode *inode) return 0; } -int load_free_space_cache(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group) +struct io_ctl { + void *cur, *orig; + struct page *page; + struct page **pages; + struct btrfs_root *root; + unsigned long size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, + struct btrfs_root *root, int write) +{ + int num_pages; + int check_crcs = 0; + + num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) + check_crcs = 1; + + /* Make sure we can fit our crcs into the first page */ + if (write && check_crcs && + (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) + return -ENOSPC; + + memset(io_ctl, 0, sizeof(struct io_ctl)); + + io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); + if (!io_ctl->pages) + return -ENOMEM; + + io_ctl->num_pages = num_pages; + io_ctl->root = root; + io_ctl->check_crcs = check_crcs; + + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->pages); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + kunmap(io_ctl->page); + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + ASSERT(io_ctl->index < io_ctl->num_pages); + io_ctl->page = io_ctl->pages[io_ctl->index++]; + io_ctl->cur = kmap(io_ctl->page); + io_ctl->orig = io_ctl->cur; + io_ctl->size = PAGE_CACHE_SIZE; + if (clear) + memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + int i; + + io_ctl_unmap_page(io_ctl); + + for (i = 0; i < io_ctl->num_pages; i++) { + if (io_ctl->pages[i]) { + ClearPageChecked(io_ctl->pages[i]); + unlock_page(io_ctl->pages[i]); + page_cache_release(io_ctl->pages[i]); + } + } +} + +static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, + int uptodate) { - struct btrfs_root *root = fs_info->tree_root; - struct inode *inode; - struct btrfs_free_space_header *header; - struct extent_buffer *leaf; struct page *page; - struct btrfs_path *path; - u32 *checksums = NULL, *crc; - char *disk_crcs = NULL; - struct btrfs_key key; - struct list_head bitmaps; - u64 num_entries; - u64 num_bitmaps; - u64 generation; - u32 cur_crc = ~(u32)0; - pgoff_t index = 0; - unsigned long first_page_offset; - int num_checksums; - int ret = 0; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); + int i; + + for (i = 0; i < io_ctl->num_pages; i++) { + page = find_or_create_page(inode->i_mapping, i, mask); + if (!page) { + io_ctl_drop_pages(io_ctl); + return -ENOMEM; + } + io_ctl->pages[i] = page; + if (uptodate && !PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + btrfs_err(BTRFS_I(inode)->root->fs_info, + "error reading free space cache"); + io_ctl_drop_pages(io_ctl); + return -EIO; + } + } + } + + for (i = 0; i < io_ctl->num_pages; i++) { + clear_page_dirty_for_io(io_ctl->pages[i]); + set_page_extent_mapped(io_ctl->pages[i]); + } + + return 0; +} + +static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) +{ + __le64 *val; + + io_ctl_map_page(io_ctl, 1); /* - * If we're unmounting then just return, since this does a search on the - * normal root and not the commit root and we could deadlock. + * Skip the csum areas. If we don't check crcs then we just have a + * 64bit chunk at the front of the first page. */ - smp_mb(); - if (fs_info->closing) - return 0; + if (io_ctl->check_crcs) { + io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); + io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + val = io_ctl->cur; + *val = cpu_to_le64(generation); + io_ctl->cur += sizeof(u64); +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + __le64 *gen; /* - * If this block group has been marked to be cleared for one reason or - * another then we can't trust the on disk cache, so just return. + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. */ - spin_lock(&block_group->lock); - if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { - spin_unlock(&block_group->lock); + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk_ratelimited(KERN_ERR "BTRFS: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_unmap_page(io_ctl); + return; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + crc = btrfs_csum_data(io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + io_ctl_unmap_page(io_ctl); + tmp = kmap(io_ctl->pages[0]); + tmp += index; + *tmp = crc; + kunmap(io_ctl->pages[0]); +} + +static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); return 0; } - spin_unlock(&block_group->lock); - INIT_LIST_HEAD(&bitmaps); + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; - path = btrfs_alloc_path(); - if (!path) + tmp = kmap(io_ctl->pages[0]); + tmp += index; + val = *tmp; + kunmap(io_ctl->pages[0]); + + io_ctl_map_page(io_ctl, 0); + crc = btrfs_csum_data(io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free " + "space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, + void *bitmap) +{ + struct btrfs_free_space_entry *entry; + + if (!io_ctl->cur) + return -ENOSPC; + + entry = io_ctl->cur; + entry->offset = cpu_to_le64(offset); + entry->bytes = cpu_to_le64(bytes); + entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : + BTRFS_FREE_SPACE_EXTENT; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) return 0; - inode = lookup_free_space_inode(root, block_group, path); - if (IS_ERR(inode)) { - btrfs_free_path(path); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + + /* No more pages to map */ + if (io_ctl->index >= io_ctl->num_pages) return 0; + + /* map the next page */ + io_ctl_map_page(io_ctl, 1); + return 0; +} + +static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) +{ + if (!io_ctl->cur) + return -ENOSPC; + + /* + * If we aren't at the start of the current page, unmap this one and + * map the next one if there is any left. + */ + if (io_ctl->cur != io_ctl->orig) { + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index >= io_ctl->num_pages) + return -ENOSPC; + io_ctl_map_page(io_ctl, 0); } - /* Nothing in the space cache, goodbye */ - if (!i_size_read(inode)) { - btrfs_free_path(path); - goto out; + memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index < io_ctl->num_pages) + io_ctl_map_page(io_ctl, 0); + return 0; +} + +static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) +{ + /* + * If we're not on the boundary we know we've modified the page and we + * need to crc the page. + */ + if (io_ctl->cur != io_ctl->orig) + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + else + io_ctl_unmap_page(io_ctl); + + while (io_ctl->index < io_ctl->num_pages) { + io_ctl_map_page(io_ctl, 1); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + } +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + int ret; + + if (!io_ctl->cur) { + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + } + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + return 0; +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) +{ + int ret; + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); + io_ctl_unmap_page(io_ctl); + + return 0; +} + +/* + * Since we attach pinned extents after the fact we can have contiguous sections + * of free space that are split up in entries. This poses a problem with the + * tree logging stuff since it could have allocated across what appears to be 2 + * entries since we would have merged the entries when adding the pinned extents + * back to the free space cache. So run through the space cache that we just + * loaded and merge contiguous entries. This will make the log replay stuff not + * blow up and it will make for nicer allocator behavior. + */ +static void merge_space_tree(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *e, *prev = NULL; + struct rb_node *n; + +again: + spin_lock(&ctl->tree_lock); + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + e = rb_entry(n, struct btrfs_free_space, offset_index); + if (!prev) + goto next; + if (e->bitmap || prev->bitmap) + goto next; + if (prev->offset + prev->bytes == e->offset) { + unlink_free_space(ctl, prev); + unlink_free_space(ctl, e); + prev->bytes += e->bytes; + kmem_cache_free(btrfs_free_space_cachep, e); + link_free_space(ctl, prev); + prev = NULL; + spin_unlock(&ctl->tree_lock); + goto again; + } +next: + prev = e; } + spin_unlock(&ctl->tree_lock); +} + +static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, + struct btrfs_free_space_ctl *ctl, + struct btrfs_path *path, u64 offset) +{ + struct btrfs_free_space_header *header; + struct extent_buffer *leaf; + struct io_ctl io_ctl; + struct btrfs_key key; + struct btrfs_free_space *e, *n; + struct list_head bitmaps; + u64 num_entries; + u64 num_bitmaps; + u64 generation; + u8 type; + int ret = 0; + + INIT_LIST_HEAD(&bitmaps); + + /* Nothing in the space cache, goodbye */ + if (!i_size_read(inode)) + return 0; key.objectid = BTRFS_FREE_SPACE_OBJECTID; - key.offset = block_group->key.objectid; + key.offset = offset; key.type = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret) { - btrfs_free_path(path); - goto out; + if (ret < 0) + return 0; + else if (ret > 0) { + btrfs_release_path(path); + return 0; } + ret = -1; + leaf = path->nodes[0]; header = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_header); num_entries = btrfs_free_space_entries(leaf, header); num_bitmaps = btrfs_free_space_bitmaps(leaf, header); generation = btrfs_free_space_generation(leaf, header); - btrfs_free_path(path); + btrfs_release_path(path); + + if (!BTRFS_I(inode)->generation) { + btrfs_info(root->fs_info, + "The free space cache file (%llu) is invalid. skip it\n", + offset); + return 0; + } if (BTRFS_I(inode)->generation != generation) { - printk(KERN_ERR "btrfs: free space inode generation (%llu) did" - " not match free space cache generation (%llu) for " - "block group %llu\n", - (unsigned long long)BTRFS_I(inode)->generation, - (unsigned long long)generation, - (unsigned long long)block_group->key.objectid); - goto free_cache; + btrfs_err(root->fs_info, + "free space inode generation (%llu) " + "did not match free space cache generation (%llu)", + BTRFS_I(inode)->generation, generation); + return 0; } if (!num_entries) - goto out; + return 0; - /* Setup everything for doing checksumming */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); - if (!checksums) - goto out; - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); - disk_crcs = kzalloc(first_page_offset, GFP_NOFS); - if (!disk_crcs) - goto out; + ret = io_ctl_init(&io_ctl, inode, root, 0); + if (ret) + return ret; ret = readahead_cache(inode); - if (ret) { - ret = 0; + if (ret) goto out; - } - while (1) { - struct btrfs_free_space_entry *entry; - struct btrfs_free_space *e; - void *addr; - unsigned long offset = 0; - unsigned long start_offset = 0; - int need_loop = 0; + ret = io_ctl_prepare_pages(&io_ctl, inode, 1); + if (ret) + goto out; - if (!num_entries && !num_bitmaps) - break; + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; - page = grab_cache_page(inode->i_mapping, index); - if (!page) { - ret = 0; + while (num_entries) { + e = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); + if (!e) goto free_cache; - } - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - printk(KERN_ERR "btrfs: error reading free " - "space cache: %llu\n", - (unsigned long long) - block_group->key.objectid); - goto free_cache; - } - } - addr = kmap(page); - - if (index == 0) { - u64 *gen; - - memcpy(disk_crcs, addr, first_page_offset); - gen = addr + (sizeof(u32) * num_checksums); - if (*gen != BTRFS_I(inode)->generation) { - printk(KERN_ERR "btrfs: space cache generation" - " (%llu) does not match inode (%llu) " - "for block group %llu\n", - (unsigned long long)*gen, - (unsigned long long) - BTRFS_I(inode)->generation, - (unsigned long long) - block_group->key.objectid); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - crc = (u32 *)disk_crcs; - } - entry = addr + start_offset; - - /* First lets check our crc before we do anything fun */ - cur_crc = ~(u32)0; - cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, - PAGE_CACHE_SIZE - start_offset); - btrfs_csum_final(cur_crc, (char *)&cur_crc); - if (cur_crc != *crc) { - printk(KERN_ERR "btrfs: crc mismatch for page %lu in " - "block group %llu\n", index, - (unsigned long long)block_group->key.objectid); - kunmap(page); - unlock_page(page); - page_cache_release(page); + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } - crc++; - while (1) { - if (!num_entries) - break; + if (!e->bytes) { + kmem_cache_free(btrfs_free_space_cachep, e); + goto free_cache; + } - need_loop = 1; - e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); - if (!e) { - kunmap(page); - unlock_page(page); - page_cache_release(page); + if (type == BTRFS_FREE_SPACE_EXTENT) { + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + spin_unlock(&ctl->tree_lock); + if (ret) { + btrfs_err(root->fs_info, + "Duplicate entries in free space cache, dumping"); + kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } - - e->offset = le64_to_cpu(entry->offset); - e->bytes = le64_to_cpu(entry->bytes); - if (!e->bytes) { - kunmap(page); - kfree(e); - unlock_page(page); - page_cache_release(page); + } else { + ASSERT(num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!e->bitmap) { + kmem_cache_free( + btrfs_free_space_cachep, e); goto free_cache; } - - if (entry->type == BTRFS_FREE_SPACE_EXTENT) { - spin_lock(&block_group->tree_lock); - ret = link_free_space(block_group, e); - spin_unlock(&block_group->tree_lock); - BUG_ON(ret); - } else { - e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!e->bitmap) { - kunmap(page); - kfree(e); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - spin_lock(&block_group->tree_lock); - ret = link_free_space(block_group, e); - block_group->total_bitmaps++; - recalculate_thresholds(block_group); - spin_unlock(&block_group->tree_lock); - list_add_tail(&e->list, &bitmaps); + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + ctl->op->recalc_thresholds(ctl); + spin_unlock(&ctl->tree_lock); + if (ret) { + btrfs_err(root->fs_info, + "Duplicate entries in free space cache, dumping"); + kmem_cache_free(btrfs_free_space_cachep, e); + goto free_cache; } - - num_entries--; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - break; - entry++; + list_add_tail(&e->list, &bitmaps); } - /* - * We read an entry out of this page, we need to move on to the - * next page. - */ - if (need_loop) { - kunmap(page); - goto next; - } + num_entries--; + } - /* - * We add the bitmaps at the end of the entries in order that - * the bitmap entries are added to the cache. - */ - e = list_entry(bitmaps.next, struct btrfs_free_space, list); + io_ctl_unmap_page(&io_ctl); + + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are added to the cache. + */ + list_for_each_entry_safe(e, n, &bitmaps, list) { list_del_init(&e->list); - memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); - kunmap(page); - num_bitmaps--; -next: - unlock_page(page); - page_cache_release(page); - index++; + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; } + io_ctl_drop_pages(&io_ctl); + merge_space_tree(ctl); ret = 1; out: - kfree(checksums); - kfree(disk_crcs); - iput(inode); + io_ctl_free(&io_ctl); return ret; - free_cache: - /* This cache is bogus, make sure it gets cleared */ - spin_lock(&block_group->lock); - block_group->disk_cache_state = BTRFS_DC_CLEAR; - spin_unlock(&block_group->lock); - btrfs_remove_free_space_cache(block_group); + io_ctl_drop_pages(&io_ctl); + __btrfs_remove_free_space_cache(ctl); goto out; } -int btrfs_write_out_cache(struct btrfs_root *root, - struct btrfs_trans_handle *trans, - struct btrfs_block_group_cache *block_group, - struct btrfs_path *path) +int load_free_space_cache(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) { - struct btrfs_free_space_header *header; - struct extent_buffer *leaf; + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_root *root = fs_info->tree_root; struct inode *inode; - struct rb_node *node; - struct list_head *pos, *n; - struct page *page; - struct extent_state *cached_state = NULL; - struct list_head bitmap_list; - struct btrfs_key key; - u64 bytes = 0; - u32 *crc, *checksums; - pgoff_t index = 0, last_index = 0; - unsigned long first_page_offset; - int num_checksums; - int entries = 0; - int bitmaps = 0; + struct btrfs_path *path; int ret = 0; + bool matched; + u64 used = btrfs_block_group_used(&block_group->item); - root = root->fs_info->tree_root; - - INIT_LIST_HEAD(&bitmap_list); - + /* + * If this block group has been marked to be cleared for one reason or + * another then we can't trust the on disk cache, so just return. + */ spin_lock(&block_group->lock); - if (block_group->disk_cache_state < BTRFS_DC_SETUP) { + if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { spin_unlock(&block_group->lock); return 0; } spin_unlock(&block_group->lock); - inode = lookup_free_space_inode(root, block_group, path); - if (IS_ERR(inode)) + path = btrfs_alloc_path(); + if (!path) return 0; + path->search_commit_root = 1; + path->skip_locking = 1; - if (!i_size_read(inode)) { - iput(inode); + inode = lookup_free_space_inode(root, block_group, path); + if (IS_ERR(inode)) { + btrfs_free_path(path); return 0; } - node = rb_first(&block_group->free_space_offset); - if (!node) { - iput(inode); - return 0; + /* We may have converted the inode and made the cache invalid. */ + spin_lock(&block_group->lock); + if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { + spin_unlock(&block_group->lock); + btrfs_free_path(path); + goto out; } + spin_unlock(&block_group->lock); - last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - filemap_write_and_wait(inode->i_mapping); - btrfs_wait_ordered_range(inode, inode->i_size & - ~(root->sectorsize - 1), (u64)-1); - - /* We need a checksum per page. */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); - if (!crc) { - iput(inode); - return 0; - } + ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, + path, block_group->key.objectid); + btrfs_free_path(path); + if (ret <= 0) + goto out; - /* Since the first page has all of our checksums and our generation we - * need to calculate the offset into the page that we can start writing - * our entries. - */ - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); + spin_lock(&ctl->tree_lock); + matched = (ctl->free_space == (block_group->key.offset - used - + block_group->bytes_super)); + spin_unlock(&ctl->tree_lock); - /* - * Lock all pages first so we can lock the extent safely. - * - * NOTE: Because we hold the ref the entire time we're going to write to - * the page find_get_page should never fail, so we don't do a check - * after find_get_page at this point. Just putting this here so people - * know and don't freak out. - */ - while (index <= last_index) { - page = grab_cache_page(inode->i_mapping, index); - if (!page) { - pgoff_t i = 0; - - while (i < index) { - page = find_get_page(inode->i_mapping, i); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); - i++; - } - goto out_free; - } - index++; + if (!matched) { + __btrfs_remove_free_space_cache(ctl); + btrfs_warn(fs_info, "block group %llu has wrong amount of free space", + block_group->key.objectid); + ret = -1; } +out: + if (ret < 0) { + /* This cache is bogus, make sure it gets cleared */ + spin_lock(&block_group->lock); + block_group->disk_cache_state = BTRFS_DC_CLEAR; + spin_unlock(&block_group->lock); + ret = 0; - index = 0; - lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, - 0, &cached_state, GFP_NOFS); - - /* Write out the extent entries */ - do { - struct btrfs_free_space_entry *entry; - void *addr; - unsigned long offset = 0; - unsigned long start_offset = 0; - - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } - - page = find_get_page(inode->i_mapping, index); + btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", + block_group->key.objectid); + } - addr = kmap(page); - entry = addr + start_offset; + iput(inode); + return ret; +} - memset(addr, 0, PAGE_CACHE_SIZE); - while (1) { - struct btrfs_free_space *e; +static noinline_for_stack +int write_cache_extent_entries(struct io_ctl *io_ctl, + struct btrfs_free_space_ctl *ctl, + struct btrfs_block_group_cache *block_group, + int *entries, int *bitmaps, + struct list_head *bitmap_list) +{ + int ret; + struct btrfs_free_cluster *cluster = NULL; + struct rb_node *node = rb_first(&ctl->free_space_offset); - e = rb_entry(node, struct btrfs_free_space, offset_index); - entries++; + /* Get the cluster for this block_group if it exists */ + if (block_group && !list_empty(&block_group->cluster_list)) { + cluster = list_entry(block_group->cluster_list.next, + struct btrfs_free_cluster, + block_group_list); + } - entry->offset = cpu_to_le64(e->offset); - entry->bytes = cpu_to_le64(e->bytes); - if (e->bitmap) { - entry->type = BTRFS_FREE_SPACE_BITMAP; - list_add_tail(&e->list, &bitmap_list); - bitmaps++; - } else { - entry->type = BTRFS_FREE_SPACE_EXTENT; - } - node = rb_next(node); - if (!node) - break; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - break; - entry++; - } - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr + start_offset, *crc, - PAGE_CACHE_SIZE - start_offset); - kunmap(page); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; + } - btrfs_csum_final(*crc, (char *)crc); - crc++; + /* Write out the extent entries */ + while (node) { + struct btrfs_free_space *e; - bytes += PAGE_CACHE_SIZE; + e = rb_entry(node, struct btrfs_free_space, offset_index); + *entries += 1; - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); + ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, + e->bitmap); + if (ret) + goto fail; - /* - * We need to release our reference we got for grab_cache_page, - * except for the first page which will hold our checksums, we - * do that below. - */ - if (index != 0) { - unlock_page(page); - page_cache_release(page); + if (e->bitmap) { + list_add_tail(&e->list, bitmap_list); + *bitmaps += 1; + } + node = rb_next(node); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; } - - page_cache_release(page); - - index++; - } while (node); - - /* Write out the bitmaps */ - list_for_each_safe(pos, n, &bitmap_list) { - void *addr; - struct btrfs_free_space *entry = - list_entry(pos, struct btrfs_free_space, list); - - page = find_get_page(inode->i_mapping, index); - - addr = kmap(page); - memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); - kunmap(page); - btrfs_csum_final(*crc, (char *)crc); - crc++; - bytes += PAGE_CACHE_SIZE; - - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); - list_del_init(&entry->list); - index++; - } - - /* Zero out the rest of the pages just to make sure */ - while (index <= last_index) { - void *addr; - - page = find_get_page(inode->i_mapping, index); - - addr = kmap(page); - memset(addr, 0, PAGE_CACHE_SIZE); - kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); - bytes += PAGE_CACHE_SIZE; - index++; - } - - btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); - - /* Write the checksums and trans id to the first page */ - { - void *addr; - u64 *gen; - - page = find_get_page(inode->i_mapping, 0); - - addr = kmap(page); - memcpy(addr, checksums, sizeof(u32) * num_checksums); - gen = addr + (sizeof(u32) * num_checksums); - *gen = trans->transid; - kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); } - BTRFS_I(inode)->generation = trans->transid; - - unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, GFP_NOFS); + return 0; +fail: + return -ENOSPC; +} - filemap_write_and_wait(inode->i_mapping); +static noinline_for_stack int +update_cache_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path, u64 offset, + int entries, int bitmaps) +{ + struct btrfs_key key; + struct btrfs_free_space_header *header; + struct extent_buffer *leaf; + int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; - key.offset = block_group->key.objectid; + key.offset = offset; key.type = 0; - ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { - ret = 0; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); - goto out_free; + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, + GFP_NOFS); + goto fail; } leaf = path->nodes[0]; if (ret > 0) { struct btrfs_key found_key; - BUG_ON(!path->slots[0]); + ASSERT(path->slots[0]); path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || - found_key.offset != block_group->key.objectid) { - ret = 0; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, - GFP_NOFS); - btrfs_release_path(root, path); - goto out_free; + found_key.offset != offset) { + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, + inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + NULL, GFP_NOFS); + btrfs_release_path(path); + goto fail; } } + + BTRFS_I(inode)->generation = trans->transid; header = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_header); btrfs_set_free_space_entries(leaf, header, entries); btrfs_set_free_space_bitmaps(leaf, header, bitmaps); btrfs_set_free_space_generation(leaf, header, trans->transid); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); - ret = 1; + return 0; -out_free: - if (ret == 0) { - invalidate_inode_pages2_range(inode->i_mapping, 0, index); +fail: + return -1; +} + +static noinline_for_stack int +write_pinned_extent_entries(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group, + struct io_ctl *io_ctl, + int *entries) +{ + u64 start, extent_start, extent_end, len; + struct extent_io_tree *unpin = NULL; + int ret; + + if (!block_group) + return 0; + + /* + * We want to add any pinned extents to our free space cache + * so we don't leak the space + * + * We shouldn't have switched the pinned extents yet so this is the + * right one + */ + unpin = root->fs_info->pinned_extents; + + start = block_group->key.objectid; + + while (start < block_group->key.objectid + block_group->key.offset) { + ret = find_first_extent_bit(unpin, start, + &extent_start, &extent_end, + EXTENT_DIRTY, NULL); + if (ret) + return 0; + + /* This pinned extent is out of our range */ + if (extent_start >= block_group->key.objectid + + block_group->key.offset) + return 0; + + extent_start = max(extent_start, start); + extent_end = min(block_group->key.objectid + + block_group->key.offset, extent_end + 1); + len = extent_end - extent_start; + + *entries += 1; + ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); + if (ret) + return -ENOSPC; + + start = extent_end; + } + + return 0; +} + +static noinline_for_stack int +write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) +{ + struct list_head *pos, *n; + int ret; + + /* Write out the bitmaps */ + list_for_each_safe(pos, n, bitmap_list) { + struct btrfs_free_space *entry = + list_entry(pos, struct btrfs_free_space, list); + + ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); + if (ret) + return -ENOSPC; + list_del_init(&entry->list); + } + + return 0; +} + +static int flush_dirty_cache(struct inode *inode) +{ + int ret; + + ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (ret) + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, + GFP_NOFS); + + return ret; +} + +static void noinline_for_stack +cleanup_write_cache_enospc(struct inode *inode, + struct io_ctl *io_ctl, + struct extent_state **cached_state, + struct list_head *bitmap_list) +{ + struct list_head *pos, *n; + + list_for_each_safe(pos, n, bitmap_list) { + struct btrfs_free_space *entry = + list_entry(pos, struct btrfs_free_space, list); + list_del_init(&entry->list); + } + io_ctl_drop_pages(io_ctl); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, cached_state, + GFP_NOFS); +} + +/** + * __btrfs_write_out_cache - write out cached info to an inode + * @root - the root the inode belongs to + * @ctl - the free space cache we are going to write out + * @block_group - the block_group for this cache if it belongs to a block_group + * @trans - the trans handle + * @path - the path to use + * @offset - the offset for the key we'll insert + * + * This function writes out a free space cache struct to disk for quick recovery + * on mount. This will return 0 if it was successfull in writing the cache out, + * and -1 if it was not. + */ +static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, + struct btrfs_free_space_ctl *ctl, + struct btrfs_block_group_cache *block_group, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, u64 offset) +{ + struct extent_state *cached_state = NULL; + struct io_ctl io_ctl; + LIST_HEAD(bitmap_list); + int entries = 0; + int bitmaps = 0; + int ret; + + if (!i_size_read(inode)) + return -1; + + ret = io_ctl_init(&io_ctl, inode, root, 1); + if (ret) + return -1; + + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { + down_write(&block_group->data_rwsem); spin_lock(&block_group->lock); - block_group->disk_cache_state = BTRFS_DC_ERROR; + if (block_group->delalloc_bytes) { + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + up_write(&block_group->data_rwsem); + BTRFS_I(inode)->generation = 0; + ret = 0; + goto out; + } spin_unlock(&block_group->lock); + } + + /* Lock all pages first so we can lock the extent safely. */ + io_ctl_prepare_pages(&io_ctl, inode, 0); + + lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, + 0, &cached_state); + + io_ctl_set_generation(&io_ctl, trans->transid); + + /* Write out the extent entries in the free space cache */ + ret = write_cache_extent_entries(&io_ctl, ctl, + block_group, &entries, &bitmaps, + &bitmap_list); + if (ret) + goto out_nospc; + + /* + * Some spaces that are freed in the current transaction are pinned, + * they will be added into free space cache after the transaction is + * committed, we shouldn't lose them. + */ + ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); + if (ret) + goto out_nospc; + + /* At last, we write out all the bitmaps. */ + ret = write_bitmap_entries(&io_ctl, &bitmap_list); + if (ret) + goto out_nospc; + + /* Zero out the rest of the pages just to make sure */ + io_ctl_zero_remaining_pages(&io_ctl); + + /* Everything is written out, now we dirty the pages in the file. */ + ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, + 0, i_size_read(inode), &cached_state); + if (ret) + goto out_nospc; + + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + up_write(&block_group->data_rwsem); + /* + * Release the pages and unlock the extent, we will flush + * them out later + */ + io_ctl_drop_pages(&io_ctl); + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, GFP_NOFS); + + /* Flush the dirty pages in the cache file. */ + ret = flush_dirty_cache(inode); + if (ret) + goto out; + + /* Update the cache item to tell everyone this cache file is valid. */ + ret = update_cache_item(trans, root, inode, path, offset, + entries, bitmaps); +out: + io_ctl_free(&io_ctl); + if (ret) { + invalidate_inode_pages2(inode->i_mapping); BTRFS_I(inode)->generation = 0; } - kfree(checksums); btrfs_update_inode(trans, root, inode); + return ret; + +out_nospc: + cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); + + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + up_write(&block_group->data_rwsem); + + goto out; +} + +int btrfs_write_out_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct inode *inode; + int ret = 0; + + root = root->fs_info->tree_root; + + spin_lock(&block_group->lock); + if (block_group->disk_cache_state < BTRFS_DC_SETUP) { + spin_unlock(&block_group->lock); + return 0; + } + + if (block_group->delalloc_bytes) { + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + return 0; + } + spin_unlock(&block_group->lock); + + inode = lookup_free_space_inode(root, block_group, path); + if (IS_ERR(inode)) + return 0; + + ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, + path, block_group->key.objectid); + if (ret) { + spin_lock(&block_group->lock); + block_group->disk_cache_state = BTRFS_DC_ERROR; + spin_unlock(&block_group->lock); + ret = 0; +#ifdef DEBUG + btrfs_err(root->fs_info, + "failed to write free space cache for block group %llu", + block_group->key.objectid); +#endif + } + iput(inode); return ret; } -static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, +static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, u64 offset) { - BUG_ON(offset < bitmap_start); + ASSERT(offset >= bitmap_start); offset -= bitmap_start; - return (unsigned long)(div64_u64(offset, sectorsize)); + return (unsigned long)(div_u64(offset, unit)); } -static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) +static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) { - return (unsigned long)(div64_u64(bytes, sectorsize)); + return (unsigned long)(div_u64(bytes, unit)); } -static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, +static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; u64 bytes_per_bitmap; - bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; - bitmap_start = offset - block_group->key.objectid; + bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; + bitmap_start = offset - ctl->start; bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; - bitmap_start += block_group->key.objectid; + bitmap_start += ctl->start; return bitmap_start; } @@ -838,10 +1307,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, * logically. */ if (bitmap) { - WARN_ON(info->bitmap); + if (info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_right; } else { - WARN_ON(!info->bitmap); + if (!info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_left; } } @@ -861,10 +1336,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, * offset. */ static struct btrfs_free_space * -tree_search_offset(struct btrfs_block_group_cache *block_group, +tree_search_offset(struct btrfs_free_space_ctl *ctl, u64 offset, int bitmap_only, int fuzzy) { - struct rb_node *n = block_group->free_space_offset.rb_node; + struct rb_node *n = ctl->free_space_offset.rb_node; struct btrfs_free_space *entry, *prev = NULL; /* find entry that is closest to the 'offset' */ @@ -910,18 +1385,13 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, * if previous extent entry covers the offset, * we should return it instead of the bitmap entry */ - n = &entry->offset_index; - while (1) { - n = rb_prev(n); - if (!n) - break; + n = rb_prev(&entry->offset_index); + if (n) { prev = rb_entry(n, struct btrfs_free_space, offset_index); - if (!prev->bitmap) { - if (prev->offset + prev->bytes > offset) - entry = prev; - break; - } + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + entry = prev; } } return entry; @@ -937,7 +1407,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, if (n) { entry = rb_entry(n, struct btrfs_free_space, offset_index); - BUG_ON(entry->offset > offset); + ASSERT(entry->offset <= offset); } else { if (fuzzy) return entry; @@ -947,21 +1417,15 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, } if (entry->bitmap) { - n = &entry->offset_index; - while (1) { - n = rb_prev(n); - if (!n) - break; + n = rb_prev(&entry->offset_index); + if (n) { prev = rb_entry(n, struct btrfs_free_space, offset_index); - if (!prev->bitmap) { - if (prev->offset + prev->bytes > offset) - return prev; - break; - } + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + return prev; } - if (entry->offset + BITS_PER_BITMAP * - block_group->sectorsize > offset) + if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) return entry; } else if (entry->offset + entry->bytes > offset) return entry; @@ -972,7 +1436,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, while (1) { if (entry->bitmap) { if (entry->offset + BITS_PER_BITMAP * - block_group->sectorsize > offset) + ctl->unit > offset) break; } else { if (entry->offset + entry->bytes > offset) @@ -988,42 +1452,49 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, } static inline void -__unlink_free_space(struct btrfs_block_group_cache *block_group, +__unlink_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { - rb_erase(&info->offset_index, &block_group->free_space_offset); - block_group->free_extents--; + rb_erase(&info->offset_index, &ctl->free_space_offset); + ctl->free_extents--; } -static void unlink_free_space(struct btrfs_block_group_cache *block_group, +static void unlink_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { - __unlink_free_space(block_group, info); - block_group->free_space -= info->bytes; + __unlink_free_space(ctl, info); + ctl->free_space -= info->bytes; } -static int link_free_space(struct btrfs_block_group_cache *block_group, +static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { int ret = 0; - BUG_ON(!info->bitmap && !info->bytes); - ret = tree_insert_offset(&block_group->free_space_offset, info->offset, + ASSERT(info->bytes || info->bitmap); + ret = tree_insert_offset(&ctl->free_space_offset, info->offset, &info->offset_index, (info->bitmap != NULL)); if (ret) return ret; - block_group->free_space += info->bytes; - block_group->free_extents++; + ctl->free_space += info->bytes; + ctl->free_extents++; return ret; } -static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) +static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) { + struct btrfs_block_group_cache *block_group = ctl->private; u64 max_bytes; u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; + u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); + + max_bitmaps = max(max_bitmaps, 1); + + ASSERT(ctl->total_bitmaps <= max_bitmaps); /* * The goal is to keep the total amount of memory used per 1gb of space @@ -1041,10 +1512,10 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. */ - bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; if (bitmap_bytes >= max_bytes) { - block_group->extents_thresh = 0; + ctl->extents_thresh = 0; return; } @@ -1055,144 +1526,180 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) extent_bytes = max_bytes - bitmap_bytes; extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); - block_group->extents_thresh = + ctl->extents_thresh = div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } -static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, - struct btrfs_free_space *info, u64 offset, - u64 bytes) +static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + u64 offset, u64 bytes) { - unsigned long start, end; - unsigned long i; + unsigned long start, count; - start = offset_to_bit(info->offset, block_group->sectorsize, offset); - end = start + bytes_to_bits(bytes, block_group->sectorsize); - BUG_ON(end > BITS_PER_BITMAP); + start = offset_to_bit(info->offset, ctl->unit, offset); + count = bytes_to_bits(bytes, ctl->unit); + ASSERT(start + count <= BITS_PER_BITMAP); - for (i = start; i < end; i++) - clear_bit(i, info->bitmap); + bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; - block_group->free_space -= bytes; } -static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, +static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + __bitmap_clear_bits(ctl, info, offset, bytes); + ctl->free_space -= bytes; +} + +static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset, u64 bytes) { - unsigned long start, end; - unsigned long i; + unsigned long start, count; - start = offset_to_bit(info->offset, block_group->sectorsize, offset); - end = start + bytes_to_bits(bytes, block_group->sectorsize); - BUG_ON(end > BITS_PER_BITMAP); + start = offset_to_bit(info->offset, ctl->unit, offset); + count = bytes_to_bits(bytes, ctl->unit); + ASSERT(start + count <= BITS_PER_BITMAP); - for (i = start; i < end; i++) - set_bit(i, info->bitmap); + bitmap_set(info->bitmap, start, count); info->bytes += bytes; - block_group->free_space += bytes; + ctl->free_space += bytes; } -static int search_bitmap(struct btrfs_block_group_cache *block_group, +/* + * If we can not find suitable extent, we will use bytes to record + * the size of the max extent. + */ +static int search_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *bitmap_info, u64 *offset, u64 *bytes) { unsigned long found_bits = 0; + unsigned long max_bits = 0; unsigned long bits, i; unsigned long next_zero; + unsigned long extent_bits; - i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, + i = offset_to_bit(bitmap_info->offset, ctl->unit, max_t(u64, *offset, bitmap_info->offset)); - bits = bytes_to_bits(*bytes, block_group->sectorsize); + bits = bytes_to_bits(*bytes, ctl->unit); - for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); - i < BITS_PER_BITMAP; - i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { + for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { next_zero = find_next_zero_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); - if ((next_zero - i) >= bits) { - found_bits = next_zero - i; + extent_bits = next_zero - i; + if (extent_bits >= bits) { + found_bits = extent_bits; break; + } else if (extent_bits > max_bits) { + max_bits = extent_bits; } i = next_zero; } if (found_bits) { - *offset = (u64)(i * block_group->sectorsize) + - bitmap_info->offset; - *bytes = (u64)(found_bits) * block_group->sectorsize; + *offset = (u64)(i * ctl->unit) + bitmap_info->offset; + *bytes = (u64)(found_bits) * ctl->unit; return 0; } + *bytes = (u64)(max_bits) * ctl->unit; return -1; } -static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache - *block_group, u64 *offset, - u64 *bytes, int debug) +/* Cache the size of the max extent in bytes */ +static struct btrfs_free_space * +find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, + unsigned long align, u64 *max_extent_size) { struct btrfs_free_space *entry; struct rb_node *node; + u64 tmp; + u64 align_off; int ret; - if (!block_group->free_space_offset.rb_node) - return NULL; + if (!ctl->free_space_offset.rb_node) + goto out; - entry = tree_search_offset(block_group, - offset_to_bitmap(block_group, *offset), - 0, 1); + entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); if (!entry) - return NULL; + goto out; for (node = &entry->offset_index; node; node = rb_next(node)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); - if (entry->bytes < *bytes) + if (entry->bytes < *bytes) { + if (entry->bytes > *max_extent_size) + *max_extent_size = entry->bytes; + continue; + } + + /* make sure the space returned is big enough + * to match our requested alignment + */ + if (*bytes >= align) { + tmp = entry->offset - ctl->start + align - 1; + do_div(tmp, align); + tmp = tmp * align + ctl->start; + align_off = tmp - entry->offset; + } else { + align_off = 0; + tmp = entry->offset; + } + + if (entry->bytes < *bytes + align_off) { + if (entry->bytes > *max_extent_size) + *max_extent_size = entry->bytes; continue; + } if (entry->bitmap) { - ret = search_bitmap(block_group, entry, offset, bytes); - if (!ret) + u64 size = *bytes; + + ret = search_bitmap(ctl, entry, &tmp, &size); + if (!ret) { + *offset = tmp; + *bytes = size; return entry; + } else if (size > *max_extent_size) { + *max_extent_size = size; + } continue; } - *offset = entry->offset; - *bytes = entry->bytes; + *offset = tmp; + *bytes = entry->bytes - align_off; return entry; } - +out: return NULL; } -static void add_new_bitmap(struct btrfs_block_group_cache *block_group, +static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset) { - u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; - int max_bitmaps = (int)div64_u64(block_group->key.offset + - bytes_per_bg - 1, bytes_per_bg); - BUG_ON(block_group->total_bitmaps >= max_bitmaps); - - info->offset = offset_to_bitmap(block_group, offset); + info->offset = offset_to_bitmap(ctl, offset); info->bytes = 0; - link_free_space(block_group, info); - block_group->total_bitmaps++; + INIT_LIST_HEAD(&info->list); + link_free_space(ctl, info); + ctl->total_bitmaps++; - recalculate_thresholds(block_group); + ctl->op->recalc_thresholds(ctl); } -static void free_bitmap(struct btrfs_block_group_cache *block_group, +static void free_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *bitmap_info) { - unlink_free_space(block_group, bitmap_info); + unlink_free_space(ctl, bitmap_info); kfree(bitmap_info->bitmap); - kfree(bitmap_info); - block_group->total_bitmaps--; - recalculate_thresholds(block_group); + kmem_cache_free(btrfs_free_space_cachep, bitmap_info); + ctl->total_bitmaps--; + ctl->op->recalc_thresholds(ctl); } -static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, +static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *bitmap_info, u64 *offset, u64 *bytes) { @@ -1201,40 +1708,35 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro int ret; again: - end = bitmap_info->offset + - (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; + end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; /* - * XXX - this can go away after a few releases. - * - * since the only user of btrfs_remove_free_space is the tree logging - * stuff, and the only way to test that is under crash conditions, we - * want to have this debug stuff here just in case somethings not - * working. Search the bitmap for the space we are trying to use to - * make sure its actually there. If its not there then we need to stop - * because something has gone wrong. + * We need to search for bits in this bitmap. We could only cover some + * of the extent in this bitmap thanks to how we add space, so we need + * to search for as much as it as we can and clear that amount, and then + * go searching for the next bit. */ search_start = *offset; - search_bytes = *bytes; + search_bytes = ctl->unit; search_bytes = min(search_bytes, end - search_start + 1); - ret = search_bitmap(block_group, bitmap_info, &search_start, - &search_bytes); - BUG_ON(ret < 0 || search_start != *offset); + ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); + if (ret < 0 || search_start != *offset) + return -EINVAL; - if (*offset > bitmap_info->offset && *offset + *bytes > end) { - bitmap_clear_bits(block_group, bitmap_info, *offset, - end - *offset + 1); - *bytes -= end - *offset + 1; - *offset = end + 1; - } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { - bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); - *bytes = 0; - } + /* We may have found more bits than what we need */ + search_bytes = min(search_bytes, *bytes); + + /* Cannot clear past the end of the bitmap */ + search_bytes = min(search_bytes, end - search_start + 1); + + bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); + *offset += search_bytes; + *bytes -= search_bytes; if (*bytes) { struct rb_node *next = rb_next(&bitmap_info->offset_index); if (!bitmap_info->bytes) - free_bitmap(block_group, bitmap_info); + free_bitmap(ctl, bitmap_info); /* * no entry after this bitmap, but we still have bytes to @@ -1260,70 +1762,149 @@ again: * everything over again. */ search_start = *offset; - search_bytes = *bytes; - ret = search_bitmap(block_group, bitmap_info, &search_start, + search_bytes = ctl->unit; + ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); if (ret < 0 || search_start != *offset) return -EAGAIN; goto again; } else if (!bitmap_info->bytes) - free_bitmap(block_group, bitmap_info); + free_bitmap(ctl, bitmap_info); return 0; } -static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, - struct btrfs_free_space *info) +static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) { - struct btrfs_free_space *bitmap_info; - int added = 0; - u64 bytes, offset, end; - int ret; + u64 bytes_to_set = 0; + u64 end; + + end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); + + bytes_to_set = min(end - offset, bytes); + + bitmap_set_bits(ctl, info, offset, bytes_to_set); + + return bytes_to_set; + +} + +static bool use_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct btrfs_block_group_cache *block_group = ctl->private; /* * If we are below the extents threshold then we can add this as an * extent, and don't have to deal with the bitmap */ - if (block_group->free_extents < block_group->extents_thresh && - info->bytes > block_group->sectorsize * 4) - return 0; + if (ctl->free_extents < ctl->extents_thresh) { + /* + * If this block group has some small extents we don't want to + * use up all of our free slots in the cache with them, we want + * to reserve them to larger extents, however if we have plent + * of cache left then go ahead an dadd them, no sense in adding + * the overhead of a bitmap if we don't have to. + */ + if (info->bytes <= block_group->sectorsize * 4) { + if (ctl->free_extents * 2 <= ctl->extents_thresh) + return false; + } else { + return false; + } + } /* - * some block groups are so tiny they can't be enveloped by a bitmap, so - * don't even bother to create a bitmap for this + * The original block groups from mkfs can be really small, like 8 + * megabytes, so don't bother with a bitmap for those entries. However + * some block groups can be smaller than what a bitmap would cover but + * are still large enough that they could overflow the 32k memory limit, + * so allow those block groups to still be allowed to have a bitmap + * entry. */ - if (BITS_PER_BITMAP * block_group->sectorsize > - block_group->key.offset) - return 0; + if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) + return false; + + return true; +} + +static struct btrfs_free_space_op free_space_op = { + .recalc_thresholds = recalculate_thresholds, + .use_bitmap = use_bitmap, +}; + +static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct btrfs_free_space *bitmap_info; + struct btrfs_block_group_cache *block_group = NULL; + int added = 0; + u64 bytes, offset, bytes_added; + int ret; bytes = info->bytes; offset = info->offset; + if (!ctl->op->use_bitmap(ctl, info)) + return 0; + + if (ctl->op == &free_space_op) + block_group = ctl->private; again: - bitmap_info = tree_search_offset(block_group, - offset_to_bitmap(block_group, offset), + /* + * Since we link bitmaps right into the cluster we need to see if we + * have a cluster here, and if so and it has our bitmap we need to add + * the free space to that bitmap. + */ + if (block_group && !list_empty(&block_group->cluster_list)) { + struct btrfs_free_cluster *cluster; + struct rb_node *node; + struct btrfs_free_space *entry; + + cluster = list_entry(block_group->cluster_list.next, + struct btrfs_free_cluster, + block_group_list); + spin_lock(&cluster->lock); + node = rb_first(&cluster->root); + if (!node) { + spin_unlock(&cluster->lock); + goto no_cluster_bitmap; + } + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + if (!entry->bitmap) { + spin_unlock(&cluster->lock); + goto no_cluster_bitmap; + } + + if (entry->offset == offset_to_bitmap(ctl, offset)) { + bytes_added = add_bytes_to_bitmap(ctl, entry, + offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + } + spin_unlock(&cluster->lock); + if (!bytes) { + ret = 1; + goto out; + } + } + +no_cluster_bitmap: + bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!bitmap_info) { - BUG_ON(added); + ASSERT(added == 0); goto new_bitmap; } - end = bitmap_info->offset + - (u64)(BITS_PER_BITMAP * block_group->sectorsize); - - if (offset >= bitmap_info->offset && offset + bytes > end) { - bitmap_set_bits(block_group, bitmap_info, offset, - end - offset); - bytes -= end - offset; - offset = end; - added = 0; - } else if (offset >= bitmap_info->offset && offset + bytes <= end) { - bitmap_set_bits(block_group, bitmap_info, offset, bytes); - bytes = 0; - } else { - BUG(); - } + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + added = 0; if (!bytes) { ret = 1; @@ -1333,19 +1914,19 @@ again: new_bitmap: if (info && info->bitmap) { - add_new_bitmap(block_group, info, offset); + add_new_bitmap(ctl, info, offset); added = 1; info = NULL; goto again; } else { - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); /* no pre-allocated info, allocate a new one */ if (!info) { - info = kzalloc(sizeof(struct btrfs_free_space), - GFP_NOFS); + info = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); if (!info) { - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); ret = -ENOMEM; goto out; } @@ -1353,7 +1934,7 @@ new_bitmap: /* allocate the bitmap */ info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); if (!info->bitmap) { ret = -ENOMEM; goto out; @@ -1365,13 +1946,13 @@ out: if (info) { if (info->bitmap) kfree(info->bitmap); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } return ret; } -bool try_merge_free_space(struct btrfs_block_group_cache *block_group, +static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { struct btrfs_free_space *left_info; @@ -1385,54 +1966,54 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, * are adding, if there is remove that struct and add a new one to * cover the entire range */ - right_info = tree_search_offset(block_group, offset + bytes, 0, 0); + right_info = tree_search_offset(ctl, offset + bytes, 0, 0); if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); else - left_info = tree_search_offset(block_group, offset - 1, 0, 0); + left_info = tree_search_offset(ctl, offset - 1, 0, 0); if (right_info && !right_info->bitmap) { if (update_stat) - unlink_free_space(block_group, right_info); + unlink_free_space(ctl, right_info); else - __unlink_free_space(block_group, right_info); + __unlink_free_space(ctl, right_info); info->bytes += right_info->bytes; - kfree(right_info); + kmem_cache_free(btrfs_free_space_cachep, right_info); merged = true; } if (left_info && !left_info->bitmap && left_info->offset + left_info->bytes == offset) { if (update_stat) - unlink_free_space(block_group, left_info); + unlink_free_space(ctl, left_info); else - __unlink_free_space(block_group, left_info); + __unlink_free_space(ctl, left_info); info->offset = left_info->offset; info->bytes += left_info->bytes; - kfree(left_info); + kmem_cache_free(btrfs_free_space_cachep, left_info); merged = true; } return merged; } -int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, - u64 offset, u64 bytes) +int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, + u64 offset, u64 bytes) { struct btrfs_free_space *info; int ret = 0; - info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); if (!info) return -ENOMEM; info->offset = offset; info->bytes = bytes; - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); - if (try_merge_free_space(block_group, info, true)) + if (try_merge_free_space(ctl, info, true)) goto link; /* @@ -1440,7 +2021,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, * extent then we know we're going to have to allocate a new extent, so * before we do that see if we need to drop this into a bitmap */ - ret = insert_into_bitmap(block_group, info); + ret = insert_into_bitmap(ctl, info); if (ret < 0) { goto out; } else if (ret) { @@ -1448,15 +2029,15 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, goto out; } link: - ret = link_free_space(block_group, info); + ret = link_free_space(ctl, info); if (ret) - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); out: - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); if (ret) { - printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); - BUG_ON(ret == -EEXIST); + printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret); + ASSERT(ret != -EEXIST); } return ret; @@ -1465,116 +2046,89 @@ out: int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, u64 offset, u64 bytes) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *info; - struct btrfs_free_space *next_info = NULL; - int ret = 0; + int ret; + bool re_search = false; - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); again: - info = tree_search_offset(block_group, offset, 0, 0); + ret = 0; + if (!bytes) + goto out_lock; + + info = tree_search_offset(ctl, offset, 0, 0); if (!info) { /* * oops didn't find an extent that matched the space we wanted * to remove, look for a bitmap instead */ - info = tree_search_offset(block_group, - offset_to_bitmap(block_group, offset), + info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!info) { - WARN_ON(1); - goto out_lock; - } - } - - if (info->bytes < bytes && rb_next(&info->offset_index)) { - u64 end; - next_info = rb_entry(rb_next(&info->offset_index), - struct btrfs_free_space, - offset_index); - - if (next_info->bitmap) - end = next_info->offset + BITS_PER_BITMAP * - block_group->sectorsize - 1; - else - end = next_info->offset + next_info->bytes; - - if (next_info->bytes < bytes || - next_info->offset > offset || offset > end) { - printk(KERN_CRIT "Found free space at %llu, size %llu," - " trying to use %llu\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, - (unsigned long long)bytes); - WARN_ON(1); - ret = -EINVAL; + /* + * If we found a partial bit of our free space in a + * bitmap but then couldn't find the other part this may + * be a problem, so WARN about it. + */ + WARN_ON(re_search); goto out_lock; } - - info = next_info; } - if (info->bytes == bytes) { - unlink_free_space(block_group, info); - if (info->bitmap) { - kfree(info->bitmap); - block_group->total_bitmaps--; - } - kfree(info); - goto out_lock; - } + re_search = false; + if (!info->bitmap) { + unlink_free_space(ctl, info); + if (offset == info->offset) { + u64 to_free = min(bytes, info->bytes); - if (!info->bitmap && info->offset == offset) { - unlink_free_space(block_group, info); - info->offset += bytes; - info->bytes -= bytes; - link_free_space(block_group, info); - goto out_lock; - } + info->bytes -= to_free; + info->offset += to_free; + if (info->bytes) { + ret = link_free_space(ctl, info); + WARN_ON(ret); + } else { + kmem_cache_free(btrfs_free_space_cachep, info); + } - if (!info->bitmap && info->offset <= offset && - info->offset + info->bytes >= offset + bytes) { - u64 old_start = info->offset; - /* - * we're freeing space in the middle of the info, - * this can happen during tree log replay - * - * first unlink the old info and then - * insert it again after the hole we're creating - */ - unlink_free_space(block_group, info); - if (offset + bytes < info->offset + info->bytes) { - u64 old_end = info->offset + info->bytes; + offset += to_free; + bytes -= to_free; + goto again; + } else { + u64 old_end = info->bytes + info->offset; - info->offset = offset + bytes; - info->bytes = old_end - info->offset; - ret = link_free_space(block_group, info); + info->bytes = offset - info->offset; + ret = link_free_space(ctl, info); WARN_ON(ret); if (ret) goto out_lock; - } else { - /* the hole we're creating ends at the end - * of the info struct, just free the info - */ - kfree(info); - } - spin_unlock(&block_group->tree_lock); - /* step two, insert a new info struct to cover - * anything before the hole - */ - ret = btrfs_add_free_space(block_group, old_start, - offset - old_start); - WARN_ON(ret); - goto out; + /* Not enough bytes in this entry to satisfy us */ + if (old_end < offset + bytes) { + bytes -= old_end - offset; + offset = old_end; + goto again; + } else if (old_end == offset + bytes) { + /* all done */ + goto out_lock; + } + spin_unlock(&ctl->tree_lock); + + ret = btrfs_add_free_space(block_group, offset + bytes, + old_end - (offset + bytes)); + WARN_ON(ret); + goto out; + } } - ret = remove_from_bitmap(block_group, info, &offset, &bytes); - if (ret == -EAGAIN) + ret = remove_from_bitmap(ctl, info, &offset, &bytes); + if (ret == -EAGAIN) { + re_search = true; goto again; - BUG_ON(ret); + } out_lock: - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); out: return ret; } @@ -1582,38 +2136,43 @@ out: void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, u64 bytes) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *info; struct rb_node *n; int count = 0; - for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); - if (info->bytes >= bytes) + if (info->bytes >= bytes && !block_group->ro) count++; - printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, + btrfs_crit(block_group->fs_info, + "entry offset %llu, bytes %llu, bitmap %s", + info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } - printk(KERN_INFO "block group has cluster?: %s\n", + btrfs_info(block_group->fs_info, "block group has cluster?: %s", list_empty(&block_group->cluster_list) ? "no" : "yes"); - printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" - "\n", count); + btrfs_info(block_group->fs_info, + "%d blocks of free space at or bigger than bytes is", count); } -u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) +void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) { - struct btrfs_free_space *info; - struct rb_node *n; - u64 ret = 0; + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - for (n = rb_first(&block_group->free_space_offset); n; - n = rb_next(n)) { - info = rb_entry(n, struct btrfs_free_space, offset_index); - ret += info->bytes; - } + spin_lock_init(&ctl->tree_lock); + ctl->unit = block_group->sectorsize; + ctl->start = block_group->key.objectid; + ctl->private = block_group; + ctl->op = &free_space_op; - return ret; + /* + * we only want to have 32k of ram per block group for keeping + * track of free space, and if we pass 1/2 of that we want to + * start converting things over to using bitmaps + */ + ctl->extents_thresh = ((1024 * 32) / 2) / + sizeof(struct btrfs_free_space); } /* @@ -1627,32 +2186,31 @@ __btrfs_return_cluster_to_free_space( struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; struct rb_node *node; - bool bitmap; spin_lock(&cluster->lock); if (cluster->block_group != block_group) goto out; - bitmap = cluster->points_to_bitmap; cluster->block_group = NULL; cluster->window_start = 0; list_del_init(&cluster->block_group_list); - cluster->points_to_bitmap = false; - - if (bitmap) - goto out; node = rb_first(&cluster->root); while (node) { + bool bitmap; + entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); rb_erase(&entry->offset_index, &cluster->root); - BUG_ON(entry->bitmap); - try_merge_free_space(block_group, entry, false); - tree_insert_offset(&block_group->free_space_offset, - entry->offset, &entry->offset_index, 0); + + bitmap = (entry->bitmap != NULL); + if (!bitmap) + try_merge_free_space(ctl, entry, false); + tree_insert_offset(&ctl->free_space_offset, + entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; @@ -1662,14 +2220,42 @@ out: return 0; } -void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +static void __btrfs_remove_free_space_cache_locked( + struct btrfs_free_space_ctl *ctl) { struct btrfs_free_space *info; struct rb_node *node; + + while ((node = rb_last(&ctl->free_space_offset)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + if (!info->bitmap) { + unlink_free_space(ctl, info); + kmem_cache_free(btrfs_free_space_cachep, info); + } else { + free_bitmap(ctl, info); + } + if (need_resched()) { + spin_unlock(&ctl->tree_lock); + cond_resched(); + spin_lock(&ctl->tree_lock); + } + } +} + +void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) +{ + spin_lock(&ctl->tree_lock); + __btrfs_remove_free_space_cache_locked(ctl); + spin_unlock(&ctl->tree_lock); +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_cluster *cluster; struct list_head *head; - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); while ((head = block_group->cluster_list.next) != &block_group->cluster_list) { cluster = list_entry(head, struct btrfs_free_cluster, @@ -1678,58 +2264,57 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) WARN_ON(cluster->block_group != block_group); __btrfs_return_cluster_to_free_space(block_group, cluster); if (need_resched()) { - spin_unlock(&block_group->tree_lock); - cond_resched(); - spin_lock(&block_group->tree_lock); - } - } - - while ((node = rb_last(&block_group->free_space_offset)) != NULL) { - info = rb_entry(node, struct btrfs_free_space, offset_index); - unlink_free_space(block_group, info); - if (info->bitmap) - kfree(info->bitmap); - kfree(info); - if (need_resched()) { - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); cond_resched(); - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); } } + __btrfs_remove_free_space_cache_locked(ctl); + spin_unlock(&ctl->tree_lock); - spin_unlock(&block_group->tree_lock); } u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, - u64 offset, u64 bytes, u64 empty_size) + u64 offset, u64 bytes, u64 empty_size, + u64 *max_extent_size) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry = NULL; u64 bytes_search = bytes + empty_size; u64 ret = 0; + u64 align_gap = 0; + u64 align_gap_len = 0; - spin_lock(&block_group->tree_lock); - entry = find_free_space(block_group, &offset, &bytes_search, 0); + spin_lock(&ctl->tree_lock); + entry = find_free_space(ctl, &offset, &bytes_search, + block_group->full_stripe_len, max_extent_size); if (!entry) goto out; ret = offset; if (entry->bitmap) { - bitmap_clear_bits(block_group, entry, offset, bytes); + bitmap_clear_bits(ctl, entry, offset, bytes); if (!entry->bytes) - free_bitmap(block_group, entry); + free_bitmap(ctl, entry); } else { - unlink_free_space(block_group, entry); - entry->offset += bytes; - entry->bytes -= bytes; + unlink_free_space(ctl, entry); + align_gap_len = offset - entry->offset; + align_gap = entry->offset; + + entry->offset = offset + bytes; + WARN_ON(entry->bytes < bytes + align_gap_len); + + entry->bytes -= bytes + align_gap_len; if (!entry->bytes) - kfree(entry); + kmem_cache_free(btrfs_free_space_cachep, entry); else - link_free_space(block_group, entry); + link_free_space(ctl, entry); } - out: - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); + if (align_gap_len) + __btrfs_add_free_space(ctl, align_gap, align_gap_len); return ret; } @@ -1745,6 +2330,7 @@ int btrfs_return_cluster_to_free_space( struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster) { + struct btrfs_free_space_ctl *ctl; int ret; /* first, get a safe pointer to the block group */ @@ -1763,10 +2349,12 @@ int btrfs_return_cluster_to_free_space( atomic_inc(&block_group->count); spin_unlock(&cluster->lock); + ctl = block_group->free_space_ctl; + /* now return any extents the cluster had on it */ - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); ret = __btrfs_return_cluster_to_free_space(block_group, cluster); - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); /* finally drop our ref */ btrfs_put_block_group(block_group); @@ -1775,50 +2363,28 @@ int btrfs_return_cluster_to_free_space( static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, - u64 bytes, u64 min_start) + struct btrfs_free_space *entry, + u64 bytes, u64 min_start, + u64 *max_extent_size) { - struct btrfs_free_space *entry; + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; int err; u64 search_start = cluster->window_start; u64 search_bytes = bytes; u64 ret = 0; - spin_lock(&block_group->tree_lock); - spin_lock(&cluster->lock); - - if (!cluster->points_to_bitmap) - goto out; - - if (cluster->block_group != block_group) - goto out; - - /* - * search_start is the beginning of the bitmap, but at some point it may - * be a good idea to point to the actual start of the free area in the - * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only - * to 1 to make sure we get the bitmap entry - */ - entry = tree_search_offset(block_group, - offset_to_bitmap(block_group, search_start), - 1, 0); - if (!entry || !entry->bitmap) - goto out; - search_start = min_start; search_bytes = bytes; - err = search_bitmap(block_group, entry, &search_start, - &search_bytes); - if (err) - goto out; + err = search_bitmap(ctl, entry, &search_start, &search_bytes); + if (err) { + if (search_bytes > *max_extent_size) + *max_extent_size = search_bytes; + return 0; + } ret = search_start; - bitmap_clear_bits(block_group, entry, ret, bytes); - if (entry->bytes == 0) - free_bitmap(block_group, entry); -out: - spin_unlock(&cluster->lock); - spin_unlock(&block_group->tree_lock); + __bitmap_clear_bits(ctl, entry, ret, bytes); return ret; } @@ -1830,16 +2396,13 @@ out: */ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 bytes, - u64 min_start) + u64 min_start, u64 *max_extent_size) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry = NULL; struct rb_node *node; u64 ret = 0; - if (cluster->points_to_bitmap) - return btrfs_alloc_from_bitmap(block_group, cluster, bytes, - min_start); - spin_lock(&cluster->lock); if (bytes > cluster->max_size) goto out; @@ -1852,11 +2415,12 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, goto out; entry = rb_entry(node, struct btrfs_free_space, offset_index); + while (1) { + if (entry->bytes < bytes && entry->bytes > *max_extent_size) + *max_extent_size = entry->bytes; - while(1) { - if (entry->bytes < bytes || entry->offset < min_start) { - struct rb_node *node; - + if (entry->bytes < bytes || + (!entry->bitmap && entry->offset < min_start)) { node = rb_next(&entry->offset_index); if (!node) break; @@ -1864,10 +2428,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, offset_index); continue; } - ret = entry->offset; - entry->offset += bytes; - entry->bytes -= bytes; + if (entry->bitmap) { + ret = btrfs_alloc_from_bitmap(block_group, + cluster, entry, bytes, + cluster->window_start, + max_extent_size); + if (ret == 0) { + node = rb_next(&entry->offset_index); + if (!node) + break; + entry = rb_entry(node, struct btrfs_free_space, + offset_index); + continue; + } + cluster->window_start += bytes; + } else { + ret = entry->offset; + + entry->offset += bytes; + entry->bytes -= bytes; + } if (entry->bytes == 0) rb_erase(&entry->offset_index, &cluster->root); @@ -1879,15 +2460,20 @@ out: if (!ret) return 0; - spin_lock(&block_group->tree_lock); + spin_lock(&ctl->tree_lock); - block_group->free_space -= bytes; + ctl->free_space -= bytes; if (entry->bytes == 0) { - block_group->free_extents--; - kfree(entry); + ctl->free_extents--; + if (entry->bitmap) { + kfree(entry->bitmap); + ctl->total_bitmaps--; + ctl->op->recalc_thresholds(ctl); + } + kmem_cache_free(btrfs_free_space_cachep, entry); } - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); return ret; } @@ -1895,30 +2481,30 @@ out: static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_space *entry, struct btrfs_free_cluster *cluster, - u64 offset, u64 bytes, u64 min_bytes) + u64 offset, u64 bytes, + u64 cont1_bytes, u64 min_bytes) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; unsigned long next_zero; unsigned long i; - unsigned long search_bits; - unsigned long total_bits; + unsigned long want_bits; + unsigned long min_bits; unsigned long found_bits; unsigned long start = 0; unsigned long total_found = 0; - bool found = false; + int ret; - i = offset_to_bit(entry->offset, block_group->sectorsize, + i = offset_to_bit(entry->offset, ctl->unit, max_t(u64, offset, entry->offset)); - search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); - total_bits = bytes_to_bits(bytes, block_group->sectorsize); + want_bits = bytes_to_bits(bytes, ctl->unit); + min_bits = bytes_to_bits(min_bytes, ctl->unit); again: found_bits = 0; - for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); - i < BITS_PER_BITMAP; - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { + for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { next_zero = find_next_zero_bit(entry->bitmap, BITS_PER_BITMAP, i); - if (next_zero - i >= search_bits) { + if (next_zero - i >= min_bits) { found_bits = next_zero - i; break; } @@ -1926,77 +2512,218 @@ again: } if (!found_bits) - return -1; + return -ENOSPC; - if (!found) { + if (!total_found) { start = i; - found = true; + cluster->max_size = 0; } total_found += found_bits; - if (cluster->max_size < found_bits * block_group->sectorsize) - cluster->max_size = found_bits * block_group->sectorsize; + if (cluster->max_size < found_bits * ctl->unit) + cluster->max_size = found_bits * ctl->unit; - if (total_found < total_bits) { - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); - if (i - start > total_bits * 2) { - total_found = 0; - cluster->max_size = 0; - found = false; - } + if (total_found < want_bits || cluster->max_size < cont1_bytes) { + i = next_zero + 1; goto again; } - cluster->window_start = start * block_group->sectorsize + - entry->offset; - cluster->points_to_bitmap = true; + cluster->window_start = start * ctl->unit + entry->offset; + rb_erase(&entry->offset_index, &ctl->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 1); + ASSERT(!ret); /* -EEXIST; Logic error */ + + trace_btrfs_setup_cluster(block_group, cluster, + total_found * ctl->unit, 1); + return 0; +} + +/* + * This searches the block group for just extents to fill the cluster with. + * Try to find a cluster with at least bytes total bytes, at least one + * extent of cont1_bytes, and other clusters of at least min_bytes. + */ +static noinline int +setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, + u64 cont1_bytes, u64 min_bytes) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *first = NULL; + struct btrfs_free_space *entry = NULL; + struct btrfs_free_space *last; + struct rb_node *node; + u64 window_free; + u64 max_extent; + u64 total_size = 0; + + entry = tree_search_offset(ctl, offset, 0, 1); + if (!entry) + return -ENOSPC; + + /* + * We don't want bitmaps, so just move along until we find a normal + * extent entry. + */ + while (entry->bitmap || entry->bytes < min_bytes) { + if (entry->bitmap && list_empty(&entry->list)) + list_add_tail(&entry->list, bitmaps); + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + } + + window_free = entry->bytes; + max_extent = entry->bytes; + first = entry; + last = entry; + + for (node = rb_next(&entry->offset_index); node; + node = rb_next(&entry->offset_index)) { + entry = rb_entry(node, struct btrfs_free_space, offset_index); + + if (entry->bitmap) { + if (list_empty(&entry->list)) + list_add_tail(&entry->list, bitmaps); + continue; + } + + if (entry->bytes < min_bytes) + continue; + + last = entry; + window_free += entry->bytes; + if (entry->bytes > max_extent) + max_extent = entry->bytes; + } + + if (window_free < bytes || max_extent < cont1_bytes) + return -ENOSPC; + + cluster->window_start = first->offset; + + node = &first->offset_index; + + /* + * now we've found our entries, pull them out of the free space + * cache and put them into the cluster rbtree + */ + do { + int ret; + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + if (entry->bitmap || entry->bytes < min_bytes) + continue; + + rb_erase(&entry->offset_index, &ctl->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 0); + total_size += entry->bytes; + ASSERT(!ret); /* -EEXIST; Logic error */ + } while (node && entry != last); + cluster->max_size = max_extent; + trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); return 0; } /* + * This specifically looks for bitmaps that may work in the cluster, we assume + * that we have already failed to find extents that will work. + */ +static noinline int +setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, + u64 cont1_bytes, u64 min_bytes) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry; + int ret = -ENOSPC; + u64 bitmap_offset = offset_to_bitmap(ctl, offset); + + if (ctl->total_bitmaps == 0) + return -ENOSPC; + + /* + * The bitmap that covers offset won't be in the list unless offset + * is just its start offset. + */ + entry = list_first_entry(bitmaps, struct btrfs_free_space, list); + if (entry->offset != bitmap_offset) { + entry = tree_search_offset(ctl, bitmap_offset, 1, 0); + if (entry && list_empty(&entry->list)) + list_add(&entry->list, bitmaps); + } + + list_for_each_entry(entry, bitmaps, list) { + if (entry->bytes < bytes) + continue; + ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, + bytes, cont1_bytes, min_bytes); + if (!ret) + return 0; + } + + /* + * The bitmaps list has all the bitmaps that record free space + * starting after offset, so no more search is required. + */ + return -ENOSPC; +} + +/* * here we try to find a cluster of blocks in a block group. The goal - * is to find at least bytes free and up to empty_size + bytes free. + * is to find at least bytes+empty_size. * We might not find them all in one contiguous area. * * returns zero and sets up cluster if things worked out, otherwise * it returns -enospc */ -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) { - struct btrfs_free_space *entry = NULL; - struct rb_node *node; - struct btrfs_free_space *next; - struct btrfs_free_space *last = NULL; + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry, *tmp; + LIST_HEAD(bitmaps); u64 min_bytes; - u64 window_start; - u64 window_free; - u64 max_extent = 0; - bool found_bitmap = false; + u64 cont1_bytes; int ret; - /* for metadata, allow allocates with more holes */ + /* + * Choose the minimum extent size we'll require for this + * cluster. For SSD_SPREAD, don't allow any fragmentation. + * For metadata, allow allocates with smaller extents. For + * data, keep it dense. + */ if (btrfs_test_opt(root, SSD_SPREAD)) { - min_bytes = bytes + empty_size; + cont1_bytes = min_bytes = bytes + empty_size; } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { - /* - * we want to do larger allocations when we are - * flushing out the delayed refs, it helps prevent - * making more work as we go along. - */ - if (trans->transaction->delayed_refs.flushing) - min_bytes = max(bytes, (bytes + empty_size) >> 1); - else - min_bytes = max(bytes, (bytes + empty_size) >> 4); - } else - min_bytes = max(bytes, (bytes + empty_size) >> 2); + cont1_bytes = bytes; + min_bytes = block_group->sectorsize; + } else { + cont1_bytes = max(bytes, (bytes + empty_size) >> 2); + min_bytes = block_group->sectorsize; + } + + spin_lock(&ctl->tree_lock); + + /* + * If we know we don't have enough space to make a cluster don't even + * bother doing all the work to try and find one. + */ + if (ctl->free_space < bytes) { + spin_unlock(&ctl->tree_lock); + return -ENOSPC; + } - spin_lock(&block_group->tree_lock); spin_lock(&cluster->lock); /* someone already found a cluster, hooray */ @@ -2004,153 +2731,547 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, ret = 0; goto out; } -again: - entry = tree_search_offset(block_group, offset, found_bitmap, 1); - if (!entry) { - ret = -ENOSPC; - goto out; + + trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, + min_bytes); + + INIT_LIST_HEAD(&bitmaps); + ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, + bytes + empty_size, + cont1_bytes, min_bytes); + if (ret) + ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, + offset, bytes + empty_size, + cont1_bytes, min_bytes); + + /* Clear our temporary list */ + list_for_each_entry_safe(entry, tmp, &bitmaps, list) + list_del_init(&entry->list); + + if (!ret) { + atomic_inc(&block_group->count); + list_add_tail(&cluster->block_group_list, + &block_group->cluster_list); + cluster->block_group = block_group; + } else { + trace_btrfs_failed_cluster_setup(block_group); } +out: + spin_unlock(&cluster->lock); + spin_unlock(&ctl->tree_lock); - /* - * If found_bitmap is true, we exhausted our search for extent entries, - * and we just want to search all of the bitmaps that we can find, and - * ignore any extent entries we find. - */ - while (entry->bitmap || found_bitmap || - (!entry->bitmap && entry->bytes < min_bytes)) { - struct rb_node *node = rb_next(&entry->offset_index); - - if (entry->bitmap && entry->bytes > bytes + empty_size) { - ret = btrfs_bitmap_cluster(block_group, entry, cluster, - offset, bytes + empty_size, - min_bytes); - if (!ret) - goto got_it; - } + return ret; +} - if (!node) { - ret = -ENOSPC; - goto out; - } - entry = rb_entry(node, struct btrfs_free_space, offset_index); +/* + * simple code to zero out a cluster + */ +void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) +{ + spin_lock_init(&cluster->lock); + spin_lock_init(&cluster->refill_lock); + cluster->root = RB_ROOT; + cluster->max_size = 0; + INIT_LIST_HEAD(&cluster->block_group_list); + cluster->block_group = NULL; +} + +static int do_trimming(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 bytes, + u64 reserved_start, u64 reserved_bytes) +{ + struct btrfs_space_info *space_info = block_group->space_info; + struct btrfs_fs_info *fs_info = block_group->fs_info; + int ret; + int update = 0; + u64 trimmed = 0; + + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (!block_group->ro) { + block_group->reserved += reserved_bytes; + space_info->bytes_reserved += reserved_bytes; + update = 1; } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); - /* - * We already searched all the extent entries from the passed in offset - * to the end and didn't find enough space for the cluster, and we also - * didn't find any bitmaps that met our criteria, just go ahead and exit - */ - if (found_bitmap) { - ret = -ENOSPC; - goto out; + ret = btrfs_error_discard_extent(fs_info->extent_root, + start, bytes, &trimmed); + if (!ret) + *total_trimmed += trimmed; + + btrfs_add_free_space(block_group, reserved_start, reserved_bytes); + + if (update) { + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (block_group->ro) + space_info->bytes_readonly += reserved_bytes; + block_group->reserved -= reserved_bytes; + space_info->bytes_reserved -= reserved_bytes; + spin_unlock(&space_info->lock); + spin_unlock(&block_group->lock); } - cluster->points_to_bitmap = false; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; + return ret; +} - while (1) { - /* out window is just right, lets fill it */ - if (window_free >= bytes + empty_size) +static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry; + struct rb_node *node; + int ret = 0; + u64 extent_start; + u64 extent_bytes; + u64 bytes; + + while (start < end) { + spin_lock(&ctl->tree_lock); + + if (ctl->free_space < minlen) { + spin_unlock(&ctl->tree_lock); break; + } - node = rb_next(&last->offset_index); - if (!node) { - if (found_bitmap) - goto again; - ret = -ENOSPC; - goto out; + entry = tree_search_offset(ctl, start, 0, 1); + if (!entry) { + spin_unlock(&ctl->tree_lock); + break; } - next = rb_entry(node, struct btrfs_free_space, offset_index); - /* - * we found a bitmap, so if this search doesn't result in a - * cluster, we know to go and search again for the bitmaps and - * start looking for space there - */ - if (next->bitmap) { - if (!found_bitmap) - offset = next->offset; - found_bitmap = true; - last = next; - continue; + /* skip bitmaps */ + while (entry->bitmap) { + node = rb_next(&entry->offset_index); + if (!node) { + spin_unlock(&ctl->tree_lock); + goto out; + } + entry = rb_entry(node, struct btrfs_free_space, + offset_index); } - /* - * we haven't filled the empty size and the window is - * very large. reset and try again - */ - if (next->offset - (last->offset + last->bytes) > 128 * 1024 || - next->offset - window_start > (bytes + empty_size) * 2) { - entry = next; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; - } else { - last = next; - window_free += next->bytes; - if (entry->bytes > max_extent) - max_extent = entry->bytes; + if (entry->offset >= end) { + spin_unlock(&ctl->tree_lock); + break; + } + + extent_start = entry->offset; + extent_bytes = entry->bytes; + start = max(start, extent_start); + bytes = min(extent_start + extent_bytes, end) - start; + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + goto next; + } + + unlink_free_space(ctl, entry); + kmem_cache_free(btrfs_free_space_cachep, entry); + + spin_unlock(&ctl->tree_lock); + + ret = do_trimming(block_group, total_trimmed, start, bytes, + extent_start, extent_bytes); + if (ret) + break; +next: + start += bytes; + + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; } + + cond_resched(); } +out: + return ret; +} - cluster->window_start = entry->offset; +static int trim_bitmaps(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry; + int ret = 0; + int ret2; + u64 bytes; + u64 offset = offset_to_bitmap(ctl, start); - /* - * now we've found our entries, pull them out of the free space - * cache and put them into the cluster rbtree - * - * The cluster includes an rbtree, but only uses the offset index - * of each free space cache entry. - */ - while (1) { - node = rb_next(&entry->offset_index); - if (entry->bitmap && node) { - entry = rb_entry(node, struct btrfs_free_space, - offset_index); - continue; - } else if (entry->bitmap && !node) { + while (offset < end) { + bool next_bitmap = false; + + spin_lock(&ctl->tree_lock); + + if (ctl->free_space < minlen) { + spin_unlock(&ctl->tree_lock); break; } - rb_erase(&entry->offset_index, &block_group->free_space_offset); - ret = tree_insert_offset(&cluster->root, entry->offset, - &entry->offset_index, 0); - BUG_ON(ret); + entry = tree_search_offset(ctl, offset, 1, 0); + if (!entry) { + spin_unlock(&ctl->tree_lock); + next_bitmap = true; + goto next; + } + + bytes = minlen; + ret2 = search_bitmap(ctl, entry, &start, &bytes); + if (ret2 || start >= end) { + spin_unlock(&ctl->tree_lock); + next_bitmap = true; + goto next; + } + + bytes = min(bytes, end - start); + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + goto next; + } + + bitmap_clear_bits(ctl, entry, start, bytes); + if (entry->bytes == 0) + free_bitmap(ctl, entry); + + spin_unlock(&ctl->tree_lock); + + ret = do_trimming(block_group, total_trimmed, start, bytes, + start, bytes); + if (ret) + break; +next: + if (next_bitmap) { + offset += BITS_PER_BITMAP * ctl->unit; + } else { + start += bytes; + if (start >= offset + BITS_PER_BITMAP * ctl->unit) + offset += BITS_PER_BITMAP * ctl->unit; + } - if (!node || entry == last) + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; break; + } - entry = rb_entry(node, struct btrfs_free_space, offset_index); + cond_resched(); } - cluster->max_size = max_extent; -got_it: - ret = 0; - atomic_inc(&block_group->count); - list_add_tail(&cluster->block_group_list, &block_group->cluster_list); - cluster->block_group = block_group; + return ret; +} + +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + int ret; + + *trimmed = 0; + + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); + if (ret) + return ret; + + ret = trim_bitmaps(block_group, trimmed, start, end, minlen); + + return ret; +} + +/* + * Find the left-most item in the cache tree, and then return the + * smallest inode number in the item. + * + * Note: the returned inode number may not be the smallest one in + * the tree, if the left-most item is a bitmap. + */ +u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) +{ + struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; + struct btrfs_free_space *entry = NULL; + u64 ino = 0; + + spin_lock(&ctl->tree_lock); + + if (RB_EMPTY_ROOT(&ctl->free_space_offset)) + goto out; + + entry = rb_entry(rb_first(&ctl->free_space_offset), + struct btrfs_free_space, offset_index); + + if (!entry->bitmap) { + ino = entry->offset; + + unlink_free_space(ctl, entry); + entry->offset++; + entry->bytes--; + if (!entry->bytes) + kmem_cache_free(btrfs_free_space_cachep, entry); + else + link_free_space(ctl, entry); + } else { + u64 offset = 0; + u64 count = 1; + int ret; + + ret = search_bitmap(ctl, entry, &offset, &count); + /* Logic error; Should be empty if it can't find anything */ + ASSERT(!ret); + + ino = offset; + bitmap_clear_bits(ctl, entry, offset, 1); + if (entry->bytes == 0) + free_bitmap(ctl, entry); + } out: - spin_unlock(&cluster->lock); - spin_unlock(&block_group->tree_lock); + spin_unlock(&ctl->tree_lock); + + return ino; +} + +struct inode *lookup_free_ino_inode(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct inode *inode = NULL; + + spin_lock(&root->cache_lock); + if (root->cache_inode) + inode = igrab(root->cache_inode); + spin_unlock(&root->cache_lock); + if (inode) + return inode; + + inode = __lookup_free_space_inode(root, path, 0); + if (IS_ERR(inode)) + return inode; + + spin_lock(&root->cache_lock); + if (!btrfs_fs_closing(root->fs_info)) + root->cache_inode = igrab(inode); + spin_unlock(&root->cache_lock); + + return inode; +} + +int create_free_ino_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path) +{ + return __create_free_space_inode(root, trans, path, + BTRFS_FREE_INO_OBJECTID, 0); +} + +int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +{ + struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + struct btrfs_path *path; + struct inode *inode; + int ret = 0; + u64 root_gen = btrfs_root_generation(&root->root_item); + + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + + /* + * If we're unmounting then just return, since this does a search on the + * normal root and not the commit root and we could deadlock. + */ + if (btrfs_fs_closing(fs_info)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return 0; + + inode = lookup_free_ino_inode(root, path); + if (IS_ERR(inode)) + goto out; + + if (root_gen != BTRFS_I(inode)->generation) + goto out_put; + + ret = __load_free_space_cache(root, inode, ctl, path, 0); + + if (ret < 0) + btrfs_err(fs_info, + "failed to load free ino cache for root %llu", + root->root_key.objectid); +out_put: + iput(inode); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_write_out_ino_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct inode *inode) +{ + struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + int ret; + + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + + ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); + if (ret) { + btrfs_delalloc_release_metadata(inode, inode->i_size); +#ifdef DEBUG + btrfs_err(root->fs_info, + "failed to write free ino cache for root %llu", + root->root_key.objectid); +#endif + } return ret; } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* - * simple code to zero out a cluster + * Use this if you need to make a bitmap or extent entry specifically, it + * doesn't do any of the merging that add_free_space does, this acts a lot like + * how the free space cache loading stuff works, so you can get really weird + * configurations. */ -void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap) { - spin_lock_init(&cluster->lock); - spin_lock_init(&cluster->refill_lock); - cluster->root = RB_ROOT; - cluster->max_size = 0; - cluster->points_to_bitmap = false; - INIT_LIST_HEAD(&cluster->block_group_list); - cluster->block_group = NULL; + struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; + struct btrfs_free_space *info = NULL, *bitmap_info; + void *map = NULL; + u64 bytes_added; + int ret; + +again: + if (!info) { + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; + } + + if (!bitmap) { + spin_lock(&ctl->tree_lock); + info->offset = offset; + info->bytes = bytes; + ret = link_free_space(ctl, info); + spin_unlock(&ctl->tree_lock); + if (ret) + kmem_cache_free(btrfs_free_space_cachep, info); + return ret; + } + + if (!map) { + map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!map) { + kmem_cache_free(btrfs_free_space_cachep, info); + return -ENOMEM; + } + } + + spin_lock(&ctl->tree_lock); + bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), + 1, 0); + if (!bitmap_info) { + info->bitmap = map; + map = NULL; + add_new_bitmap(ctl, info, offset); + bitmap_info = info; + } + + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + spin_unlock(&ctl->tree_lock); + + if (bytes) + goto again; + + if (map) + kfree(map); + return 0; } +/* + * Checks to see if the given range is in the free space cache. This is really + * just used to check the absence of space, so if there is free space in the + * range at all we will return 1. + */ +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) +{ + struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; + struct btrfs_free_space *info; + int ret = 0; + + spin_lock(&ctl->tree_lock); + info = tree_search_offset(ctl, offset, 0, 0); + if (!info) { + info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), + 1, 0); + if (!info) + goto out; + } + +have_info: + if (info->bitmap) { + u64 bit_off, bit_bytes; + struct rb_node *n; + struct btrfs_free_space *tmp; + + bit_off = offset; + bit_bytes = ctl->unit; + ret = search_bitmap(ctl, info, &bit_off, &bit_bytes); + if (!ret) { + if (bit_off == offset) { + ret = 1; + goto out; + } else if (bit_off > offset && + offset + bytes > bit_off) { + ret = 1; + goto out; + } + } + + n = rb_prev(&info->offset_index); + while (n) { + tmp = rb_entry(n, struct btrfs_free_space, + offset_index); + if (tmp->offset + tmp->bytes < offset) + break; + if (offset + bytes < tmp->offset) { + n = rb_prev(&info->offset_index); + continue; + } + info = tmp; + goto have_info; + } + + n = rb_next(&info->offset_index); + while (n) { + tmp = rb_entry(n, struct btrfs_free_space, + offset_index); + if (offset + bytes < tmp->offset) + break; + if (tmp->offset + tmp->bytes < offset) { + n = rb_next(&info->offset_index); + continue; + } + info = tmp; + goto have_info; + } + + goto out; + } + + if (info->offset == offset) { + ret = 1; + goto out; + } + + if (offset > info->offset && offset < info->offset + info->bytes) + ret = 1; +out: + spin_unlock(&ctl->tree_lock); + return ret; +} +#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ |
