diff options
Diffstat (limited to 'fs')
77 files changed, 2198 insertions, 1118 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6fcb1e7095c..92828281a30 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -57,7 +57,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) buffer = kmap(page); offset = page_offset(page); - retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE); + retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); if (retval < 0) goto done; diff --git a/fs/Kconfig b/fs/Kconfig index a97263be6a9..0e7da7bb5d9 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -186,32 +186,7 @@ source "fs/romfs/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" - -config NILFS2_FS - tristate "NILFS2 file system support (EXPERIMENTAL)" - depends on BLOCK && EXPERIMENTAL - select CRC32 - help - NILFS2 is a log-structured file system (LFS) supporting continuous - snapshotting. In addition to versioning capability of the entire - file system, users can even restore files mistakenly overwritten or - destroyed just a few seconds ago. Since this file system can keep - consistency like conventional LFS, it achieves quick recovery after - system crashes. - - NILFS2 creates a number of checkpoints every few seconds or per - synchronous write basis (unless there is no change). Users can - select significant versions among continuously created checkpoints, - and can change them into snapshots which will be preserved for long - periods until they are changed back to checkpoints. Each - snapshot is mountable as a read-only file system concurrently with - its writable mount, and this feature is convenient for online backup. - - Some features including atime, extended attributes, and POSIX ACLs, - are not supported yet. - - To compile this file system support as a module, choose M here: the - module will be called nilfs2. If unsure, say N. +source "fs/nilfs2/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/block_dev.c b/fs/block_dev.c index 3a6d4fb2a32..94dfda24c06 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -564,6 +564,16 @@ struct block_device *bdget(dev_t dev) EXPORT_SYMBOL(bdget); +/** + * bdgrab -- Grab a reference to an already referenced block device + * @bdev: Block device to grab a reference to. + */ +struct block_device *bdgrab(struct block_device *bdev) +{ + atomic_inc(&bdev->bd_inode->i_count); + return bdev; +} + long nr_blockdev_pages(void) { struct block_device *bdev; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 6e4f6c50a12..019e8af449a 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work) * list */ if (worker->idle) { - spin_lock_irqsave(&worker->workers->lock, flags); + spin_lock(&worker->workers->lock); worker->idle = 0; list_move_tail(&worker->worker_list, &worker->workers->worker_list); - spin_unlock_irqrestore(&worker->workers->lock, flags); + spin_unlock(&worker->workers->lock); } if (!worker->working) { wake = 1; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 60a45f3a4e9..3fdcc0512d3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) btrfs_disk_key_to_cpu(&k1, disk); - if (k1.objectid > k2->objectid) - return 1; - if (k1.objectid < k2->objectid) - return -1; - if (k1.type > k2->type) - return 1; - if (k1.type < k2->type) - return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - return -1; - return 0; + return btrfs_comp_cpu_keys(&k1, k2); } /* @@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(mid) > 2) - return 0; - if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; @@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct extent_buffer *b; int slot; int ret; + int err; int level; int lowest_unlock = 1; u8 lowest_level = 0; @@ -1737,8 +1723,6 @@ again: p->locks[level] = 1; if (cow) { - int wret; - /* * if we don't really need to cow this block * then we don't want to set the path blocking, @@ -1749,12 +1733,12 @@ again: btrfs_set_path_blocking(p); - wret = btrfs_cow_block(trans, root, b, - p->nodes[level + 1], - p->slots[level + 1], &b); - if (wret) { + err = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &b); + if (err) { free_extent_buffer(b); - ret = wret; + ret = err; goto done; } } @@ -1793,41 +1777,45 @@ cow_done: ret = bin_search(b, key, level, &slot); if (level != 0) { - if (ret && slot > 0) + int dec = 0; + if (ret && slot > 0) { + dec = 1; slot -= 1; + } p->slots[level] = slot; - ret = setup_nodes_for_search(trans, root, p, b, level, + err = setup_nodes_for_search(trans, root, p, b, level, ins_len); - if (ret == -EAGAIN) + if (err == -EAGAIN) goto again; - else if (ret) + if (err) { + ret = err; goto done; + } b = p->nodes[level]; slot = p->slots[level]; unlock_up(p, level, lowest_unlock); - /* this is only true while dropping a snapshot */ if (level == lowest_level) { - ret = 0; + if (dec) + p->slots[level]++; goto done; } - ret = read_block_for_search(trans, root, p, + err = read_block_for_search(trans, root, p, &b, level, slot, key); - if (ret == -EAGAIN) + if (err == -EAGAIN) goto again; - - if (ret == -EIO) + if (err) { + ret = err; goto done; + } if (!p->skip_locking) { - int lret; - btrfs_clear_path_blocking(p, NULL); - lret = btrfs_try_spin_lock(b); + err = btrfs_try_spin_lock(b); - if (!lret) { + if (!err) { btrfs_set_path_blocking(p); btrfs_tree_lock(b); btrfs_clear_path_blocking(p, b); @@ -1837,16 +1825,14 @@ cow_done: p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { - int sret; - btrfs_set_path_blocking(p); - sret = split_leaf(trans, root, key, - p, ins_len, ret == 0); + err = split_leaf(trans, root, key, + p, ins_len, ret == 0); btrfs_clear_path_blocking(p, NULL); - BUG_ON(sret > 0); - if (sret) { - ret = sret; + BUG_ON(err > 0); + if (err) { + ret = err; goto done; } } @@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below @@ -4042,10 +4028,9 @@ out: * calling this function. */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level, + struct btrfs_key *key, int level, int cache_only, u64 min_trans) { - int level = lowest_level; int slot; struct extent_buffer *c; @@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, c = path->nodes[level]; next: if (slot >= btrfs_header_nritems(c)) { - level++; - if (level == BTRFS_MAX_LEVEL) + int ret; + int orig_lowest; + struct btrfs_key cur_key; + if (level + 1 >= BTRFS_MAX_LEVEL || + !path->nodes[level + 1]) return 1; - continue; + + if (path->locks[level + 1]) { + level++; + continue; + } + + slot = btrfs_header_nritems(c) - 1; + if (level == 0) + btrfs_item_key_to_cpu(c, &cur_key, slot); + else + btrfs_node_key_to_cpu(c, &cur_key, slot); + + orig_lowest = path->lowest_level; + btrfs_release_path(root, path); + path->lowest_level = level; + ret = btrfs_search_slot(NULL, root, &cur_key, path, + 0, 0); + path->lowest_level = orig_lowest; + if (ret < 0) + return ret; + + c = path->nodes[level]; + slot = path->slots[level]; + if (ret == 0) + slot++; + goto next; } + if (level == 0) btrfs_item_key_to_cpu(c, key, slot); else { @@ -4146,7 +4160,8 @@ again: * advance the path if there are now more items available. */ if (nritems > 0 && path->slots[0] < nritems - 1) { - path->slots[0]++; + if (ret == 0) + path->slots[0]++; ret = 0; goto done; } @@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root, path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == type) - return 0; if (found_key.objectid < min_objectid) break; + if (found_key.type == type) + return 0; if (found_key.objectid == min_objectid && found_key.type < type) break; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 98a87383871..837435ce84c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -481,7 +481,7 @@ struct btrfs_shared_data_ref { struct btrfs_extent_inline_ref { u8 type; - u64 offset; + __le64 offset; } __attribute__ ((__packed__)); /* old style backrefs item */ @@ -689,6 +689,7 @@ struct btrfs_space_info { struct list_head block_groups; spinlock_t lock; struct rw_semaphore groups_sem; + atomic_t caching_threads; }; /* @@ -707,6 +708,9 @@ struct btrfs_free_cluster { /* first extent starting offset */ u64 window_start; + /* if this cluster simply points at a bitmap in the block group */ + bool points_to_bitmap; + struct btrfs_block_group_cache *block_group; /* * when a cluster is allocated from a block group, we put the @@ -716,24 +720,37 @@ struct btrfs_free_cluster { struct list_head block_group_list; }; +enum btrfs_caching_type { + BTRFS_CACHE_NO = 0, + BTRFS_CACHE_STARTED = 1, + BTRFS_CACHE_FINISHED = 2, +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + struct btrfs_fs_info *fs_info; spinlock_t lock; - struct mutex cache_mutex; u64 pinned; u64 reserved; u64 flags; - int cached; + u64 sectorsize; + int extents_thresh; + int free_extents; + int total_bitmaps; int ro; int dirty; + /* cache tracking stuff */ + wait_queue_head_t caching_q; + int cached; + struct btrfs_space_info *space_info; /* free space cache stuff */ spinlock_t tree_lock; - struct rb_root free_space_bytes; struct rb_root free_space_offset; + u64 free_space; /* block group cache stuff */ struct rb_node cache_node; @@ -808,6 +825,7 @@ struct btrfs_fs_info { struct mutex drop_mutex; struct mutex volume_mutex; struct mutex tree_reloc_mutex; + struct rw_semaphore extent_commit_sem; /* * this protects the ordered operations list only while we are @@ -1988,6 +2006,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); +void btrfs_free_pinned_extents(struct btrfs_fs_info *info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d28d29c95f7..e83be2e4602 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1639,6 +1639,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); mutex_init(&fs_info->tree_reloc_mutex); + init_rwsem(&fs_info->extent_commit_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -1799,6 +1800,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_chunk_root(disk_super), blocksize, generation); BUG_ON(!chunk_root->node); + if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { + printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", + sb->s_id); + goto fail_chunk_root; + } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); chunk_root->commit_root = btrfs_root_node(chunk_root); @@ -1826,6 +1832,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize, generation); if (!tree_root->node) goto fail_chunk_root; + if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", + sb->s_id); + goto fail_tree_root; + } btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); @@ -2322,6 +2333,9 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + fs_info->closing = 2; + smp_mb(); + if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", (unsigned long long)fs_info->delalloc_bytes); @@ -2343,6 +2357,7 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); + btrfs_free_pinned_extents(root->fs_info); del_fs_roots(fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a5aca3997d4..dc84daee6bc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -21,6 +21,7 @@ #include <linux/blkdev.h> #include <linux/sort.h> #include <linux/rcupdate.h> +#include <linux/kthread.h> #include "compat.h" #include "hash.h" #include "ctree.h" @@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force); +static noinline int +block_group_cache_done(struct btrfs_block_group_cache *cache) +{ + smp_mb(); + return cache->cached == BTRFS_CACHE_FINISHED; +} + static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { return (cache->flags & bits) == bits; @@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, } /* + * We always set EXTENT_LOCKED for the super mirror extents so we don't + * overwrite them, so those bits need to be unset. Also, if we are unmounting + * with pinned extents still sitting there because we had a block group caching, + * we need to clear those now, since we are done. + */ +void btrfs_free_pinned_extents(struct btrfs_fs_info *info) +{ + u64 start, end, last = 0; + int ret; + + while (1) { + ret = find_first_extent_bit(&info->pinned_extents, last, + &start, &end, + EXTENT_LOCKED|EXTENT_DIRTY); + if (ret) + break; + + clear_extent_bits(&info->pinned_extents, start, end, + EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); + last = end+1; + } +} + +static int remove_sb_from_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, + 0, &logical, &nr, &stripe_len); + BUG_ON(ret); + while (nr--) { + try_lock_extent(&fs_info->pinned_extents, + logical[nr], + logical[nr] + stripe_len - 1, GFP_NOFS); + } + kfree(logical); + } + + return 0; +} + +/* * this is only called by cache_block_group, since we could have freed extents * we need to check the pinned_extents for any extents that can't be used yet * since their free s |