diff options
Diffstat (limited to 'fs/btrfs/reada.c')
| -rw-r--r-- | fs/btrfs/reada.c | 133 |
1 files changed, 88 insertions, 45 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 2373b39a132..09230cf3a24 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -27,6 +27,7 @@ #include "volumes.h" #include "disk-io.h" #include "transaction.h" +#include "dev-replace.h" #undef DEBUG @@ -54,7 +55,6 @@ * than the 2 started one after another. */ -#define MAX_MIRRORS 2 #define MAX_IN_FLIGHT 6 struct reada_extctl { @@ -69,9 +69,9 @@ struct reada_extent { u32 blocksize; int err; struct list_head extctl; - struct kref refcnt; + int refcnt; spinlock_t lock; - struct reada_zone *zones[MAX_MIRRORS]; + struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; struct btrfs_device *scheduled_for; }; @@ -84,7 +84,8 @@ struct reada_zone { spinlock_t lock; int locked; struct btrfs_device *device; - struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ + struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl + * self */ int ndevs; struct kref refcnt; }; @@ -126,7 +127,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) @@ -188,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, */ #ifdef DEBUG if (rec->generation != generation) { - printk(KERN_DEBUG "generation mismatch for " - "(%llu,%d,%llu) %llu != %llu\n", + btrfs_debug(root->fs_info, + "generation mismatch for (%llu,%d,%llu) %llu != %llu", key.objectid, key.type, key.offset, rec->generation, generation); } @@ -250,14 +251,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, struct btrfs_bio *bbio) { int ret; - int looped = 0; struct reada_zone *zone; struct btrfs_block_group_cache *cache = NULL; u64 start; u64 end; int i; -again: zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, @@ -274,9 +273,6 @@ again: spin_unlock(&fs_info->reada_lock); } - if (looped) - return NULL; - cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; @@ -305,15 +301,17 @@ again: spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)zone->end >> PAGE_CACHE_SHIFT, + (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), zone); - spin_unlock(&fs_info->reada_lock); - if (ret) { + if (ret == -EEXIST) { kfree(zone); - looped = 1; - goto again; + ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, + logical >> PAGE_CACHE_SHIFT, 1); + if (ret == 1) + kref_get(&zone->refcnt); } + spin_unlock(&fs_info->reada_lock); return zone; } @@ -323,26 +321,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct btrfs_key *top, int level) { int ret; - int looped = 0; struct reada_extent *re = NULL; + struct reada_extent *re_exist = NULL; struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_bio *bbio = NULL; struct btrfs_device *dev; + struct btrfs_device *prev_dev; u32 blocksize; u64 length; int nzones = 0; int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; + int dev_replace_is_ongoing; -again: spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); - if (re || looped) + if (re) return re; re = kzalloc(sizeof(*re), GFP_NOFS); @@ -355,19 +353,21 @@ again: re->top = *top; INIT_LIST_HEAD(&re->extctl); spin_lock_init(&re->lock); - kref_init(&re->refcnt); + re->refcnt = 1; /* * map block */ length = blocksize; - ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0); + ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length, + &bbio, 0); if (ret || !bbio || length < blocksize) goto error; - if (bbio->num_stripes > MAX_MIRRORS) { - printk(KERN_ERR "btrfs readahead: more than %d copies not " - "supported", MAX_MIRRORS); + if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { + btrfs_err(root->fs_info, + "readahead: more than %d copies not supported", + BTRFS_MAX_MIRRORS); goto error; } @@ -396,32 +396,72 @@ again: } /* insert extent in reada_tree + all per-device trees, all or nothing */ + btrfs_dev_replace_lock(&fs_info->dev_replace); spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&fs_info->reada_tree, index, re); + if (ret == -EEXIST) { + re_exist = radix_tree_lookup(&fs_info->reada_tree, index); + BUG_ON(!re_exist); + re_exist->refcnt++; + spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); + goto error; + } if (ret) { spin_unlock(&fs_info->reada_lock); - if (ret != -ENOMEM) { - /* someone inserted the extent in the meantime */ - looped = 1; - } + btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } + prev_dev = NULL; + dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( + &fs_info->dev_replace); for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; + if (dev == prev_dev) { + /* + * in case of DUP, just add the first zone. As both + * are on the same device, there's nothing to gain + * from adding both. + * Also, it wouldn't work, as the tree is per device + * and adding would fail with EEXIST + */ + continue; + } + if (!dev->bdev) { + /* + * cannot read ahead on missing device, but for RAID5/6, + * REQ_GET_READ_MIRRORS return 1. So don't skip missing + * device for such case. + */ + if (nzones > 1) + continue; + } + if (dev_replace_is_ongoing && + dev == fs_info->dev_replace.tgtdev) { + /* + * as this device is selected for reading only as + * a last resort, skip it for read ahead. + */ + continue; + } + prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { while (--i >= 0) { dev = bbio->stripes[i].dev; BUG_ON(dev == NULL); + /* ignore whether the entry was inserted */ radix_tree_delete(&dev->reada_extents, index); } BUG_ON(fs_info == NULL); radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } } spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); kfree(bbio); return re; @@ -450,13 +490,7 @@ error: } kfree(bbio); kfree(re); - if (looped) - goto again; - return NULL; -} - -static void reada_kref_dummy(struct kref *kr) -{ + return re_exist; } static void reada_extent_put(struct btrfs_fs_info *fs_info, @@ -466,7 +500,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, unsigned long index = re->logical >> PAGE_CACHE_SHIFT; spin_lock(&fs_info->reada_lock); - if (!kref_put(&re->refcnt, reada_kref_dummy)) { + if (--re->refcnt) { spin_unlock(&fs_info->reada_lock); return; } @@ -661,7 +695,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, return 0; } dev->reada_next = re->logical + re->blocksize; - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -708,13 +742,18 @@ static void reada_start_machine_worker(struct btrfs_work *work) { struct reada_machine_work *rmw; struct btrfs_fs_info *fs_info; + int old_ioprio; rmw = container_of(work, struct reada_machine_work, work); fs_info = rmw->fs_info; kfree(rmw); + old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), + task_nice_ioprio(current)); + set_task_ioprio(current, BTRFS_IOPRIO_READA); __reada_start_machine(fs_info); + set_task_ioprio(current, old_ioprio); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) @@ -759,10 +798,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - rmw->work.func = reada_start_machine_worker; + btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; - btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); + btrfs_queue_work(fs_info->readahead_workers, &rmw->work); } #ifdef DEBUG @@ -904,7 +943,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, generation = btrfs_header_generation(node); free_extent_buffer(node); - reada_add_block(rc, start, &max_key, level, generation); + if (reada_add_block(rc, start, &max_key, level, generation)) { + kfree(rc); + return ERR_PTR(-ENOMEM); + } reada_start_machine(root->fs_info); @@ -919,10 +961,11 @@ int btrfs_reada_wait(void *handle) while (atomic_read(&rc->elems)) { wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 5 * HZ); - dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); + dump_devs(rc->root->fs_info, + atomic_read(&rc->elems) < 10 ? 1 : 0); } - dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); + dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0); kref_put(&rc->refcnt, reada_control_release); |
