aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/reada.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/reada.c')
-rw-r--r--fs/btrfs/reada.c133
1 files changed, 88 insertions, 45 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132..09230cf3a24 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -27,6 +27,7 @@
#include "volumes.h"
#include "disk-io.h"
#include "transaction.h"
+#include "dev-replace.h"
#undef DEBUG
@@ -54,7 +55,6 @@
* than the 2 started one after another.
*/
-#define MAX_MIRRORS 2
#define MAX_IN_FLIGHT 6
struct reada_extctl {
@@ -69,9 +69,9 @@ struct reada_extent {
u32 blocksize;
int err;
struct list_head extctl;
- struct kref refcnt;
+ int refcnt;
spinlock_t lock;
- struct reada_zone *zones[MAX_MIRRORS];
+ struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
struct btrfs_device *scheduled_for;
};
@@ -84,7 +84,8 @@ struct reada_zone {
spinlock_t lock;
int locked;
struct btrfs_device *device;
- struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */
+ struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
+ * self */
int ndevs;
struct kref refcnt;
};
@@ -126,7 +127,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
if (!re)
@@ -188,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
*/
#ifdef DEBUG
if (rec->generation != generation) {
- printk(KERN_DEBUG "generation mismatch for "
- "(%llu,%d,%llu) %llu != %llu\n",
+ btrfs_debug(root->fs_info,
+ "generation mismatch for (%llu,%d,%llu) %llu != %llu",
key.objectid, key.type, key.offset,
rec->generation, generation);
}
@@ -250,14 +251,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio)
{
int ret;
- int looped = 0;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
int i;
-again:
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
@@ -274,9 +273,6 @@ again:
spin_unlock(&fs_info->reada_lock);
}
- if (looped)
- return NULL;
-
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
@@ -305,15 +301,17 @@ again:
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
- (unsigned long)zone->end >> PAGE_CACHE_SHIFT,
+ (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
- spin_unlock(&fs_info->reada_lock);
- if (ret) {
+ if (ret == -EEXIST) {
kfree(zone);
- looped = 1;
- goto again;
+ ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
+ logical >> PAGE_CACHE_SHIFT, 1);
+ if (ret == 1)
+ kref_get(&zone->refcnt);
}
+ spin_unlock(&fs_info->reada_lock);
return zone;
}
@@ -323,26 +321,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_key *top, int level)
{
int ret;
- int looped = 0;
struct reada_extent *re = NULL;
+ struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
+ struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
+ int dev_replace_is_ongoing;
-again:
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
- if (re || looped)
+ if (re)
return re;
re = kzalloc(sizeof(*re), GFP_NOFS);
@@ -355,19 +353,21 @@ again:
re->top = *top;
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
- kref_init(&re->refcnt);
+ re->refcnt = 1;
/*
* map block
*/
length = blocksize;
- ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0);
+ ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
+ &bbio, 0);
if (ret || !bbio || length < blocksize)
goto error;
- if (bbio->num_stripes > MAX_MIRRORS) {
- printk(KERN_ERR "btrfs readahead: more than %d copies not "
- "supported", MAX_MIRRORS);
+ if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
+ btrfs_err(root->fs_info,
+ "readahead: more than %d copies not supported",
+ BTRFS_MAX_MIRRORS);
goto error;
}
@@ -396,32 +396,72 @@ again:
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
+ btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
+ if (ret == -EEXIST) {
+ re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
+ BUG_ON(!re_exist);
+ re_exist->refcnt++;
+ spin_unlock(&fs_info->reada_lock);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ goto error;
+ }
if (ret) {
spin_unlock(&fs_info->reada_lock);
- if (ret != -ENOMEM) {
- /* someone inserted the extent in the meantime */
- looped = 1;
- }
+ btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
+ prev_dev = NULL;
+ dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
+ &fs_info->dev_replace);
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
+ if (dev == prev_dev) {
+ /*
+ * in case of DUP, just add the first zone. As both
+ * are on the same device, there's nothing to gain
+ * from adding both.
+ * Also, it wouldn't work, as the tree is per device
+ * and adding would fail with EEXIST
+ */
+ continue;
+ }
+ if (!dev->bdev) {
+ /*
+ * cannot read ahead on missing device, but for RAID5/6,
+ * REQ_GET_READ_MIRRORS return 1. So don't skip missing
+ * device for such case.
+ */
+ if (nzones > 1)
+ continue;
+ }
+ if (dev_replace_is_ongoing &&
+ dev == fs_info->dev_replace.tgtdev) {
+ /*
+ * as this device is selected for reading only as
+ * a last resort, skip it for read ahead.
+ */
+ continue;
+ }
+ prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
while (--i >= 0) {
dev = bbio->stripes[i].dev;
BUG_ON(dev == NULL);
+ /* ignore whether the entry was inserted */
radix_tree_delete(&dev->reada_extents, index);
}
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
}
spin_unlock(&fs_info->reada_lock);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio);
return re;
@@ -450,13 +490,7 @@ error:
}
kfree(bbio);
kfree(re);
- if (looped)
- goto again;
- return NULL;
-}
-
-static void reada_kref_dummy(struct kref *kr)
-{
+ return re_exist;
}
static void reada_extent_put(struct btrfs_fs_info *fs_info,
@@ -466,7 +500,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
spin_lock(&fs_info->reada_lock);
- if (!kref_put(&re->refcnt, reada_kref_dummy)) {
+ if (--re->refcnt) {
spin_unlock(&fs_info->reada_lock);
return;
}
@@ -661,7 +695,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
return 0;
}
dev->reada_next = re->logical + re->blocksize;
- kref_get(&re->refcnt);
+ re->refcnt++;
spin_unlock(&fs_info->reada_lock);
@@ -708,13 +742,18 @@ static void reada_start_machine_worker(struct btrfs_work *work)
{
struct reada_machine_work *rmw;
struct btrfs_fs_info *fs_info;
+ int old_ioprio;
rmw = container_of(work, struct reada_machine_work, work);
fs_info = rmw->fs_info;
kfree(rmw);
+ old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
+ task_nice_ioprio(current));
+ set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
+ set_task_ioprio(current, old_ioprio);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -759,10 +798,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
/* FIXME we cannot handle this properly right now */
BUG();
}
- rmw->work.func = reada_start_machine_worker;
+ btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
rmw->fs_info = fs_info;
- btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work);
+ btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
}
#ifdef DEBUG
@@ -904,7 +943,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
generation = btrfs_header_generation(node);
free_extent_buffer(node);
- reada_add_block(rc, start, &max_key, level, generation);
+ if (reada_add_block(rc, start, &max_key, level, generation)) {
+ kfree(rc);
+ return ERR_PTR(-ENOMEM);
+ }
reada_start_machine(root->fs_info);
@@ -919,10 +961,11 @@ int btrfs_reada_wait(void *handle)
while (atomic_read(&rc->elems)) {
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
5 * HZ);
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+ dump_devs(rc->root->fs_info,
+ atomic_read(&rc->elems) < 10 ? 1 : 0);
}
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+ dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
kref_put(&rc->refcnt, reada_control_release);