diff options
Diffstat (limited to 'drivers/md/linear.c')
| -rw-r--r-- | drivers/md/linear.c | 421 |
1 files changed, 204 insertions, 217 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 09658b21847..56f534b4a2d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -16,26 +16,40 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/raid/linear.h> +#include <linux/blkdev.h> +#include <linux/raid/md_u.h> +#include <linux/seq_file.h> +#include <linux/module.h> +#include <linux/slab.h> +#include "md.h" +#include "linear.h" /* * find which device holds a particular offset */ -static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) +static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) { - dev_info_t *hash; - linear_conf_t *conf = mddev_to_conf(mddev); - sector_t idx = sector >> conf->sector_shift; + int lo, mid, hi; + struct linear_conf *conf; + + lo = 0; + hi = mddev->raid_disks - 1; + conf = rcu_dereference(mddev->private); /* - * sector_div(a,b) returns the remainer and sets a to a/b + * Binary Search */ - (void)sector_div(idx, conf->spacing); - hash = conf->hash_table[idx]; - while (sector >= hash->num_sectors + hash->start_sector) - hash++; - return hash; + while (hi > lo) { + + mid = (hi + lo) / 2; + if (sector < conf->disks[mid].end_sector) + hi = mid; + else + lo = mid + 1; + } + + return conf->disks + lo; } /** @@ -50,13 +64,24 @@ static int linear_mergeable_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *biovec) { - mddev_t *mddev = q->queuedata; - dev_info_t *dev0; + struct mddev *mddev = q->queuedata; + struct dev_info *dev0; unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + int maxbytes = biovec->bv_len; + struct request_queue *subq; + rcu_read_lock(); dev0 = which_dev(mddev, sector); - maxsectors = dev0->num_sectors - (sector - dev0->start_sector); + maxsectors = dev0->end_sector - sector; + subq = bdev_get_queue(dev0->rdev->bdev); + if (subq->merge_bvec_fn) { + bvm->bi_bdev = dev0->rdev->bdev; + bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; + maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, + biovec)); + } + rcu_read_unlock(); if (maxsectors < bio_sectors) maxsectors = 0; @@ -64,49 +89,58 @@ static int linear_mergeable_bvec(struct request_queue *q, maxsectors -= bio_sectors; if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) - return biovec->bv_len; - /* The bytes available at this offset could be really big, - * so we cap at 2^31 to avoid overflow */ - if (maxsectors > (1 << (31-9))) - return 1<<31; - return maxsectors << 9; -} - -static void linear_unplug(struct request_queue *q) -{ - mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); - int i; + return maxbytes; - for (i=0; i < mddev->raid_disks; i++) { - struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); - blk_unplug(r_queue); - } + if (maxsectors > (maxbytes >> 9)) + return maxbytes; + else + return maxsectors << 9; } static int linear_congested(void *data, int bits) { - mddev_t *mddev = data; - linear_conf_t *conf = mddev_to_conf(mddev); + struct mddev *mddev = data; + struct linear_conf *conf; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + + rcu_read_unlock(); return ret; } -static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) +static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) { - linear_conf_t *conf; - dev_info_t **table; - mdk_rdev_t *rdev; - int i, nb_zone, cnt; - sector_t min_sectors; - sector_t curr_sector; - - conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), + struct linear_conf *conf; + sector_t array_sectors; + + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + WARN_ONCE(sectors || raid_disks, + "%s does not support generic reshape\n", __func__); + array_sectors = conf->array_sectors; + rcu_read_unlock(); + + return array_sectors; +} + +static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) +{ + struct linear_conf *conf; + struct md_rdev *rdev; + int i, cnt; + bool discard_supported = false; + + conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), GFP_KERNEL); if (!conf) return NULL; @@ -114,123 +148,53 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) cnt = 0; conf->array_sectors = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { int j = rdev->raid_disk; - dev_info_t *disk = conf->disks + j; + struct dev_info *disk = conf->disks + j; + sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { - printk("linear: disk numbering problem. Aborting!\n"); + printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n", + mdname(mddev)); goto out; } disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. - */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - - disk->num_sectors = rdev->size * 2; - conf->array_sectors += rdev->size * 2; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + conf->array_sectors += rdev->sectors; cnt++; + + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; } if (cnt != raid_disks) { - printk("linear: not enough drives present. Aborting!\n"); + printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", + mdname(mddev)); goto out; } - min_sectors = conf->array_sectors; - sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); - if (min_sectors == 0) - min_sectors = 1; - - /* min_sectors is the minimum spacing that will fit the hash - * table in one PAGE. This may be much smaller than needed. - * We find the smallest non-terminal set of consecutive devices - * that is larger than min_sectors and use the size of that as - * the actual spacing - */ - conf->spacing = conf->array_sectors; - for (i=0; i < cnt-1 ; i++) { - sector_t tmp = 0; - int j; - for (j = i; j < cnt - 1 && tmp < min_sectors; j++) - tmp += conf->disks[j].num_sectors; - if (tmp >= min_sectors && tmp < conf->spacing) - conf->spacing = tmp; - } + if (!discard_supported) + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - /* spacing may be too large for sector_div to work with, - * so we might need to pre-shift - */ - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - sector_t space = conf->spacing; - while (space > (sector_t)(~(u32)0)) { - space >>= 1; - conf->sector_shift++; - } - } /* - * This code was restructured to work around a gcc-2.95.3 internal - * compiler error. Alter it with care. + * Here we calculate the device offsets. */ - { - sector_t sz; - unsigned round; - unsigned long base; - - sz = conf->array_sectors >> conf->sector_shift; - sz += 1; /* force round-up */ - base = conf->spacing >> conf->sector_shift; - round = sector_div(sz, base); - nb_zone = sz + (round ? 1 : 0); - } - BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *)); - - conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone, - GFP_KERNEL); - if (!conf->hash_table) - goto out; + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; - /* - * Here we generate the linear hash table - * First calculate the device offsets. - */ - conf->disks[0].start_sector = 0; for (i = 1; i < raid_disks; i++) - conf->disks[i].start_sector = - conf->disks[i-1].start_sector + - conf->disks[i-1].num_sectors; - - table = conf->hash_table; - i = 0; - for (curr_sector = 0; - curr_sector < conf->array_sectors; - curr_sector += conf->spacing) { - - while (i < raid_disks-1 && - curr_sector >= conf->disks[i+1].start_sector) - i++; - - *table ++ = conf->disks + i; - } - - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so that when we divide by it, - * we err on the side of "too-low", which is safest. - */ - conf->spacing++; - } - - BUG_ON(table - conf->hash_table > nb_zone); + conf->disks[i].end_sector = + conf->disks[i-1].end_sector + + conf->disks[i].rdev->sectors; return conf; @@ -239,26 +203,33 @@ out: return NULL; } -static int linear_run (mddev_t *mddev) +static int linear_run (struct mddev *mddev) { - linear_conf_t *conf; + struct linear_conf *conf; + int ret; - mddev->queue->queue_lock = &mddev->queue->__queue_lock; + if (md_check_no_bitmap(mddev)) + return -EINVAL; conf = linear_conf(mddev, mddev->raid_disks); if (!conf) return 1; mddev->private = conf; - mddev->array_sectors = conf->array_sectors; + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); - mddev->queue->unplug_fn = linear_unplug; mddev->queue->backing_dev_info.congested_fn = linear_congested; mddev->queue->backing_dev_info.congested_data = mddev; - return 0; + + ret = md_integrity_register(mddev); + if (ret) { + kfree(conf); + mddev->private = NULL; + } + return ret; } -static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) +static int linear_add(struct mddev *mddev, struct md_rdev *rdev) { /* Adding a drive to a linear array allows the array to grow. * It is permitted if the new drive has a matching superblock @@ -268,110 +239,124 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) * The current one is never freed until the array is stopped. * This avoids races. */ - linear_conf_t *newconf; + struct linear_conf *newconf, *oldconf; if (rdev->saved_raid_disk != mddev->raid_disks) return -EINVAL; rdev->raid_disk = rdev->saved_raid_disk; + rdev->saved_raid_disk = -1; newconf = linear_conf(mddev,mddev->raid_disks+1); if (!newconf) return -ENOMEM; - newconf->prev = mddev_to_conf(mddev); - mddev->private = newconf; + oldconf = rcu_dereference_protected(mddev->private, + lockdep_is_held( + &mddev->reconfig_mutex)); mddev->raid_disks++; - mddev->array_sectors = newconf->array_sectors; + rcu_assign_pointer(mddev->private, newconf); + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + kfree_rcu(oldconf, rcu); return 0; } -static int linear_stop (mddev_t *mddev) +static int linear_stop (struct mddev *mddev) { - linear_conf_t *conf = mddev_to_conf(mddev); - + struct linear_conf *conf = + rcu_dereference_protected(mddev->private, + lockdep_is_held( + &mddev->reconfig_mutex)); + + /* + * We do not require rcu protection here since + * we hold reconfig_mutex for both linear_add and + * linear_stop, so they cannot race. + * We should make sure any old 'conf's are properly + * freed though. + */ + rcu_barrier(); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - do { - linear_conf_t *t = conf->prev; - kfree(conf->hash_table); - kfree(conf); - conf = t; - } while (conf); + kfree(conf); + mddev->private = NULL; return 0; } -static int linear_make_request (struct request_queue *q, struct bio *bio) +static void linear_make_request(struct mddev *mddev, struct bio *bio) { - const int rw = bio_data_dir(bio); - mddev_t *mddev = q->queuedata; - dev_info_t *tmp_dev; - int cpu; - - if (unlikely(bio_barrier(bio))) { - bio_endio(bio, -EOPNOTSUPP); - return 0; + char b[BDEVNAME_SIZE]; + struct dev_info *tmp_dev; + struct bio *split; + sector_t start_sector, end_sector, data_offset; + + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); + return; } - cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], - bio_sectors(bio)); - part_stat_unlock(); - - tmp_dev = which_dev(mddev, bio->bi_sector); - - if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors + - tmp_dev->start_sector) - || (bio->bi_sector < - tmp_dev->start_sector))) { - char b[BDEVNAME_SIZE]; - - printk("linear_make_request: Sector %llu out of bounds on " - "dev %s: %llu sectors, offset %llu\n", - (unsigned long long)bio->bi_sector, - bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->num_sectors, - (unsigned long long)tmp_dev->start_sector); - bio_io_error(bio); - return 0; - } - if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->start_sector + tmp_dev->num_sectors)) { - /* This bio crosses a device boundary, so we have to - * split it. - */ - struct bio_pair *bp; - - bp = bio_split(bio, - tmp_dev->start_sector + tmp_dev->num_sectors - - bio->bi_sector); - - if (linear_make_request(q, &bp->bio1)) - generic_make_request(&bp->bio1); - if (linear_make_request(q, &bp->bio2)) - generic_make_request(&bp->bio2); - bio_pair_release(bp); - return 0; - } - - bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - tmp_dev->start_sector - + tmp_dev->rdev->data_offset; + do { + rcu_read_lock(); + + tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + end_sector = tmp_dev->end_sector; + data_offset = tmp_dev->rdev->data_offset; + bio->bi_bdev = tmp_dev->rdev->bdev; + + rcu_read_unlock(); + + if (unlikely(bio->bi_iter.bi_sector >= end_sector || + bio->bi_iter.bi_sector < start_sector)) + goto out_of_bounds; + + if (unlikely(bio_end_sector(bio) > end_sector)) { + /* This bio crosses a device boundary, so we have to + * split it. + */ + split = bio_split(bio, end_sector - + bio->bi_iter.bi_sector, + GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } - return 1; + split->bi_iter.bi_sector = split->bi_iter.bi_sector - + start_sector + data_offset; + + if (unlikely((split->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { + /* Just ignore it */ + bio_endio(split, 0); + } else + generic_make_request(split); + } while (split != bio); + return; + +out_of_bounds: + printk(KERN_ERR + "md/linear:%s: make_request: Sector %llu out of bounds on " + "dev %s: %llu sectors, offset %llu\n", + mdname(mddev), + (unsigned long long)bio->bi_iter.bi_sector, + bdevname(tmp_dev->rdev->bdev, b), + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); + bio_io_error(bio); } -static void linear_status (struct seq_file *seq, mddev_t *mddev) +static void linear_status (struct seq_file *seq, struct mddev *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } -static struct mdk_personality linear_personality = +static struct md_personality linear_personality = { .name = "linear", .level = LEVEL_LINEAR, @@ -381,6 +366,7 @@ static struct mdk_personality linear_personality = .stop = linear_stop, .status = linear_status, .hot_add_disk = linear_add, + .size = linear_size, }; static int __init linear_init (void) @@ -397,6 +383,7 @@ static void linear_exit (void) module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Linear device concatenation personality for MD"); MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ MODULE_ALIAS("md-linear"); MODULE_ALIAS("md-level--1"); |
