diff options
Diffstat (limited to 'drivers/md/linear.c')
| -rw-r--r-- | drivers/md/linear.c | 441 |
1 files changed, 202 insertions, 239 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf88c20..56f534b4a2d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -16,35 +16,40 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <linux/blkdev.h> +#include <linux/raid/md_u.h> +#include <linux/seq_file.h> #include <linux/module.h> - -#include <linux/raid/md.h> #include <linux/slab.h> -#include <linux/raid/linear.h> - -#define MAJOR_NR MD_MAJOR -#define MD_DRIVER -#define MD_PERSONALITY +#include "md.h" +#include "linear.h" /* * find which device holds a particular offset */ -static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) +static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) { - dev_info_t *hash; - linear_conf_t *conf = mddev_to_conf(mddev); - sector_t block = sector >> 1; + int lo, mid, hi; + struct linear_conf *conf; + + lo = 0; + hi = mddev->raid_disks - 1; + conf = rcu_dereference(mddev->private); /* - * sector_div(a,b) returns the remainer and sets a to a/b + * Binary Search */ - block >>= conf->preshift; - (void)sector_div(block, conf->hash_spacing); - hash = conf->hash_table[block]; - while ((sector>>1) >= (hash->size + hash->offset)) - hash++; - return hash; + while (hi > lo) { + + mid = (hi + lo) / 2; + if (sector < conf->disks[mid].end_sector) + hi = mid; + else + lo = mid + 1; + } + + return conf->disks + lo; } /** @@ -59,13 +64,24 @@ static int linear_mergeable_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *biovec) { - mddev_t *mddev = q->queuedata; - dev_info_t *dev0; + struct mddev *mddev = q->queuedata; + struct dev_info *dev0; unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + int maxbytes = biovec->bv_len; + struct request_queue *subq; + rcu_read_lock(); dev0 = which_dev(mddev, sector); - maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); + maxsectors = dev0->end_sector - sector; + subq = bdev_get_queue(dev0->rdev->bdev); + if (subq->merge_bvec_fn) { + bvm->bi_bdev = dev0->rdev->bdev; + bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; + maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, + biovec)); + } + rcu_read_unlock(); if (maxsectors < bio_sectors) maxsectors = 0; @@ -73,50 +89,58 @@ static int linear_mergeable_bvec(struct request_queue *q, maxsectors -= bio_sectors; if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) - return biovec->bv_len; - /* The bytes available at this offset could be really big, - * so we cap at 2^31 to avoid overflow */ - if (maxsectors > (1 << (31-9))) - return 1<<31; - return maxsectors << 9; -} - -static void linear_unplug(struct request_queue *q) -{ - mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); - int i; + return maxbytes; - for (i=0; i < mddev->raid_disks; i++) { - struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); - blk_unplug(r_queue); - } + if (maxsectors > (maxbytes >> 9)) + return maxbytes; + else + return maxsectors << 9; } static int linear_congested(void *data, int bits) { - mddev_t *mddev = data; - linear_conf_t *conf = mddev_to_conf(mddev); + struct mddev *mddev = data; + struct linear_conf *conf; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + + rcu_read_unlock(); return ret; } -static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) +static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) +{ + struct linear_conf *conf; + sector_t array_sectors; + + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + WARN_ONCE(sectors || raid_disks, + "%s does not support generic reshape\n", __func__); + array_sectors = conf->array_sectors; + rcu_read_unlock(); + + return array_sectors; +} + +static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) { - linear_conf_t *conf; - dev_info_t **table; - mdk_rdev_t *rdev; - int i, nb_zone, cnt; - sector_t min_spacing; - sector_t curr_offset; - struct list_head *tmp; - - conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), + struct linear_conf *conf; + struct md_rdev *rdev; + int i, cnt; + bool discard_supported = false; + + conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), GFP_KERNEL); if (!conf) return NULL; @@ -124,122 +148,53 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) cnt = 0; conf->array_sectors = 0; - rdev_for_each(rdev, tmp, mddev) { + rdev_for_each(rdev, mddev) { int j = rdev->raid_disk; - dev_info_t *disk = conf->disks + j; + struct dev_info *disk = conf->disks + j; + sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { - printk("linear: disk numbering problem. Aborting!\n"); + printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n", + mdname(mddev)); goto out; } disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. - */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - - disk->size = rdev->size; - conf->array_sectors += rdev->size * 2; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + conf->array_sectors += rdev->sectors; cnt++; + + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; } if (cnt != raid_disks) { - printk("linear: not enough drives present. Aborting!\n"); + printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", + mdname(mddev)); goto out; } - min_spacing = conf->array_sectors / 2; - sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *)); - - /* min_spacing is the minimum spacing that will fit the hash - * table in one PAGE. This may be much smaller than needed. - * We find the smallest non-terminal set of consecutive devices - * that is larger than min_spacing as use the size of that as - * the actual spacing - */ - conf->hash_spacing = conf->array_sectors / 2; - for (i=0; i < cnt-1 ; i++) { - sector_t sz = 0; - int j; - for (j = i; j < cnt - 1 && sz < min_spacing; j++) - sz += conf->disks[j].size; - if (sz >= min_spacing && sz < conf->hash_spacing) - conf->hash_spacing = sz; - } + if (!discard_supported) + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - /* hash_spacing may be too large for sector_div to work with, - * so we might need to pre-shift - */ - conf->preshift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - sector_t space = conf->hash_spacing; - while (space > (sector_t)(~(u32)0)) { - space >>= 1; - conf->preshift++; - } - } /* - * This code was restructured to work around a gcc-2.95.3 internal - * compiler error. Alter it with care. + * Here we calculate the device offsets. */ - { - sector_t sz; - unsigned round; - unsigned long base; - - sz = conf->array_sectors >> (conf->preshift + 1); - sz += 1; /* force round-up */ - base = conf->hash_spacing >> conf->preshift; - round = sector_div(sz, base); - nb_zone = sz + (round ? 1 : 0); - } - BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *)); + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; - conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone, - GFP_KERNEL); - if (!conf->hash_table) - goto out; - - /* - * Here we generate the linear hash table - * First calculate the device offsets. - */ - conf->disks[0].offset = 0; for (i = 1; i < raid_disks; i++) - conf->disks[i].offset = - conf->disks[i-1].offset + - conf->disks[i-1].size; - - table = conf->hash_table; - curr_offset = 0; - i = 0; - for (curr_offset = 0; - curr_offset < conf->array_sectors / 2; - curr_offset += conf->hash_spacing) { - - while (i < raid_disks-1 && - curr_offset >= conf->disks[i+1].offset) - i++; - - *table ++ = conf->disks + i; - } - - if (conf->preshift) { - conf->hash_spacing >>= conf->preshift; - /* round hash_spacing up so that when we divide by it, - * we err on the side of "too-low", which is safest. - */ - conf->hash_spacing++; - } - - BUG_ON(table - conf->hash_table > nb_zone); + conf->disks[i].end_sector = + conf->disks[i-1].end_sector + + conf->disks[i].rdev->sectors; return conf; @@ -248,26 +203,33 @@ out: return NULL; } -static int linear_run (mddev_t *mddev) +static int linear_run (struct mddev *mddev) { - linear_conf_t *conf; + struct linear_conf *conf; + int ret; - mddev->queue->queue_lock = &mddev->queue->__queue_lock; + if (md_check_no_bitmap(mddev)) + return -EINVAL; conf = linear_conf(mddev, mddev->raid_disks); if (!conf) return 1; mddev->private = conf; - mddev->array_sectors = conf->array_sectors; + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); - mddev->queue->unplug_fn = linear_unplug; mddev->queue->backing_dev_info.congested_fn = linear_congested; mddev->queue->backing_dev_info.congested_data = mddev; - return 0; + + ret = md_integrity_register(mddev); + if (ret) { + kfree(conf); + mddev->private = NULL; + } + return ret; } -static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) +static int linear_add(struct mddev *mddev, struct md_rdev *rdev) { /* Adding a drive to a linear array allows the array to grow. * It is permitted if the new drive has a matching superblock @@ -277,125 +239,124 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) * The current one is never freed until the array is stopped. * This avoids races. */ - linear_conf_t *newconf; + struct linear_conf *newconf, *oldconf; if (rdev->saved_raid_disk != mddev->raid_disks) return -EINVAL; rdev->raid_disk = rdev->saved_raid_disk; + rdev->saved_raid_disk = -1; newconf = linear_conf(mddev,mddev->raid_disks+1); if (!newconf) return -ENOMEM; - newconf->prev = mddev_to_conf(mddev); - mddev->private = newconf; + oldconf = rcu_dereference_protected(mddev->private, + lockdep_is_held( + &mddev->reconfig_mutex)); mddev->raid_disks++; - mddev->array_sectors = newconf->array_sectors; + rcu_assign_pointer(mddev->private, newconf); + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + kfree_rcu(oldconf, rcu); return 0; } -static int linear_stop (mddev_t *mddev) +static int linear_stop (struct mddev *mddev) { - linear_conf_t *conf = mddev_to_conf(mddev); - + struct linear_conf *conf = + rcu_dereference_protected(mddev->private, + lockdep_is_held( + &mddev->reconfig_mutex)); + + /* + * We do not require rcu protection here since + * we hold reconfig_mutex for both linear_add and + * linear_stop, so they cannot race. + * We should make sure any old 'conf's are properly + * freed though. + */ + rcu_barrier(); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - do { - linear_conf_t *t = conf->prev; - kfree(conf->hash_table); - kfree(conf); - conf = t; - } while (conf); + kfree(conf); + mddev->private = NULL; return 0; } -static int linear_make_request (struct request_queue *q, struct bio *bio) +static void linear_make_request(struct mddev *mddev, struct bio *bio) { - const int rw = bio_data_dir(bio); - mddev_t *mddev = q->queuedata; - dev_info_t *tmp_dev; - sector_t block; - - if (unlikely(bio_barrier(bio))) { - bio_endio(bio, -EOPNOTSUPP); - return 0; + char b[BDEVNAME_SIZE]; + struct dev_info *tmp_dev; + struct bio *split; + sector_t start_sector, end_sector, data_offset; + + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); + return; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); - - tmp_dev = which_dev(mddev, bio->bi_sector); - block = bio->bi_sector >> 1; - - if (unlikely(block >= (tmp_dev->size + tmp_dev->offset) - || block < tmp_dev->offset)) { - char b[BDEVNAME_SIZE]; - - printk("linear_make_request: Block %llu out of bounds on " - "dev %s size %llu offset %llu\n", - (unsigned long long)block, - bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->size, - (unsigned long long)tmp_dev->offset); - bio_io_error(bio); - return 0; - } - if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - (tmp_dev->offset + tmp_dev->size)<<1)) { - /* This bio crosses a device boundary, so we have to - * split it. - */ - struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, - ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); - if (linear_make_request(q, &bp->bio1)) - generic_make_request(&bp->bio1); - if (linear_make_request(q, &bp->bio2)) - generic_make_request(&bp->bio2); - bio_pair_release(bp); - return 0; - } - - bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset; + do { + rcu_read_lock(); + + tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + end_sector = tmp_dev->end_sector; + data_offset = tmp_dev->rdev->data_offset; + bio->bi_bdev = tmp_dev->rdev->bdev; + + rcu_read_unlock(); + + if (unlikely(bio->bi_iter.bi_sector >= end_sector || + bio->bi_iter.bi_sector < start_sector)) + goto out_of_bounds; + + if (unlikely(bio_end_sector(bio) > end_sector)) { + /* This bio crosses a device boundary, so we have to + * split it. + */ + split = bio_split(bio, end_sector - + bio->bi_iter.bi_sector, + GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } - return 1; + split->bi_iter.bi_sector = split->bi_iter.bi_sector - + start_sector + data_offset; + + if (unlikely((split->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { + /* Just ignore it */ + bio_endio(split, 0); + } else + generic_make_request(split); + } while (split != bio); + return; + +out_of_bounds: + printk(KERN_ERR + "md/linear:%s: make_request: Sector %llu out of bounds on " + "dev %s: %llu sectors, offset %llu\n", + mdname(mddev), + (unsigned long long)bio->bi_iter.bi_sector, + bdevname(tmp_dev->rdev->bdev, b), + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); + bio_io_error(bio); } -static void linear_status (struct seq_file *seq, mddev_t *mddev) +static void linear_status (struct seq_file *seq, struct mddev *mddev) { -#undef MD_DEBUG -#ifdef MD_DEBUG - int j; - linear_conf_t *conf = mddev_to_conf(mddev); - sector_t s = 0; - - seq_printf(seq, " "); - for (j = 0; j < mddev->raid_disks; j++) - { - char b[BDEVNAME_SIZE]; - s += conf->smallest_size; - seq_printf(seq, "[%s", - bdevname(conf->hash_table[j][0].rdev->bdev,b)); - - while (s > conf->hash_table[j][0].offset + - conf->hash_table[j][0].size) - seq_printf(seq, "/%s] ", - bdevname(conf->hash_table[j][1].rdev->bdev,b)); - else - seq_printf(seq, "] "); - } - seq_printf(seq, "\n"); -#endif - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } -static struct mdk_personality linear_personality = +static struct md_personality linear_personality = { .name = "linear", .level = LEVEL_LINEAR, @@ -405,6 +366,7 @@ static struct mdk_personality linear_personality = .stop = linear_stop, .status = linear_status, .hot_add_disk = linear_add, + .size = linear_size, }; static int __init linear_init (void) @@ -421,6 +383,7 @@ static void linear_exit (void) module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Linear device concatenation personality for MD"); MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ MODULE_ALIAS("md-linear"); MODULE_ALIAS("md-level--1"); |
