author     David Barksdale <amatus@amatus.name>  2014-08-13 16:14:13 -0500
committer  David Barksdale <amatus@amatus.name>  2014-08-13 16:14:13 -0500
commit     ace6c6d243016e272050787c14e27a83ecd94a25 (patch)
tree       c837edb1ca98b2552fbc7edba47aeb63f98ca1f0 /drivers/md
parent     1b6e1688bd215cd7c9cb75650fa815a1ec6567e1 (diff)
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig     |  13
-rw-r--r--  drivers/md/Makefile    |   4
-rw-r--r--  drivers/md/md.c        | 111
-rw-r--r--  drivers/md/md.h        |  14
-rw-r--r--  drivers/md/multipath.c |   2
-rw-r--r--  drivers/md/raid0.c     |   2
-rw-r--r--  drivers/md/raid10.c    |   2
-rw-r--r--  drivers/md/raid5.c     | 179
-rw-r--r--  drivers/md/raid5.h     |   8
9 files changed, 301 insertions, 34 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2158377a135..bb72359d8dc 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -165,6 +165,19 @@ config MULTICORE_RAID456
If unsure, say N.
+config MD_RAID_SKIP_BIO_COPY
+ bool "Skip intermediate bio->cache copy"
+ depends on MD_RAID456
+ default n
+ ---help---
+ Skip the intermediate data copy between the bio submitted for writing
+ and the stripe cache in <sh> when a full-stripe write operation is
+ under way. This can improve the performance of write operations in
+ some dedicated cases, but in general bypassing the disk cache slows
+ performance down.
+
+ If unsure, say N.
+
config MD_RAID6_PQ
tristate
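The new option only gates the raid5 fast path at compile time: when it is selected, ops_run_biodrain() (see the raid5.c hunks below) tries to hand the pages of the submitted bio directly to the XOR/DMA engines for full-stripe writes, and otherwise the data is staged through the stripe cache as before. A minimal sketch of how such an option is typically consumed (hypothetical helper, not part of the patch):

static inline int may_skip_bio_copy(void)
{
#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
	return 1;	/* attempt the zero-copy full-stripe-write path */
#else
	return 0;	/* always copy the bio data into the stripe cache */
#endif
}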
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index e355e7f6a53..7d424aa10fa 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -11,8 +11,8 @@ dm-mirror-y += dm-raid1.o
dm-log-userspace-y \
+= dm-log-userspace-base.o dm-log-userspace-transfer.o
md-mod-y += md.o bitmap.o
-raid456-y += raid5.o
-raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
+raid456-y += raid5.o
+raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
raid6int8.o raid6int16.o raid6int32.o \
raid6altivec1.o raid6altivec2.o raid6altivec4.o \
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 08f7471d015..1297c9db0cf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -217,12 +217,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
return 0;
}
rcu_read_lock();
- if (mddev->suspended) {
+ if (mddev->suspended || mddev->barrier) {
DEFINE_WAIT(__wait);
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- if (!mddev->suspended)
+ if (!mddev->suspended && !mddev->barrier)
break;
rcu_read_unlock();
schedule();
@@ -264,11 +264,117 @@ static void mddev_resume(mddev_t *mddev)
int mddev_congested(mddev_t *mddev, int bits)
{
+ if (mddev->barrier)
+ return 1;
return mddev->suspended;
}
EXPORT_SYMBOL(mddev_congested);
+/*
+ * Generic barrier handling for md
+ */
+
+static void md_end_barrier(struct bio *bio, int err)
+{
+ mdk_rdev_t *rdev = bio->bi_private;
+ mddev_t *mddev = rdev->mddev;
+ if (err == -EOPNOTSUPP && mddev->barrier != (void*)1)
+ set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
+
+ rdev_dec_pending(rdev, mddev);
+
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ if (mddev->barrier == (void*)1) {
+ mddev->barrier = NULL;
+ wake_up(&mddev->sb_wait);
+ } else
+ schedule_work(&mddev->barrier_work);
+ }
+ bio_put(bio);
+}
+
+static void md_submit_barrier(struct work_struct *ws)
+{
+ mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
+ struct bio *bio = mddev->barrier;
+
+ atomic_set(&mddev->flush_pending, 1);
+ if (!test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) {
+ mdk_rdev_t *rdev;
+
+ bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
+ if (mddev->pers->make_request(mddev->queue, bio))
+ generic_make_request(bio);
+ mddev->barrier = (void*)1;
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Faulty, &rdev->flags)) {
+ /* Take two references, one is dropped
+ * when request finishes, one after
+ * we reclaim rcu_read_lock
+ */
+ struct bio *bi;
+ atomic_inc(&rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ bi = bio_alloc(GFP_KERNEL, 0);
+ bi->bi_end_io = md_end_barrier;
+ bi->bi_private = rdev;
+ bi->bi_bdev = rdev->bdev;
+ atomic_inc(&mddev->flush_pending);
+ submit_bio(WRITE_BARRIER, bi);
+ rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
+ }
+ rcu_read_unlock();
+ } else
+ bio_endio(bio, -EOPNOTSUPP);
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ mddev->barrier = NULL;
+ wake_up(&mddev->sb_wait);
+ }
+}
+
+void md_barrier_request(mddev_t *mddev, struct bio *bio)
+{
+ mdk_rdev_t *rdev;
+
+ spin_lock_irq(&mddev->write_lock);
+ wait_event_lock_irq(mddev->sb_wait,
+ !mddev->barrier,
+ mddev->write_lock, /*nothing*/);
+ mddev->barrier = bio;
+ spin_unlock_irq(&mddev->write_lock);
+
+ atomic_set(&mddev->flush_pending, 1);
+ INIT_WORK(&mddev->barrier_work, md_submit_barrier);
+
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Faulty, &rdev->flags)) {
+ struct bio *bi;
+
+ atomic_inc(&rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ bi = bio_alloc(GFP_KERNEL, 0);
+ bi->bi_end_io = md_end_barrier;
+ bi->bi_private = rdev;
+ bi->bi_bdev = rdev->bdev;
+ atomic_inc(&mddev->flush_pending);
+ submit_bio(WRITE_BARRIER, bi);
+ rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
+ }
+ rcu_read_unlock();
+ if (atomic_dec_and_test(&mddev->flush_pending))
+ schedule_work(&mddev->barrier_work);
+}
+EXPORT_SYMBOL(md_barrier_request);
static inline mddev_t *mddev_get(mddev_t *mddev)
{
atomic_inc(&mddev->active);
@@ -377,6 +483,7 @@ static mddev_t * mddev_find(dev_t unit)
atomic_set(&new->openers, 0);
atomic_set(&new->active_io, 0);
spin_lock_init(&new->write_lock);
+ atomic_set(&new->flush_pending, 0);
init_waitqueue_head(&new->sb_wait);
init_waitqueue_head(&new->recovery_wait);
new->reshape_position = MaxSector;
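The flush_pending accounting above follows a bias-by-one fan-out idiom: md_barrier_request() primes the counter to 1 before it starts issuing zero-length WRITE_BARRIER bios to the member disks, so md_end_barrier() cannot see the count reach zero, and kick off the next phase, while the submission loop is still walking the rdev list; md_submit_barrier() then replays the original bio without BIO_RW_BARRIER and runs the same fan-out a second time as a post-flush. A condensed sketch of the idiom, using the names from the patch (rdev reference counting and bio allocation elided):

	atomic_set(&mddev->flush_pending, 1);		/* bias: submission still in progress */
	list_for_each_entry_rcu(rdev, &mddev->disks, same_set) {
		/* one zero-length barrier bio per in-sync member disk */
		atomic_inc(&mddev->flush_pending);
		submit_bio(WRITE_BARRIER, bi);		/* bi->bi_end_io == md_end_barrier */
	}
	if (atomic_dec_and_test(&mddev->flush_pending))	/* drop the bias */
		schedule_work(&mddev->barrier_work);	/* every flush has already completed */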
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 87430fea287..abe8ba3ab01 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -292,6 +292,17 @@ struct mddev_s
struct mutex bitmap_mutex;
struct list_head all_mddevs;
+
+ /* Generic barrier handling.
+ * If there is a pending barrier request, all other
+ * writes are blocked while the devices are flushed.
+ * The last to finish a flush schedules a worker to
+ * submit the barrier request (without the barrier flag),
+ * then submit more flush requests.
+ */
+ struct bio *barrier;
+ atomic_t flush_pending;
+ struct work_struct barrier_work;
};
@@ -430,8 +441,9 @@ extern void md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
-
extern int mddev_congested(mddev_t *mddev, int bits);
+extern void md_barrier_request(mddev_t *mddev, struct bio *bio);
+
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
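With md_barrier_request() exported, a personality no longer has to fail barrier bios with -EOPNOTSUPP; the three hunks below switch multipath, raid0 and raid10 over to the new helper. The resulting pattern, shown here for a hypothetical personality:

/* Hypothetical make_request, shown only to illustrate the new contract. */
static int example_make_request(struct request_queue *q, struct bio *bio)
{
	mddev_t *mddev = q->queuedata;

	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
		/* md core pre-flushes the member disks, replays the bio
		 * without BIO_RW_BARRIER, then issues a post-flush. */
		md_barrier_request(mddev, bio);
		return 0;
	}
	/* ... normal read/write handling ... */
	return 0;
}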
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index ee7646f974a..cbc0a99f379 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -145,7 +145,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
int cpu;
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
- bio_endio(bio, -EOPNOTSUPP);
+ md_barrier_request(mddev, bio);
return 0;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index d3a4ce06015..122d07af5b5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -453,7 +453,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
int cpu;
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
- bio_endio(bio, -EOPNOTSUPP);
+ md_barrier_request(mddev, bio);
return 0;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c2cb7b87b44..2fbf867f8b3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
mdk_rdev_t *blocked_rdev;
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
- bio_endio(bio, -EOPNOTSUPP);
+ md_barrier_request(mddev, bio);
return 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 431b9b26ca5..0d403ca12ae 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -639,7 +639,8 @@ static void mark_target_uptodate(struct stripe_head *sh, int target)
return;
tgt = &sh->dev[target];
- set_bit(R5_UPTODATE, &tgt->flags);
+ if(!tgt->dpage)
+ set_bit(R5_UPTODATE, &tgt->flags);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
clear_bit(R5_Wantcompute, &tgt->flags);
}
@@ -681,6 +682,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
int i;
+ enum async_tx_flags flags = ASYNC_TX_FENCE | ASYNC_TX_XOR_ZERO_DST;
pr_debug("%s: stripe %llu block: %d\n",
__func__, (unsigned long long)sh->sector, target);
@@ -692,7 +694,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
atomic_inc(&sh->count);
- init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+ init_async_submit(&submit, flags, NULL,
ops_complete_compute, sh, to_addr_conv(sh, percpu));
if (unlikely(count == 1))
tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
@@ -915,6 +917,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
struct page **xor_srcs = percpu->scribble;
int count = 0, pd_idx = sh->pd_idx, i;
struct async_submit_ctl submit;
+ enum async_tx_flags flags = ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST;
/* existing parity data subtracted */
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -929,7 +932,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
xor_srcs[count++] = dev->page;
}
- init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ init_async_submit(&submit, flags, tx,
ops_complete_prexor, sh, to_addr_conv(sh, percpu));
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
@@ -942,9 +945,80 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
int disks = sh->disks;
int i;
+#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
+ int pd_idx = sh->pd_idx;
+ int qd_idx = sh->raid_conf->level == 6 ?
+ raid6_next_disk(pd_idx, disks) : -1;
+ int fswrite = 1;
+#endif
+
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
+#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
+ /* initially assume that the operation is a full-stripe write*/
+ for (i = disks; i-- ;) {
+ struct r5dev *dev = &sh->dev[i];
+
+ if (unlikely(i == pd_idx || i == qd_idx))
+ continue;
+ if (unlikely(!test_bit(R5_Wantdrain, &dev->flags)))
+ goto do_copy;
+ if ((test_bit(R5_OVERWRITE, &dev->flags)) &&
+ !r5_next_bio(sh->dev[i].towrite, sh->dev[i].sector)) {
+ /* now check whether a single bio_vec within
+ * the bio covers sh->dev[i]
+ */
+ struct bio *pbio = sh->dev[i].towrite;
+ struct bio_vec *bvl;
+ int found = 0;
+ int bvec_page = pbio->bi_sector << 9, k;
+ int dev_page = sh->dev[i].sector << 9;
+
+ /* search for the bio_vec that covers dev[i].page */
+ bio_for_each_segment(bvl, pbio, k) {
+ if (bvec_page == dev_page &&
+ bio_iovec_idx(pbio,k)->bv_len ==
+ STRIPE_SIZE) {
+ /* found the vector which covers the
+ * strip fully
+ */
+ found = 1;
+ break;
+ }
+ bvec_page += bio_iovec_idx(pbio,k)->bv_len;
+ }
+ if (found) {
+ /* save the direct pointer to buffer */
+ if(dev->dpage)
+ printk("BIO bugs\n");
+ BUG_ON(dev->dpage);
+ dev->dpage = bio_iovec_idx(pbio,k)->bv_page;
+ clear_bit(R5_Skipped, &dev->flags);
+ continue;
+ }
+ }
+do_copy:
+ /* we get here in two cases:
+ * - dev[i] is not fully covered by the bio;
+ * - more than one bio covers dev[i].
+ * In both cases copy from the bio into dev[i].page.
+ */
+ pr_debug("%s: do copy because of disk %d\n", __FUNCTION__, i);
+ do {
+ /* restore dpages set */
+ sh->dev[i].dpage = NULL;
+ } while (++i != disks);
+ fswrite = 0;
+ break;
+ }
+
+ if (fswrite) {
+ /* won't add new txs right now, so run ops currently pending */
+ async_tx_issue_pending_all();
+ }
+#endif
+
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
struct bio *chosen;
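So the fast path requires that every data disk in the stripe is completely overwritten by exactly one bio, and that a single bio_vec of that bio lines up with the STRIPE_SIZE page backing sh->dev[i]; the first device that fails the test drops the whole stripe back onto the copying path. A hedged distillation of the per-device check (hypothetical helper; the byte arithmetic of the hunk is simplified to sectors and dev->towrite is assumed to be set):

/* Return the bio page that may replace dev->page, or NULL to force a copy. */
static struct page *direct_write_page(struct r5dev *dev)
{
	struct bio *bio = dev->towrite;
	struct bio_vec *bvl;
	sector_t pos = bio->bi_sector;
	int k;

	if (!test_bit(R5_OVERWRITE, &dev->flags) ||
	    r5_next_bio(bio, dev->sector))
		return NULL;			/* partial write, or several bios */

	bio_for_each_segment(bvl, bio, k) {
		if (pos == dev->sector && bvl->bv_len == STRIPE_SIZE)
			return bvl->bv_page;	/* one vector covers the strip */
		pos += bvl->bv_len >> 9;
	}
	return NULL;				/* page straddles bio_vec boundaries */
}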
@@ -959,6 +1033,13 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
wbi = dev->written = chosen;
spin_unlock(&sh->lock);
+#ifdef CONFIG_MD_RAID_SKIP_BIO_COPY
+ if (fswrite) {
+ /* just update dev bio vec pointer */
+ dev->vec.bv_page = dev->dpage;
+ continue;
+ }
+#endif
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
tx = async_copy_data(1, wbi, dev->page,
@@ -985,8 +1066,10 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->written || i == pd_idx || i == qd_idx)
- set_bit(R5_UPTODATE, &dev->flags);
+ if (dev->dpage)
+ set_bit(R5_Skipped, &dev->flags);
+ else if (dev->written || i == pd_idx || i == qd_idx)
+ set_bit(R5_UPTODATE, &dev->flags);
}
if (sh->reconstruct_state == reconstruct_state_drain_run)
@@ -1026,14 +1109,16 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if (dev->written)
- xor_srcs[count++] = dev->page;
+ xor_srcs[count++] = dev->dpage ?
+ dev->dpage : dev->page;
}
} else {
xor_dest = sh->dev[pd_idx].page;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if (i != pd_idx)
- xor_srcs[count++] = dev->page;
+ xor_srcs[count++] = dev->dpage ?
+ dev->dpage : dev->page;
}
}
@@ -2437,7 +2522,8 @@ static void handle_stripe_clean_event(raid5_conf_t *conf,
if (sh->dev[i].written) {
dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) &&
- test_bit(R5_UPTODATE, &dev->flags)) {
+ (test_bit(R5_UPTODATE, &dev->flags) ||
+ test_bit(R5_Skipped, &dev->flags))) {
/* We can return any write requests */
struct bio *wbi, *wbi2;
int bitmap_end = 0;
@@ -2445,6 +2531,17 @@ static void handle_stripe_clean_event(raid5_conf_t *conf,
spin_lock_irq(&conf->device_lock);
wbi = dev->written;
dev->written = NULL;
+
+ if (dev->dpage) {
+ /* with direct writes the raid disk
+ * cache actually isn't UPTODATE
+ */
+ clear_bit(R5_Skipped, &dev->flags);
+ clear_bit(R5_OVERWRITE, &dev->flags);
+ dev->vec.bv_page = dev->page;
+ dev->dpage = NULL;
+ }
+
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
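Taken together, the raid5.c hunks give the borrowed page a well-defined lifetime; the summary below (names from the patch) ties the scattered pieces together:

/*
 * Lifetime of dev->dpage on the zero-copy path:
 *
 *   ops_run_biodrain()           dev->dpage = bvl->bv_page;
 *                                dev->vec.bv_page = dev->dpage;  (no copy)
 *   ops_run_reconstruct5()       parity is XORed from dpage, not dev->page
 *   ops_complete_reconstruct()   set R5_Skipped instead of R5_UPTODATE,
 *                                since the cache page was never written
 *   handle_stripe_clean_event()  return the write bios, clear R5_Skipped
 *                                and R5_OVERWRITE, restore dev->vec.bv_page
 *                                = dev->page and set dev->dpage = NULL
 */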
@@ -2947,6 +3044,7 @@ static void handle_stripe5(struct stripe_head *sh)
struct r5dev *dev;
mdk_rdev_t *blocked_rdev = NULL;
int prexor;
+ int dec_preread_active = 0;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
@@ -3096,12 +3194,8 @@ static void handle_stripe5(struct stripe_head *sh)
set_bit(STRIPE_INSYNC, &sh->state);
}
}
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ dec_preread_active = 1;
}
/* Now to consider new write requests and what else, if anything
@@ -3208,6 +3302,16 @@ static void handle_stripe5(struct stripe_head *sh)
ops_run_io(sh, &s);
+ if (dec_preread_active) {
+ /* We delay this until after ops_run_io so that if make_request
+ * is waiting on a barrier, it won't continue until the writes
+ * have actually been submitted.
+ */
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
return_io(return_bi);
}
@@ -3221,6 +3325,7 @@ static void handle_stripe6(struct stripe_head *sh)
struct r6_state r6s;
struct r5dev *dev, *pdev, *qdev;
mdk_rdev_t *blocked_rdev = NULL;
+ int dec_preread_active = 0;
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
@@ -3358,7 +3463,6 @@ static void handle_stripe6(struct stripe_head *sh)
* completed
*/
if (sh->reconstruct_state == reconstruct_state_drain_result) {
- int qd_idx = sh->qd_idx;
sh->reconstruct_state = reconstruct_state_idle;
/* All the 'written' buffers and the parity blocks are ready to
@@ -3380,12 +3484,8 @@ static void handle_stripe6(struct stripe_head *sh)
set_bit(STRIPE_INSYNC, &sh->state);
}
}
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ dec_preread_active = 1;
}
/* Now to consider new write requests and what else, if anything
@@ -3494,6 +3594,18 @@ static void handle_stripe6(struct stripe_head *sh)
ops_run_io(sh, &s);
+
+ if (dec_preread_active) {
+ /* We delay this until after ops_run_io so that if make_request
+ * is waiting on a barrier, it won't continue until the writes
+ * have actually been submitted.
+ */
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+
return_io(return_bi);
}
@@ -3741,7 +3853,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
{
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev->private;
- unsigned int dd_idx;
+ int dd_idx;
struct bio* align_bi;
mdk_rdev_t *rdev;
@@ -3866,7 +3978,13 @@ static int make_request(struct request_queue *q, struct bio * bi)
int cpu, remaining;
if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
- bio_endio(bi, -EOPNOTSUPP);
+ /* Drain all pending writes. We only really need
+ * to ensure they have been submitted, but this is
+ * easier.
+ */
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+ md_barrier_request(mddev, bi);
return 0;
}
@@ -3990,6 +4108,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
+ if (mddev->barrier &&
+ !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ atomic_inc(&conf->preread_active_stripes);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -4009,6 +4130,14 @@ static int make_request(struct request_queue *q, struct bio * bi)
bio_endio(bi, 0);
}
+
+ if (mddev->barrier) {
+ /* We need to wait for the stripes to all be handled.
+ * So: wait for preread_active_stripes to drop to 0.
+ */
+ wait_event(mddev->thread->wqueue,
+ atomic_read(&conf->preread_active_stripes) == 0);
+ }
return 0;
}
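For raid5 itself the ordering is enforced through the stripe state machine: while mddev->barrier is set, every stripe touched by the replayed bio is forced onto the preread-active count, that count is only dropped after ops_run_io() has issued the member-disk bios (the dec_preread_active change above), and make_request() finishes by draining it. The wait therefore means "all writes belonging to the barrier request have been submitted to the underlying devices":

/* Names from the patch; shown in isolation only to spell out the guarantee. */
if (mddev->barrier)
	wait_event(mddev->thread->wqueue,
		   atomic_read(&conf->preread_active_stripes) == 0);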
@@ -5104,9 +5233,8 @@ static int stop(mddev_t *mddev)
mddev->thread = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
- sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
free_conf(conf);
- mddev->private = NULL;
+ mddev->private = &raid5_attrs_group;
return 0;
}
@@ -5863,6 +5991,7 @@ static void raid5_exit(void)
module_init(raid5_init);
module_exit(raid5_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
MODULE_ALIAS("md-personality-4"); /* RAID5 */
MODULE_ALIAS("md-raid5");
MODULE_ALIAS("md-raid4");
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index dd708359b45..7ffc683d69d 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -233,6 +233,7 @@ struct stripe_head {
struct bio req;
struct bio_vec vec;
struct page *page;
+ struct page *dpage;
struct bio *toread, *read, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
@@ -252,7 +253,7 @@ struct stripe_head_state {
/* r6_state - extra state data only relevant to r6 */
struct r6_state {
- int p_failed, q_failed, failed_num[2];
+ int p_failed, q_failed, qd_idx, failed_num[2];
};
/* Flags */
@@ -275,6 +276,7 @@ struct r6_state {
* filling
*/
#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
+#define R5_Skipped 14 /* SKIP_BIO_COPY completed */
/*
* Write method
*/
@@ -314,6 +316,10 @@ struct r6_state {
#define STRIPE_OP_RECONSTRUCT 4
#define STRIPE_OP_CHECK 5
+#define STRIPE_OP_CHECK_PP 6
+#define STRIPE_OP_CHECK_QP 7
+
+
/*
* Plugging:
*