Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c | 211
1 file changed, 102 insertions(+), 109 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index df7b0a06b0e..cb882aae9e2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void __make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10_bio;
 	struct bio *read_bio;
 	int i;
-	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
-	int chunk_sects = chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,61 +1172,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	int max_sectors;
 	int sectors;
 
-	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
-		md_flush_request(mddev, bio);
-		return;
-	}
-
-	/* If this request crosses a chunk boundary, we need to
-	 * split it.  This will only happen for 1 PAGE (or less) requests.
-	 */
-	if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
-		     > chunk_sects
-		     && (conf->geo.near_copies < conf->geo.raid_disks
-			 || conf->prev.near_copies < conf->prev.raid_disks))) {
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_segments(bio) > 1)
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		bp = bio_split(bio,
-			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-
-		/* Each of these 'make_request' calls will call 'wait_barrier'.
-		 * If the first succeeds but the second blocks due to the resync
-		 * thread raising the barrier, we will deadlock because the
-		 * IO to the underlying device will be queued in generic_make_request
-		 * and will never complete, so will never reduce nr_pending.
-		 * So increment nr_waiting here so no new raise_barriers will
-		 * succeed, and so the second wait_barrier cannot block.
-		 */
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting++;
-		spin_unlock_irq(&conf->resync_lock);
-
-		make_request(mddev, &bp->bio1);
-		make_request(mddev, &bp->bio2);
-
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting--;
-		wake_up(&conf->wait_barrier);
-		spin_unlock_irq(&conf->resync_lock);
-
-		bio_pair_release(bp);
-		return;
-	bad_map:
-		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
-		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-		       (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
-
-		bio_io_error(bio);
-		return;
-	}
-
-	md_write_start(mddev, bio);
-
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
@@ -1238,24 +1181,25 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	sectors = bio_sectors(bio);
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_sector < conf->reshape_progress &&
-	    bio->bi_sector + sectors > conf->reshape_progress) {
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
 		/* IO spans the reshape position.  Need to wait for
 		 * reshape to pass
 		 */
 		allow_barrier(conf);
 		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_sector ||
-			   conf->reshape_progress >= bio->bi_sector + sectors);
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
 		wait_barrier(conf);
 	}
 
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
-	     ? (bio->bi_sector < conf->reshape_safe &&
-		bio->bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_sector < conf->reshape_progress))) {
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1217,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	r10_bio->sectors = sectors;
 
 	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_sector;
+	r10_bio->sector = bio->bi_iter.bi_sector;
 	r10_bio->state = 0;
 
 	/* We might need to issue multiple reads to different
@@ -1302,13 +1246,13 @@ read_again:
 		slot = r10_bio->read_slot;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
+		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
 		r10_bio->devs[slot].rdev = rdev;
 
-		read_bio->bi_sector = r10_bio->devs[slot].addr +
+		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
 			choose_data_offset(r10_bio, rdev);
 		read_bio->bi_bdev = rdev->bdev;
 		read_bio->bi_end_io = raid10_end_read_request;
@@ -1319,15 +1263,15 @@ read_again:
 			/* Could not read all from this device, so we will
 			 * need another r10_bio.
 			 */
-			sectors_handled = (r10_bio->sectors + max_sectors
-					   - bio->bi_sector);
+			sectors_handled = (r10_bio->sector + max_sectors
+					   - bio->bi_iter.bi_sector);
 			r10_bio->sectors = max_sectors;
 			spin_lock_irq(&conf->device_lock);
 			if (bio->bi_phys_segments == 0)
 				bio->bi_phys_segments = 2;
 			else
 				bio->bi_phys_segments++;
-			spin_unlock(&conf->device_lock);
+			spin_unlock_irq(&conf->device_lock);
 			/* Cannot call generic_make_request directly
 			 * as that will be queued in __generic_make_request
 			 * and subsequent mempool_alloc might block
@@ -1341,7 +1285,8 @@ read_again:
 			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 			r10_bio->state = 0;
 			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_sector + sectors_handled;
+			r10_bio->sector = bio->bi_iter.bi_sector +
+				sectors_handled;
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
@@ -1499,7 +1444,8 @@ retry_write:
 			bio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
 	}
-	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+	sectors_handled = r10_bio->sector + max_sectors -
+		bio->bi_iter.bi_sector;
 
 	atomic_set(&r10_bio->remaining, 1);
 	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1456,11 @@ retry_write:
 		if (r10_bio->devs[i].bio) {
 			struct md_rdev *rdev = conf->mirrors[d].rdev;
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-				    max_sectors);
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
+				 max_sectors);
 			r10_bio->devs[i].bio = mbio;
 
-			mbio->bi_sector	= (r10_bio->devs[i].addr+
+			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
 					   choose_data_offset(r10_bio,
 							      rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1499,11 @@ retry_write:
 				rdev = conf->mirrors[d].rdev;
 			}
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-				    max_sectors);
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
+				 max_sectors);
 			r10_bio->devs[i].repl_bio = mbio;
 
-			mbio->bi_sector	= (r10_bio->devs[i].addr +
+			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
 					   choose_data_offset(
 						   r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1537,51 @@ retry_write:
 		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 
 		r10_bio->mddev = mddev;
-		r10_bio->sector = bio->bi_sector + sectors_handled;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
 		r10_bio->state = 0;
 		goto retry_write;
 	}
 	one_write_done(r10_bio);
+}
+
+static void make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
+	int chunk_sects = chunk_mask + 1;
+
+	struct bio *split;
+
+	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+		md_flush_request(mddev, bio);
+		return;
+	}
+
+	md_write_start(mddev, bio);
+
+
+	do {
+
+		/*
+		 * If this request crosses a chunk boundary, we need to split
+		 * it.
+		 */
+		if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
+			     bio_sectors(bio) > chunk_sects
+			     && (conf->geo.near_copies < conf->geo.raid_disks
+				 || conf->prev.near_copies <
+				 conf->prev.raid_disks))) {
+			split = bio_split(bio, chunk_sects -
+					  (bio->bi_iter.bi_sector &
+					   (chunk_sects - 1)),
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		__make_request(mddev, split);
+	} while (split != bio);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
@@ -1782,6 +1768,7 @@ static int raid10_spare_active(struct mddev *mddev)
 			}
 			sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
 		} else if (tmp->rdev
+			   && tmp->rdev->recovery_offset == MaxSector
 			   && !test_bit(Faulty, &tmp->rdev->flags)
 			   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
 			count++;
@@ -2123,10 +2110,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		bio_reset(tbio);
 
 		tbio->bi_vcnt = vcnt;
-		tbio->bi_size = r10_bio->sectors << 9;
+		tbio->bi_iter.bi_size = r10_bio->sectors << 9;
 		tbio->bi_rw = WRITE;
 		tbio->bi_private = r10_bio;
-		tbio->bi_sector = r10_bio->devs[i].addr;
+		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 
 		for (j=0; j < vcnt ; j++) {
 			tbio->bi_io_vec[j].bv_offset = 0;
@@ -2143,7 +2130,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 
 		atomic_inc(&r10_bio->remaining);
 		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
-		tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
+		tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
 		tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		generic_make_request(tbio);
 	}
@@ -2613,8 +2600,8 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
 		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(wbio, sector - bio->bi_sector, sectors);
-		wbio->bi_sector = (r10_bio->devs[i].addr+
+		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
+		wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
 				   choose_data_offset(r10_bio, rdev) +
 				   (sector - r10_bio->sector));
 		wbio->bi_bdev = rdev->bdev;
@@ -2686,12 +2673,10 @@ read_more:
 		(unsigned long long)r10_bio->sector);
 	bio = bio_clone_mddev(r10_bio->master_bio,
 			      GFP_NOIO, mddev);
-	md_trim_bio(bio,
-		    r10_bio->sector - bio->bi_sector,
-		    max_sectors);
+	bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
 	r10_bio->devs[slot].bio = bio;
 	r10_bio->devs[slot].rdev = rdev;
-	bio->bi_sector = r10_bio->devs[slot].addr
+	bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
 		+ choose_data_offset(r10_bio, rdev);
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_rw = READ | do_sync;
@@ -2702,7 +2687,7 @@ read_more:
 		struct bio *mbio = r10_bio->master_bio;
 		int sectors_handled =
 			r10_bio->sector + max_sectors
-			- mbio->bi_sector;
+			- mbio->bi_iter.bi_sector;
 		r10_bio->sectors = max_sectors;
 		spin_lock_irq(&conf->device_lock);
 		if (mbio->bi_phys_segments == 0)
@@ -2720,7 +2705,7 @@ read_more:
 		set_bit(R10BIO_ReadError,
 			&r10_bio->state);
 		r10_bio->mddev = mddev;
-		r10_bio->sector = mbio->bi_sector
+		r10_bio->sector = mbio->bi_iter.bi_sector
 			+ sectors_handled;
 
 		goto read_more;
@@ -3158,7 +3143,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_end_io = end_sync_read;
 				bio->bi_rw = READ;
 				from_addr = r10_bio->devs[j].addr;
-				bio->bi_sector = from_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = from_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&rdev->nr_pending);
 				/* and we write to 'i' (if not in_sync) */
@@ -3182,7 +3168,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 					bio->bi_private = r10_bio;
 					bio->bi_end_io = end_sync_write;
 					bio->bi_rw = WRITE;
-					bio->bi_sector = to_addr
+					bio->bi_iter.bi_sector = to_addr
 						+ rdev->data_offset;
 					bio->bi_bdev = rdev->bdev;
 					atomic_inc(&r10_bio->remaining);
@@ -3211,7 +3197,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_write;
 				bio->bi_rw = WRITE;
-				bio->bi_sector = to_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = to_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&r10_bio->remaining);
 				break;
@@ -3219,10 +3206,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			if (j == conf->copies) {
 				/* Cannot recover, so abort the recovery or
 				 * record a bad block */
-				put_buf(r10_bio);
-				if (rb2)
-					atomic_dec(&rb2->remaining);
-				r10_bio = rb2;
 				if (any_working) {
 					/* problem is that there are bad blocks
 					 * on other device(s)
@@ -3254,6 +3237,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 					mirror->recovery_disabled
 						= mddev->recovery_disabled;
 				}
+				put_buf(r10_bio);
+				if (rb2)
+					atomic_dec(&rb2->remaining);
+				r10_bio = rb2;
 				break;
 			}
 		}
@@ -3329,7 +3316,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio->bi_rw = READ;
-			bio->bi_sector = sector +
+			bio->bi_iter.bi_sector = sector +
 				conf->mirrors[d].rdev->data_offset;
 			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			count++;
@@ -3351,7 +3338,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_write;
 			bio->bi_rw = WRITE;
-			bio->bi_sector = sector +
+			bio->bi_iter.bi_sector = sector +
 				conf->mirrors[d].replacement->data_offset;
 			bio->bi_bdev = conf->mirrors[d].replacement->bdev;
 			count++;
@@ -3398,7 +3385,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			     bio2 = bio2->bi_next) {
 				/* remove last page from this bio */
 				bio2->bi_vcnt--;
-				bio2->bi_size -= len;
+				bio2->bi_iter.bi_size -= len;
 				bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
 			}
 			goto bio_full;
@@ -3748,7 +3735,8 @@ static int run(struct mddev *mddev)
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
 			mddev->degraded++;
-			if (disk->rdev)
+			if (disk->rdev &&
+			    disk->rdev->saved_raid_disk < 0)
 				conf->fullsync = 1;
 		}
 		disk->recovery_disabled = mddev->recovery_disabled - 1;
@@ -4385,7 +4373,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait, mddev->flags == 0 ||
-			   kthread_should_stop());
+			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+			allow_barrier(conf);
+			return sectors_done;
+		}
 		conf->reshape_safe = mddev->reshape_position;
 		allow_barrier(conf);
 	}
@@ -4414,7 +4406,7 @@ read_more:
 	read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
 
 	read_bio->bi_bdev = rdev->bdev;
-	read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+	read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
 			       + rdev->data_offset);
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
@@ -4422,7 +4414,7 @@ read_more:
 	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
-	read_bio->bi_size = 0;
+	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
 	r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
 
@@ -4448,7 +4440,8 @@ read_more:
 
 		bio_reset(b);
 		b->bi_bdev = rdev2->bdev;
-		b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
+		b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
+			rdev2->new_data_offset;
 		b->bi_private = r10_bio;
 		b->bi_end_io = end_reshape_write;
 		b->bi_rw = WRITE;
@@ -4475,7 +4468,7 @@ read_more:
 			     bio2 = bio2->bi_next) {
 				/* Remove last page from this bio */
 				bio2->bi_vcnt--;
-				bio2->bi_size -= len;
+				bio2->bi_iter.bi_size -= len;
 				bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
 			}
 			goto bio_full;
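
A note on the new make_request() above: instead of recursing into generic_make_request() with a bio_pair as the old code did, it loops, using bio_split() to carve the leading chunk-aligned piece off the front of the bio (allocating the piece from fs_bio_set) while the original bio keeps the remainder, and bio_chain() to link each piece to its parent so the parent does not complete before the piece does. Below is a minimal sketch of that split-and-chain pattern against the same 3.14-era block APIs; it is hypothetical driver code, not part of this patch, and issue_piece() and boundary_mask are stand-ins for __make_request() and conf->geo.chunk_mask.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Stand-in for the driver's real per-piece issue path (__make_request()
 * in the patch above); here it just hands the bio to the block layer. */
static void issue_piece(struct bio *bio)
{
	generic_make_request(bio);
}

/* Submit 'bio' in pieces that never cross a (boundary_mask + 1)-sector
 * boundary; 'boundary_mask' plays the role of conf->geo.chunk_mask. */
static void split_and_submit(struct bio *bio, sector_t boundary_mask)
{
	sector_t boundary_sects = boundary_mask + 1;
	struct bio *split;

	do {
		sector_t offset = bio->bi_iter.bi_sector & boundary_mask;

		if (offset + bio_sectors(bio) > boundary_sects) {
			/* Carve the head off up to the boundary; 'bio'
			 * keeps the remainder and is revisited next pass. */
			split = bio_split(bio, boundary_sects - offset,
					  GFP_NOIO, fs_bio_set);
			/* Parent must not complete before this piece. */
			bio_chain(split, bio);
		} else {
			split = bio;	/* final (or only) piece */
		}
		issue_piece(split);
	} while (split != bio);
}

Because each pass submits a bio that lies wholly inside one chunk, the single-page restriction of the old bio_pair path ("This will only happen for 1 PAGE (or less) requests") and its bad_map error case disappear: a bio of any size is simply peeled apart one chunk at a time.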
