diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 11 | ||||
-rw-r--r-- | block/Makefile | 2 | ||||
-rw-r--r-- | block/as-iosched.c | 10 | ||||
-rw-r--r-- | block/blk-barrier.c | 72 | ||||
-rw-r--r-- | block/blk-core.c | 205 | ||||
-rw-r--r-- | block/blk-integrity.c | 1 | ||||
-rw-r--r-- | block/blk-iopoll.c | 227 | ||||
-rw-r--r-- | block/blk-merge.c | 43 | ||||
-rw-r--r-- | block/blk-settings.c | 139 | ||||
-rw-r--r-- | block/blk-sysfs.c | 31 | ||||
-rw-r--r-- | block/blk.h | 1 | ||||
-rw-r--r-- | block/bsg.c | 6 | ||||
-rw-r--r-- | block/cfq-iosched.c | 321 | ||||
-rw-r--r-- | block/cmd-filter.c | 233 | ||||
-rw-r--r-- | block/compat_ioctl.c | 13 | ||||
-rw-r--r-- | block/elevator.c | 3 | ||||
-rw-r--r-- | block/genhd.c | 28 | ||||
-rw-r--r-- | block/ioctl.c | 66 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 44 |
19 files changed, 868 insertions, 588 deletions
diff --git a/block/Kconfig b/block/Kconfig index 95a86adc33a..9be0b56eaee 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -48,9 +48,9 @@ config LBDAF If unsure, say Y. config BLK_DEV_BSG - bool "Block layer SG support v4 (EXPERIMENTAL)" - depends on EXPERIMENTAL - ---help--- + bool "Block layer SG support v4" + default y + help Saying Y here will enable generic SG (SCSI generic) v4 support for any block device. @@ -60,7 +60,10 @@ config BLK_DEV_BSG protocols (e.g. Task Management Functions and SMP in Serial Attached SCSI). - If unsure, say N. + This option is required by recent UDEV versions to properly + access device serial numbers, etc. + + If unsure, say Y. config BLK_DEV_INTEGRITY bool "Block layer data integrity support" diff --git a/block/Makefile b/block/Makefile index e9fa4dd690f..ba74ca6bfa1 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - ioctl.o genhd.o scsi_ioctl.o cmd-filter.o + blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/as-iosched.c b/block/as-iosched.c index 7a12cf6ee1d..ce8ba57c655 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -146,7 +146,7 @@ enum arq_state { #define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) #define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) -static DEFINE_PER_CPU(unsigned long, ioc_count); +static DEFINE_PER_CPU(unsigned long, as_ioc_count); static struct completion *ioc_gone; static DEFINE_SPINLOCK(ioc_gone_lock); @@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); - elv_ioc_count_dec(ioc_count); + elv_ioc_count_dec(as_ioc_count); if (ioc_gone) { /* * AS scheduler is exiting, grab exit lock and check @@ -169,7 +169,7 @@ static void free_as_io_context(struct as_io_context *aic) * complete ioc_gone and set it back to NULL. */ spin_lock(&ioc_gone_lock); - if (ioc_gone && !elv_ioc_count_read(ioc_count)) { + if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) { complete(ioc_gone); ioc_gone = NULL; } @@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; - elv_ioc_count_inc(ioc_count); + elv_ioc_count_inc(as_ioc_count); } return ret; @@ -1507,7 +1507,7 @@ static void __exit as_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (elv_ioc_count_read(ioc_count)) + if (elv_ioc_count_read(as_ioc_count)) wait_for_completion(&all_gone); synchronize_rcu(); } diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 30022b4e2f6..8873b9b439f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -348,6 +348,10 @@ static void blkdev_discard_end_io(struct bio *bio, int err) clear_bit(BIO_UPTODATE, &bio->bi_flags); } + if (bio->bi_private) + complete(bio->bi_private); + __free_page(bio_page(bio)); + bio_put(bio); } @@ -357,49 +361,73 @@ static void blkdev_discard_end_io(struct bio *bio, int err) * @sector: start sector * @nr_sects: number of sectors to discard * @gfp_mask: memory allocation flags (for bio_alloc) + * @flags: DISCARD_FL_* flags to control behaviour * * Description: - * Issue a discard request for the sectors in question. Does not wait. + * Issue a discard request for the sectors in question. */ -int blkdev_issue_discard(struct block_device *bdev, - sector_t sector, sector_t nr_sects, gfp_t gfp_mask) +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, int flags) { - struct request_queue *q; + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q = bdev_get_queue(bdev); + int type = flags & DISCARD_FL_BARRIER ? + DISCARD_BARRIER : DISCARD_NOBARRIER; struct bio *bio; + struct page *page; int ret = 0; - if (bdev->bd_disk == NULL) - return -ENXIO; - - q = bdev_get_queue(bdev); if (!q) return -ENXIO; - if (!q->prepare_discard_fn) + if (!blk_queue_discard(q)) return -EOPNOTSUPP; while (nr_sects && !ret) { - bio = bio_alloc(gfp_mask, 0); - if (!bio) - return -ENOMEM; + unsigned int sector_size = q->limits.logical_block_size; + unsigned int max_discard_sectors = + min(q->limits.max_discard_sectors, UINT_MAX >> 9); + bio = bio_alloc(gfp_mask, 1); + if (!bio) + goto out; + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; + if (flags & DISCARD_FL_WAIT) + bio->bi_private = &wait; - bio->bi_sector = sector; + /* + * Add a zeroed one-sector payload as that's what + * our current implementations need. If we'll ever need + * more the interface will need revisiting. + */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_free_bio; + if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) + goto out_free_page; - if (nr_sects > queue_max_hw_sectors(q)) { - bio->bi_size = queue_max_hw_sectors(q) << 9; - nr_sects -= queue_max_hw_sectors(q); - sector += queue_max_hw_sectors(q); + /* + * And override the bio size - the way discard works we + * touch many more blocks on disk than the actual payload + * length. + */ + if (nr_sects > max_discard_sectors) { + bio->bi_size = max_discard_sectors << 9; + nr_sects -= max_discard_sectors; + sector += max_discard_sectors; } else { bio->bi_size = nr_sects << 9; nr_sects = 0; } + bio_get(bio); - submit_bio(DISCARD_BARRIER, bio); + submit_bio(type, bio); + + if (flags & DISCARD_FL_WAIT) + wait_for_completion(&wait); - /* Check if it failed immediately */ if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; else if (!bio_flagged(bio, BIO_UPTODATE)) @@ -407,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, bio_put(bio); } return ret; +out_free_page: + __free_page(page); +out_free_bio: + bio_put(bio); +out: + return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index b06cf5c2a82..81f34311659 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include "blk.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); @@ -501,6 +502,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; q->backing_dev_info.state = 0; q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; + q->backing_dev_info.name = "block"; err = bdi_init(&q->backing_dev_info); if (err) { @@ -575,13 +577,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; } - /* - * if caller didn't supply a lock, they get per-queue locking with - * our embedded lock - */ - if (!lock) - lock = &q->__queue_lock; - q->request_fn = rfn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; @@ -595,8 +590,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->sg_reserved_size = INT_MAX; - blk_set_cmd_filter_defaults(&q->cmd_filter); - /* * all done */ @@ -1120,31 +1113,26 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_type = REQ_TYPE_FS; /* - * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) + * Inherit FAILFAST from bio (for read-ahead, and explicit + * FAILFAST). FAILFAST flags are identical for req and bio. */ - if (bio_rw_ahead(bio)) - req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER); - if (bio_failfast_dev(bio)) - req->cmd_flags |= REQ_FAILFAST_DEV; - if (bio_failfast_transport(bio)) - req->cmd_flags |= REQ_FAILFAST_TRANSPORT; - if (bio_failfast_driver(bio)) - req->cmd_flags |= REQ_FAILFAST_DRIVER; - - if (unlikely(bio_discard(bio))) { + if (bio_rw_flagged(bio, BIO_RW_AHEAD)) + req->cmd_flags |= REQ_FAILFAST_MASK; + else + req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; + + if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) { req->cmd_flags |= REQ_DISCARD; - if (bio_barrier(bio)) + if (bio_rw_flagged(bio, BIO_RW_BARRIER)) req->cmd_flags |= REQ_SOFTBARRIER; - req->q->prepare_discard_fn(req->q, req); - } else if (unlikely(bio_barrier(bio))) + } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) req->cmd_flags |= REQ_HARDBARRIER; - if (bio_sync(bio)) + if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) req->cmd_flags |= REQ_RW_SYNC; - if (bio_rw_meta(bio)) + if (bio_rw_flagged(bio, BIO_RW_META)) req->cmd_flags |= REQ_RW_META; - if (bio_noidle(bio)) + if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; @@ -1159,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) */ static inline bool queue_should_plug(struct request_queue *q) { - return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); + return !(blk_queue_nonrot(q) && blk_queue_queuing(q)); } static int __make_request(struct request_queue *q, struct bio *bio) @@ -1168,10 +1156,16 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret; unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); - const int sync = bio_sync(bio); - const int unplug = bio_unplug(bio); + const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); + const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); + const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; + if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { + bio_endio(bio, -EOPNOTSUPP); + return 0; + } /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1181,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely(bio_barrier(bio)) || elv_queue_empty(q)) + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1194,6 +1188,9 @@ static int __make_request(struct request_queue *q, struct bio *bio) trace_block_bio_backmerge(q, bio); + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + req->biotail->bi_next = bio; req->biotail = bio; req->__data_len += bytes; @@ -1213,6 +1210,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) trace_block_bio_frontmerge(q, bio); + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { + blk_rq_set_mixed_merge(req); + req->cmd_flags &= ~REQ_FAILFAST_MASK; + req->cmd_flags |= ff; + } + bio->bi_next = req->bio; req->bio = bio; @@ -1434,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { + if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1460,24 +1464,20 @@ static inline void __generic_make_request(struct bio *bio) if (old_sector != -1) trace_block_remap(q, bio, old_dev, old_sector); - trace_block_bio_queue(q, bio); - old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_discard(bio) && !q->prepare_discard_fn) { - err = -EOPNOTSUPP; - goto end_io; - } - if (bio_barrier(bio) && bio_has_data(bio) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + if (bio_rw_flagged(bio, BIO_RW_DISCARD) && + !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } + trace_block_bio_queue(q, bio); + ret = q->make_request_fn(q, bio); } while (ret); @@ -1662,6 +1662,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); +/** + * blk_rq_err_bytes - determine number of bytes till the next failure boundary + * @rq: request to examine + * + * Description: + * A request could be merge of IOs which require different failure + * handling. This function determines the number of bytes which + * can be failed from the beginning of the request without + * crossing into area which need to be retried further. + * + * Return: + * The number of bytes to fail. + * + * Context: + * queue_lock must be held. + */ +unsigned int blk_rq_err_bytes(const struct request *rq) +{ + unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + unsigned int bytes = 0; + struct bio *bio; + + if (!(rq->cmd_flags & REQ_MIXED_MERGE)) + return blk_rq_bytes(rq); + + /* + * Currently the only 'mixing' which can happen is between + * different fastfail types. We can safely fail portions + * which have all the failfast bits that the first one has - + * the ones which are at least as eager to fail as the first + * one. + */ + for (bio = rq->bio; bio; bio = bio->bi_next) { + if ((bio->bi_rw & ff) != ff) + break; + bytes += bio->bi_size; + } + + /* this could lead to infinite loop */ + BUG_ON(blk_rq_bytes(rq) && !bytes); + return bytes; +} +EXPORT_SYMBOL_GPL(blk_rq_err_bytes); + static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (blk_do_io_stat(req)) { @@ -1815,8 +1859,15 @@ void blk_dequeue_request(struct request *rq) * and to it is freed is accounted as io that is in progress at * the driver side. */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]++; + /* + * Mark this device as supporting hardware queuing, if + * we have more IOs in flight than 4. + */ + if (!blk_queue_queuing(q) && queue_in_flight(q) > 4) + set_bit(QUEUE_FLAG_CQ, &q->queue_flags); + } } /** @@ -2008,6 +2059,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (blk_fs_request(req) || blk_discard_rq(req)) req->__sector += total_bytes >> 9; + /* mixed attributes always follow the first bio */ + if (req->cmd_flags & REQ_MIXED_MERGE) { + req->cmd_flags &= ~REQ_FAILFAST_MASK; + req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; + } + /* * If total number of sectors is less than the first segment * size, something has gone terribly wrong. @@ -2145,7 +2202,7 @@ bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { return blk_end_bidi_request(rq, error, nr_bytes, 0); } -EXPORT_SYMBOL_GPL(blk_end_request); +EXPORT_SYMBOL(blk_end_request); /** * blk_end_request_all - Helper function for drives to finish the request. @@ -2166,7 +2223,7 @@ void blk_end_request_all(struct request *rq, int error) pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } -EXPORT_SYMBOL_GPL(blk_end_request_all); +EXPORT_SYMBOL(blk_end_request_all); /** * blk_end_request_cur - Helper function to finish the current request chunk. @@ -2184,7 +2241,26 @@ bool blk_end_request_cur(struct request *rq, int error) { return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } -EXPORT_SYMBOL_GPL(blk_end_request_cur); +EXPORT_SYMBOL(blk_end_request_cur); + +/** + * blk_end_request_err - Finish a request till the next failure boundary. + * @rq: the request to finish till the next failure boundary for + * @error: must be negative errno + * + * Description: + * Complete @rq till the next failure boundary. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool blk_end_request_err(struct request *rq, int error) +{ + WARN_ON(error >= 0); + return blk_end_request(rq, error, blk_rq_err_bytes(rq)); +} +EXPORT_SYMBOL_GPL(blk_end_request_err); /** * __blk_end_request - Helper function for drivers to complete the request. @@ -2203,7 +2279,7 @@ bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { return __blk_end_bidi_request(rq, error, nr_bytes, 0); } -EXPORT_SYMBOL_GPL(__blk_end_request); +EXPORT_SYMBOL(__blk_end_request); /** * __blk_end_request_all - Helper function for drives to finish the request. @@ -2224,7 +2300,7 @@ void __blk_end_request_all(struct request *rq, int error) pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); BUG_ON(pending); } -EXPORT_SYMBOL_GPL(__blk_end_request_all); +EXPORT_SYMBOL(__blk_end_request_all); /** * __blk_end_request_cur - Helper function to finish the current request chunk. @@ -2243,14 +2319,33 @@ bool __blk_end_request_cur(struct request *rq, int error) { return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } -EXPORT_SYMBOL_GPL(__blk_end_request_cur); +EXPORT_SYMBOL(__blk_end_request_cur); + +/** + * __blk_end_request_err - Finish a request till the next failure boundary. + * @rq: the request to finish till the next failure boundary for + * @error: must be negative errno + * + * Description: + * Complete @rq till the next failure boundary. Must be called + * with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool __blk_end_request_err(struct request *rq, int error) +{ + WARN_ON(error >= 0); + return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); +} +EXPORT_SYMBOL_GPL(__blk_end_request_err); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and - we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ - rq->cmd_flags |= (bio->bi_rw & 3); + /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ + rq->cmd_flags |= bio->bi_rw & REQ_RW; if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -2365,7 +2460,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, __bio_clone(bio, bio_src); if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask)) + bio_integrity_clone(bio, bio_src, gfp_mask, bs)) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) @@ -2397,6 +2492,14 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay) +{ + return queue_delayed_work(kblockd_workqueue, work, delay); +} +EXPORT_SYMBOL(kblockd_schedule_delayed_work); + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 73e28d35568..15c630813b1 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -379,6 +379,7 @@ void blk_integrity_unregister(struct gendisk *disk) kobject_uevent(&bi->kobj, KOBJ_REMOVE); kobject_del(&bi->kobj); + kobject_put(&bi->kobj); kmem_cache_free(integrity_cachep, bi); disk->integrity = NULL; } diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c new file mode 100644 index 00000000000..ca564202ed7 --- /dev/null +++ b/block/blk-iopoll.c @@ -0,0 +1,227 @@ +/* + * Functions related to interrupt-poll handling in the block layer. This + * is similar to NAPI for network devices. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/blk-iopoll.h> +#include <linux/delay.h> + +#include "blk.h" + +int blk_iopoll_enabled = 1; +EXPORT_SYMBOL(blk_iopoll_enabled); + +static unsigned int blk_iopoll_budget __read_mostly = 256; + +static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); + +/** + * blk_iopoll_sched - Schedule a run of the iopoll handler + * @iop: The parent iopoll structure + * + * Description: + * Add this blk_iopoll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. The driver must already have gotten a + * succesful return from blk_iopoll_sched_prep() before calling this. + **/ +void blk_iopoll_sched(struct blk_iopoll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll)); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_sched); + +/** + * __blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * See blk_iopoll_complete(). This function must be called with interrupts + * disabled. + **/ +void __blk_iopoll_complete(struct blk_iopoll *iop) +{ + list_del(&iop->list); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(__blk_iopoll_complete); + +/** + * blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * If a driver consumes less than the assigned budget in its run of the + * iopoll handler, it'll end the polled mode by calling this function. The + * iopoll handler will not be invoked again before blk_iopoll_sched_prep() + * is called. + **/ +void blk_iopoll_complete(struct blk_iopoll *iopoll) +{ + unsigned long flags; + + local_irq_save(flags); + __blk_iopoll_complete(iopoll); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_complete); + +static void blk_iopoll_softirq(struct softirq_action *h) +{ + struct list_head *list = &__get_cpu_var(blk_cpu_iopoll); + int rearm = 0, budget = blk_iopoll_budget; + unsigned long start_time = jiffies; + + local_irq_disable(); + + while (!list_empty(list)) { + struct blk_iopoll *iop; + int work, weight; + + /* + * If softirq window is exhausted then punt. + */ + if (budget <= 0 || time_after(jiffies, start_time)) { + rearm = 1; + break; + } + + local_irq_enable(); + + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + iop = list_entry(list->next, struct blk_iopoll, list); + + weight = iop->weight; + work = 0; + if (test_bit(IOPOLL_F_SCHED, &iop->state)) + work = iop->poll(iop, weight); + + budget -= work; + + local_irq_disable(); + + /* + * Drivers must not modify the iopoll state, if they + * consume their assigned weight (or more, some drivers can't + * easily just stop processing, they have to complete an + * entire mask of commands).In such cases this code + * still "owns" the iopoll instance and therefore can + * move the instance around on the list at-will. + */ + if (work >= weight) { + if (blk_iopoll_disable_pending(iop)) + __blk_iopoll_complete(iop); + else + list_move_tail(&iop->list, list); + } + } + + if (rearm) + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + + local_irq_enable(); +} + +/** + * blk_iopoll_disable - Disable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Disable io polling and wait for any pending callbacks to have completed. + **/ +void blk_iopoll_disable(struct blk_iopoll *iop) +{ + set_bit(IOPOLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) + msleep(1); + clear_bit(IOPOLL_F_DISABLE, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_disable); + +/** + * blk_iopoll_enable - Enable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Enable iopoll on this @iop. Note that the handler run will not be + * scheduled, it will only mark it as active. + **/ +void blk_iopoll_enable(struct blk_iopoll *iop) +{ + BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_enable); + +/** + * blk_iopoll_init - Initialize this @iop + * @iop: The parent iopoll structure + * @weight: The default weight (or command completion budget) + * @poll_fn: The handler to invoke + * + * Description: + * Initialize this blk_iopoll structure. Before being actively used, the + * driver must call blk_iopoll_enable(). + **/ +void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) +{ + memset(iop, 0, sizeof(*iop)); + INIT_LIST_HEAD(&iop->list); + iop->weight = weight; + iop->poll = poll_fn; + set_bit(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_init); + +static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), + &__get_cpu_var(blk_cpu_iopoll)); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = { + .notifier_call = blk_iopoll_cpu_notify, +}; + +static __init int blk_iopoll_setup(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); + + open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); + register_hotcpu_notifier(&blk_iopoll_cpu_notifier); + return 0; +} +subsys_initcall(blk_iopoll_setup); diff --git a/block/blk-merge.c b/block/blk-merge.c index 39ce64432ba..b0de8574fdc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 1; } +/** + * blk_rq_set_mixed_merge - mark a request as mixed merge + * @rq: request to mark as mixed merge + * + * Description: + * @rq is about to be mixed merged. Make sure the attributes + * which can be mixed are set in each bio and mark @rq as mixed + * merged. + */ +void blk_rq_set_mixed_merge(struct request *rq) +{ + unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + struct bio *bio; + + if (rq->cmd_flags & REQ_MIXED_MERGE) + return; + + /* + * @rq will no longer represent mixable attributes for all the + * contained bios. It will just track those of the first one. + * Distributes the attributs to each bio. + */ + for (bio = rq->bio; bio; bio = bio->bi_next) { + WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) && + (bio->bi_rw & REQ_FAILFAST_MASK) != ff); + bio->bi_rw |= ff; + } + rq->cmd_flags |= REQ_MIXED_MERGE; +} + static void blk_account_io_merge(struct request *req) { if (blk_do_io_stat(req)) { @@ -360,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, return 0; /* + * If failfast settings disagree or any of the two is already + * a mixed merge, mark both as mixed before proceeding. This + * makes sure that all involved bios have mixable attributes + * set properly. + */ + if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || + (req->cmd_flags & REQ_FAILFAST_MASK) != + (next->cmd_flags & REQ_FAILFAST_MASK)) { + blk_rq_set_mixed_merge(req); + blk_rq_set_mixed_merge(next); + } + + /* * At this point we have either done a back merge * or front merge. We need the smaller start_time of * the merged requests to be the current request diff --git a/block/blk-settings.c b/block/blk-settings.c index bd582a7f531..e0695bca702 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -7,6 +7,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ +#include <linux/gcd.h> #include "blk.h" @@ -33,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) EXPORT_SYMBOL(blk_queue_prep_rq); /** - * blk_queue_set_discard - set a discard_sectors function for queue - * @q: queue - * @dfn: prepare_discard function - * - * It's possible for a queue to register a discard callback which is used - * to transform a discard request into the appropriate type for the - * hardware. If none is registered, then discard requests are failed - * with %EOPNOTSUPP. - * - */ -void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) -{ - q->prepare_discard_fn = dfn; -} -EXPORT_SYMBOL(blk_queue_set_discard); - -/** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue * @mbfn: merge_bvec_fn @@ -110,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_hw_segments = MAX_HW_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; + lim->max_sectors = BLK_DEF_MAX_SECTORS; + lim->max_hw_sectors = INT_MAX; + lim->max_discard_sectors = SAFE_MAX_SECTORS; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -163,6 +149,14 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->unplug_timer.data = (unsigned long)q; blk_set_default_limits(&q->limits); + blk_queue_max_sectors(q, SAFE_MAX_SECTORS); + + /* + * If the caller didn't supply a lock, fall back to our embedded + * per-queue locks + */ |