diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 10:45:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 10:45:18 -0800 |
commit | 5e57dc81106b942786f5db8e7ab8788bb9319933 (patch) | |
tree | 4533e01e745bba3614c77200b3fd96dd7af7e04e /drivers/block | |
parent | 0d25e3691186f5ae6feb0229717a60a5169dc5b2 (diff) | |
parent | c8123f8c9cb517403b51aa41c3c46ff5e10b2c17 (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
"Second round of updates and fixes for 3.14-rc2. Most of this stuff
has been queued up for a while. The notable exception is the blk-mq
changes, which are naturally a bit more in flux still.
The pull request contains:
- Two bug fixes for the new immutable vecs, causing crashes with raid
or swap. From Kent.
- Various blk-mq tweaks and fixes from Christoph. A fix for
integrity bio's from Nic.
- A few bcache fixes from Kent and Darrick Wong.
- xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
Swenson, and Roger Pau Monne.
- Fix for a vec miscount with integrity vectors from Martin.
- Minor annotations or fixes from Masanari Iida and Rashika Kheria.
- Tweak to null_blk to do more normal FIFO processing of requests
from Shlomo Pongratz.
- Elevator switching bypass fix from Tejun.
- Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from
me"
* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
block: add cond_resched() to potentially long running ioctl discard loop
xen-blkback: init persistent_purge_work work_struct
blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
block: Fix cloning of discard/write same bios
block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
blk-mq: rework flush sequencing logic
null_blk: use blk_complete_request and blk_mq_complete_request
virtio_blk: use blk_mq_complete_request
blk-mq: rework I/O completions
fs: Add prototype declaration to appropriate header file include/linux/bio.h
fs: Mark function as static in fs/bio-integrity.c
block/null_blk: Fix completion processing from LIFO to FIFO
block: Explicitly handle discard/write same segments
block: Fix nr_vecs for inline integrity vectors
blk-mq: Add bio_integrity setup to blk_mq_make_request
blk-mq: initialize sg_reserved_size
blk-mq: handle dma_drain_size
blk-mq: divert __blk_put_request for MQ ops
blk-mq: support at_head inserations for blk_execute_rq
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/null_blk.c | 97 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 7 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 66 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 5 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 14 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 11 |
6 files changed, 105 insertions, 95 deletions
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 3107282a974..091b9ea14fe 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -60,7 +60,9 @@ enum { NULL_IRQ_NONE = 0, NULL_IRQ_SOFTIRQ = 1, NULL_IRQ_TIMER = 2, +}; +enum { NULL_Q_BIO = 0, NULL_Q_RQ = 1, NULL_Q_MQ = 2, @@ -172,18 +174,20 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) static void end_cmd(struct nullb_cmd *cmd) { - if (cmd->rq) { - if (queue_mode == NULL_Q_MQ) - blk_mq_end_io(cmd->rq, 0); - else { - INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, 0); - } - } else if (cmd->bio) + switch (queue_mode) { + case NULL_Q_MQ: + blk_mq_end_io(cmd->rq, 0); + return; + case NULL_Q_RQ: + INIT_LIST_HEAD(&cmd->rq->queuelist); + blk_end_request_all(cmd->rq, 0); + break; + case NULL_Q_BIO: bio_endio(cmd->bio, 0); + break; + } - if (queue_mode != NULL_Q_MQ) - free_cmd(cmd); + free_cmd(cmd); } static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) @@ -195,6 +199,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) cq = &per_cpu(completion_queues, smp_processor_id()); while ((entry = llist_del_all(&cq->list)) != NULL) { + entry = llist_reverse_order(entry); do { cmd = container_of(entry, struct nullb_cmd, ll_list); end_cmd(cmd); @@ -221,61 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_softirq_done_fn(struct request *rq) { - blk_end_request_all(rq, 0); -} - -#ifdef CONFIG_SMP - -static void null_ipi_cmd_end_io(void *data) -{ - struct completion_queue *cq; - struct llist_node *entry, *next; - struct nullb_cmd *cmd; - - cq = &per_cpu(completion_queues, smp_processor_id()); - - entry = llist_del_all(&cq->list); - - while (entry) { - next = entry->next; - cmd = llist_entry(entry, struct nullb_cmd, ll_list); - end_cmd(cmd); - entry = next; - } -} - -static void null_cmd_end_ipi(struct nullb_cmd *cmd) -{ - struct call_single_data *data = &cmd->csd; - int cpu = get_cpu(); - struct completion_queue *cq = &per_cpu(completion_queues, cpu); - - cmd->ll_list.next = NULL; - - if (llist_add(&cmd->ll_list, &cq->list)) { - data->func = null_ipi_cmd_end_io; - data->flags = 0; - __smp_call_function_single(cpu, data, 0); - } - - put_cpu(); + end_cmd(rq->special); } -#endif /* CONFIG_SMP */ - static inline void null_handle_cmd(struct nullb_cmd *cmd) { /* Complete IO by inline, softirq or timer */ switch (irqmode) { - case NULL_IRQ_NONE: - end_cmd(cmd); - break; case NULL_IRQ_SOFTIRQ: -#ifdef CONFIG_SMP - null_cmd_end_ipi(cmd); -#else + switch (queue_mode) { + case NULL_Q_MQ: + blk_mq_complete_request(cmd->rq); + break; + case NULL_Q_RQ: + blk_complete_request(cmd->rq); + break; + case NULL_Q_BIO: + /* + * XXX: no proper submitting cpu information available. + */ + end_cmd(cmd); + break; + } + break; + case NULL_IRQ_NONE: end_cmd(cmd); -#endif break; case NULL_IRQ_TIMER: null_cmd_end_timer(cmd); @@ -411,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = null_init_hctx, + .complete = null_softirq_done_fn, }; static struct blk_mq_reg null_mq_reg = { @@ -609,13 +585,6 @@ static int __init null_init(void) { unsigned int i; -#if !defined(CONFIG_SMP) - if (irqmode == NULL_IRQ_SOFTIRQ) { - pr_warn("null_blk: softirq completions not available.\n"); - pr_warn("null_blk: using direct completions.\n"); - irqmode = NULL_IRQ_NONE; - } -#endif if (bs > PAGE_SIZE) { pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6a680d4de7f..b1cb3f4c4db 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq, return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static inline void virtblk_request_done(struct virtblk_req *vbr) +static inline void virtblk_request_done(struct request *req) { - struct request *req = vbr->req; + struct virtblk_req *vbr = req->special; int error = virtblk_result(vbr); if (req->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { - virtblk_request_done(vbr); + blk_mq_complete_request(vbr->req); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = { .map_queue = blk_mq_map_queue, .alloc_hctx = blk_mq_alloc_single_hw_queue, .free_hctx = blk_mq_free_single_hw_queue, + .complete = virtblk_request_done, }; static struct blk_mq_reg virtio_mq_reg = { diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4b97b86da92..64c60edcdfb 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, BUG_ON(num != 0); } -static void unmap_purged_grants(struct work_struct *work) +void xen_blkbk_unmap_purged_grants(struct work_struct *work) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); - INIT_LIST_HEAD(&blkif->persistent_purge_list); + BUG_ON(!list_empty(&blkif->persistent_purge_list)); root = &blkif->persistent_gnts; purge_list: foreach_grant_safe(persistent_gnt, n, root, node) { @@ -420,7 +420,6 @@ finished: blkif->vbd.overflow_max_grants = 0; /* We can defer this work */ - INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants); schedule_work(&blkif->persistent_purge_work); pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); return; @@ -625,9 +624,23 @@ purge_gnt_list: print_stats(blkif); } - /* Since we are shutting down remove all pages from the buffer */ - shrink_free_pagepool(blkif, 0 /* All */); + /* Drain pending purge work */ + flush_work(&blkif->persistent_purge_work); + if (log_stats) + print_stats(blkif); + + blkif->xenblkd = NULL; + xen_blkif_put(blkif); + + return 0; +} + +/* + * Remove persistent grants and empty the pool of free pages + */ +void xen_blkbk_free_caches(struct xen_blkif *blkif) +{ /* Free all persistent grant pages */ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) free_persistent_gnts(blkif, &blkif->persistent_gnts, @@ -636,13 +649,8 @@ purge_gnt_list: BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); blkif->persistent_gnt_c = 0; - if (log_stats) - print_stats(blkif); - - blkif->xenblkd = NULL; - xen_blkif_put(blkif); - - return 0; + /* Since we are shutting down remove all pages from the buffer */ + shrink_free_pagepool(blkif, 0 /* All */); } /* @@ -838,7 +846,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, struct grant_page **pages = pending_req->indirect_pages; struct xen_blkif *blkif = pending_req->blkif; int indirect_grefs, rc, n, nseg, i; - struct blkif_request_segment_aligned *segments = NULL; + struct blkif_request_segment *segments = NULL; nseg = pending_req->nr_pages; indirect_grefs = INDIRECT_PAGES(nseg); @@ -934,9 +942,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { - /* The initial value is one, and one refcnt taken at the - * start of the xen_blkif_schedule thread. */ - if (atomic_read(&blkif->refcnt) <= 2) + if (atomic_read(&blkif->inflight) == 0) break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); @@ -976,17 +982,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req->blkif, + struct xen_blkif *blkif = pending_req->blkif; + + xen_blkbk_unmap(blkif, pending_req->segments, pending_req->nr_pages); - make_response(pending_req->blkif, pending_req->id, + make_response(blkif, pending_req->id, pending_req->operation, pending_req->status); - xen_blkif_put(pending_req->blkif); - if (atomic_read(&pending_req->blkif->refcnt) <= 2) { - if (atomic_read(&pending_req->blkif->drain)) - complete(&pending_req->blkif->drain_complete); + free_req(blkif, pending_req); + /* + * Make sure the request is freed before releasing blkif, + * or there could be a race between free_req and the + * cleanup done in xen_blkif_free during shutdown. + * + * NB: The fact that we might try to wake up pending_free_wq + * before drain_complete (in case there's a drain going on) + * it's not a problem with our current implementation + * because we can assure there's no thread waiting on + * pending_free_wq if there's a drain going on, but it has + * to be taken into account if the current model is changed. + */ + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { + complete(&blkif->drain_complete); } - free_req(pending_req->blkif, pending_req); + xen_blkif_put(blkif); } } @@ -1240,6 +1259,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. */ xen_blkif_get(blkif); + atomic_inc(&blkif->inflight); for (i = 0; i < nseg; i++) { while ((bio == NULL) || diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 8d8807563d9..be052773ad0 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -57,7 +57,7 @@ #define MAX_INDIRECT_SEGMENTS 256 #define SEGS_PER_INDIRECT_FRAME \ - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) + (PAGE_SIZE/sizeof(struct blkif_request_segment)) #define MAX_INDIRECT_PAGES \ ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) #define INDIRECT_PAGES(_segs) \ @@ -278,6 +278,7 @@ struct xen_blkif { /* for barrier (drain) requests */ struct completion drain_complete; atomic_t drain; + atomic_t inflight; /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; @@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); int xen_blkif_purge_persistent(void *arg); +void xen_blkbk_free_caches(struct xen_blkif *blkif); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); @@ -383,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); +void xen_blkbk_unmap_purged_grants(struct work_struct *work); static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c2014a0aa20..9a547e6b6eb 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -125,8 +125,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->persistent_gnts.rb_node = NULL; spin_lock_init(&blkif->free_pages_lock); INIT_LIST_HEAD(&blkif->free_pages); + INIT_LIST_HEAD(&blkif->persistent_purge_list); blkif->free_pages_num = 0; atomic_set(&blkif->persistent_gnt_in_use, 0); + atomic_set(&blkif->inflight, 0); + INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); INIT_LIST_HEAD(&blkif->pending_free); @@ -259,6 +262,17 @@ static void xen_blkif_free(struct xen_blkif *blkif) if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); + /* Remove all persistent grants and the cache of ballooned pages. */ + xen_blkbk_free_caches(blkif); + + /* Make sure everything is drained before shutting down */ + BUG_ON(blkif->persistent_gnt_c != 0); + BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); + BUG_ON(blkif->free_pages_num != 0); + BUG_ON(!list_empty(&blkif->persistent_purge_list)); + BUG_ON(!list_empty(&blkif->free_pages)); + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + /* Check that there is no request in use */ list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { list_del(&req->free_list); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8dcfb54f160..efe1b476173 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock); #define DEV_NAME "xvd" /* name in /dev */ #define SEGS_PER_INDIRECT_FRAME \ - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) + (PAGE_SIZE/sizeof(struct blkif_request_segment)) #define INDIRECT_GREFS(_segs) \ ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) @@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req) unsigned long id; unsigned int fsect, lsect; int i, ref, n; - struct blkif_request_segment_aligned *segments = NULL; + struct blkif_request_segment *segments = NULL; /* * Used to store if we are able to queue the request by just using @@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req) } else { n = i % SEGS_PER_INDIRECT_FRAME; segments[n] = - (struct blkif_request_segment_aligned) { + (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; @@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateReconfiguring: case XenbusStateReconfigured: case XenbusStateUnknown: - case XenbusStateClosed: break; case XenbusStateConnected: blkfront_connect(info); break; + case XenbusStateClosed: + if (dev->state == XenbusStateClosed) + break; + /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: blkfront_closing(info); break; |