author	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-15 16:33:41 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-15 16:33:41 -0800
commit	f412f2c60b480fa5140a4b4cb321cd48c64e1812 (patch)
tree	aafd5a5922b43daca4abdfa9bb723fc1f334108d
parent	cd1177f25069cb494680eedd718e7c6d8fd85d10 (diff)
parent	1cf7e9c68fe84248174e998922b39e508375e7c1 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull second round of block driver updates from Jens Axboe:
 "As mentioned in the original pull request, the bcache bits were pulled
  because of their dependency on the immutable bio vecs. Kent re-did this
  part and resubmitted it, so here's the 2nd round of (mostly) driver
  updates for 3.13. It contains:

   - The bcache work from Kent.

   - Conversion of virtio-blk to blk-mq. This removes the bio and request
     paths and substitutes the blk-mq path instead. The end result is
     almost 200 deleted lines. The patch is acked by Asias and Christoph,
     who both did a bunch of testing.

   - Removal of the bootmem.h include from Grygorii Strashko, part of a
     larger series of his killing the dependency on that header file.

   - Removal of __cpuinit from blk-mq, from Paul Gortmaker"

* 'for-linus' of git://git.kernel.dk/linux-block: (56 commits)
  virtio_blk: blk-mq support
  blk-mq: remove newly added instances of __cpuinit
  bcache: defensively handle format strings
  bcache: Bypass torture test
  bcache: Delete some slower inline asm
  bcache: Use ida for bcache block dev minor
  bcache: Fix sysfs splat on shutdown with flash only devs
  bcache: Better full stripe scanning
  bcache: Have btree_split() insert into parent directly
  bcache: Move spinlock into struct time_stats
  bcache: Kill sequential_merge option
  bcache: Kill bch_next_recurse_key()
  bcache: Avoid deadlocking in garbage collection
  bcache: Incremental gc
  bcache: Add make_btree_freeing_key()
  bcache: Add btree_node_write_sync()
  bcache: PRECEDING_KEY()
  bcache: bch_(btree|extent)_ptr_invalid()
  bcache: Don't bother with bucket refcount for btree node allocations
  bcache: Debug code improvements
  ...
-rw-r--r--	block/blk-ioc.c	1
-rw-r--r--	block/blk-mq-cpu.c	8
-rw-r--r--	block/blk-mq.c	6
-rw-r--r--	drivers/block/virtio_blk.c	322
-rw-r--r--	drivers/md/bcache/Kconfig	11
-rw-r--r--	drivers/md/bcache/alloc.c	383
-rw-r--r--	drivers/md/bcache/bcache.h	327
-rw-r--r--	drivers/md/bcache/bset.c	289
-rw-r--r--	drivers/md/bcache/bset.h	93
-rw-r--r--	drivers/md/bcache/btree.c	1396
-rw-r--r--	drivers/md/bcache/btree.h	195
-rw-r--r--	drivers/md/bcache/closure.c	103
-rw-r--r--	drivers/md/bcache/closure.h	183
-rw-r--r--	drivers/md/bcache/debug.c	185
-rw-r--r--	drivers/md/bcache/debug.h	50
-rw-r--r--	drivers/md/bcache/journal.c	293
-rw-r--r--	drivers/md/bcache/journal.h	52
-rw-r--r--	drivers/md/bcache/movinggc.c	87
-rw-r--r--	drivers/md/bcache/request.c	1102
-rw-r--r--	drivers/md/bcache/request.h	43
-rw-r--r--	drivers/md/bcache/stats.c	26
-rw-r--r--	drivers/md/bcache/stats.h	13
-rw-r--r--	drivers/md/bcache/super.c	190
-rw-r--r--	drivers/md/bcache/sysfs.c	42
-rw-r--r--	drivers/md/bcache/trace.c	1
-rw-r--r--	drivers/md/bcache/util.c	12
-rw-r--r--	drivers/md/bcache/util.h	15
-rw-r--r--	drivers/md/bcache/writeback.c	455
-rw-r--r--	drivers/md/bcache/writeback.h	46
-rw-r--r--	include/trace/events/bcache.h	47
-rw-r--r--	include/uapi/linux/bcache.h	373
31 files changed, 3069 insertions, 3280 deletions
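
[Editor's note] For context on the virtio-blk conversion below: in the 3.13-era blk-mq API, a driver fills in a struct blk_mq_ops and a struct blk_mq_reg and calls blk_mq_init_queue(), rather than supplying a request_fn through blk_init_queue(). A minimal sketch of that registration pattern, modeled on the virtio_mq_ops/virtio_mq_reg definitions in the diff; my_queue_rq, my_mq_reg, my_probe, and struct my_req are illustrative names, not part of the patch:

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/numa.h>

struct my_req {
	/* driver-private per-request state, preallocated via cmd_size */
	u8 status;
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct my_req *mr = rq->special;	/* cmd_size payload */

	/* ...issue rq to hardware here... */
	(void)mr;
	return BLK_MQ_RQ_QUEUE_OK;	/* or BLK_MQ_RQ_QUEUE_BUSY to requeue */
}

static struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
	.free_hctx	= blk_mq_free_single_hw_queue,
};

static struct blk_mq_reg my_mq_reg = {
	.ops		= &my_mq_ops,
	.nr_hw_queues	= 1,
	.queue_depth	= 64,
	.cmd_size	= sizeof(struct my_req),
	.numa_node	= NUMA_NO_NODE,
	.flags		= BLK_MQ_F_SHOULD_MERGE,
};

static int my_probe(void)
{
	/* blk-mq builds the request_queue; no request_fn is involved */
	struct request_queue *q = blk_mq_init_queue(&my_mq_reg, NULL);

	return q ? 0 : -ENOMEM;
}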
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 46cd7bd18b3..242df01413f 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -6,7 +6,6 @@
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h>
#include "blk.h"
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index f8ea39d7ae5..0045ace9bdf 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -13,8 +13,8 @@
static LIST_HEAD(blk_mq_cpu_notify_list);
static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
-static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int blk_mq_main_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long) hcpu;
struct blk_mq_cpu_notifier *notify;
@@ -28,8 +28,8 @@ static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static void __cpuinit blk_mq_cpu_notify(void *data, unsigned long action,
- unsigned int cpu)
+static void blk_mq_cpu_notify(void *data, unsigned long action,
+ unsigned int cpu)
{
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c661896e246..862f458d476 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1444,7 +1444,7 @@ void blk_mq_free_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_mq_free_queue);
/* Basically redo blk_mq_init_queue with queue frozen */
-static void __cpuinit blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q)
{
blk_mq_freeze_queue(q);
@@ -1461,8 +1461,8 @@ static void __cpuinit blk_mq_queue_reinit(struct request_queue *q)
blk_mq_unfreeze_queue(q);
}
-static int __cpuinit blk_mq_queue_reinit_notify(struct notifier_block *nb,
- unsigned long action, void *hcpu)
+static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
+ unsigned long action, void *hcpu)
{
struct request_queue *q;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f3be496ac8f..588479d58f5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
#define PART_BITS 4
-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
static int major;
static DEFINE_IDA(vd_index_ida);
@@ -26,13 +25,11 @@ struct virtio_blk
{
struct virtio_device *vdev;
struct virtqueue *vq;
- wait_queue_head_t queue_wait;
+ spinlock_t vq_lock;
/* The disk structure for the kernel. */
struct gendisk *disk;
- mempool_t *pool;
-
/* Process context for config space updates */
struct work_struct config_work;
@@ -47,31 +44,17 @@ struct virtio_blk
/* Ida index - used to track minor number allocations. */
int index;
-
- /* Scatterlist: can be too big for stack. */
- struct scatterlist sg[/*sg_elems*/];
};
struct virtblk_req
{
struct request *req;
- struct bio *bio;
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
- struct work_struct work;
- struct virtio_blk *vblk;
- int flags;
u8 status;
struct scatterlist sg[];
};
-enum {
- VBLK_IS_FLUSH = 1,
- VBLK_REQ_FLUSH = 2,
- VBLK_REQ_DATA = 4,
- VBLK_REQ_FUA = 8,
-};
-
static inline int virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {
@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
}
}
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
- gfp_t gfp_mask)
-{
- struct virtblk_req *vbr;
-
- vbr = mempool_alloc(vblk->pool, gfp_mask);
- if (!vbr)
- return NULL;
-
- vbr->vblk = vblk;
- if (use_bio)
- sg_init_table(vbr->sg, vblk->sg_elems);
-
- return vbr;
-}
-
static int __virtblk_add_req(struct virtqueue *vq,
struct virtblk_req *vbr,
struct scatterlist *data_sg,
@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
- struct virtio_blk *vblk = vbr->vblk;
- DEFINE_WAIT(wait);
- int ret;
-
- spin_lock_irq(vblk->disk->queue->queue_lock);
- while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
- have_data)) < 0)) {
- prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- io_schedule();
- spin_lock_irq(vblk->disk->queue->queue_lock);
-
- finish_wait(&vblk->queue_wait, &wait);
- }
-
- virtqueue_kick(vblk->vq);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
- vbr->flags |= VBLK_IS_FLUSH;
- vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
- vbr->out_hdr.sector = 0;
- vbr->out_hdr.ioprio = 0;
-
- virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
- struct bio *bio = vbr->bio;
- bool have_data;
-
- vbr->flags &= ~VBLK_IS_FLUSH;
- vbr->out_hdr.type = 0;
- vbr->out_hdr.sector = bio->bi_sector;
- vbr->out_hdr.ioprio = bio_prio(bio);
-
- if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
- have_data = true;
- if (bio->bi_rw & REQ_WRITE)
- vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
- else
- vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
- } else
- have_data = false;
-
- virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
- struct virtblk_req *vbr;
-
- vbr = container_of(work, struct virtblk_req, work);
-
- virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
- struct virtblk_req *vbr;
-
- vbr = container_of(work, struct virtblk_req, work);
-
- virtblk_bio_send_flush(vbr);
-}
-
static inline void virtblk_request_done(struct virtblk_req *vbr)
{
- struct virtio_blk *vblk = vbr->vblk;
struct request *req = vbr->req;
int error = virtblk_result(vbr);
@@ -231,92 +123,45 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
req->errors = (error != 0);
}
- __blk_end_request_all(req, error);
- mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
-
- if (vbr->flags & VBLK_REQ_DATA) {
- /* Send out the actual write data */
- INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
- queue_work(virtblk_wq, &vbr->work);
- } else {
- bio_endio(vbr->bio, virtblk_result(vbr));
- mempool_free(vbr, vblk->pool);
- }
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
-
- if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
- /* Send out a flush before end the bio */
- vbr->flags &= ~VBLK_REQ_DATA;
- INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
- queue_work(virtblk_wq, &vbr->work);
- } else {
- bio_endio(vbr->bio, virtblk_result(vbr));
- mempool_free(vbr, vblk->pool);
- }
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
- if (unlikely(vbr->flags & VBLK_IS_FLUSH))
- virtblk_bio_flush_done(vbr);
- else
- virtblk_bio_data_done(vbr);
+ blk_mq_end_io(req, error);
}
static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
- bool bio_done = false, req_done = false;
+ bool req_done = false;
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
- spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+ spin_lock_irqsave(&vblk->vq_lock, flags);
do {
virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
- if (vbr->bio) {
- virtblk_bio_done(vbr);
- bio_done = true;
- } else {
- virtblk_request_done(vbr);
- req_done = true;
- }
+ virtblk_request_done(vbr);
+ req_done = true;
}
if (unlikely(virtqueue_is_broken(vq)))
break;
} while (!virtqueue_enable_cb(vq));
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
/* In case queue is stopped waiting for more buffers. */
if (req_done)
- blk_start_queue(vblk->disk->queue);
- spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
- if (bio_done)
- wake_up(&vblk->queue_wait);
+ blk_mq_start_stopped_hw_queues(vblk->disk->queue);
}
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
- struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtblk_req *vbr = req->special;
+ unsigned long flags;
unsigned int num;
- struct virtblk_req *vbr;
+ const bool last = (req->cmd_flags & REQ_END) != 0;
- vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
- if (!vbr)
- /* When another request finishes we'll try again. */
- return false;
+ BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
vbr->req = req;
- vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
@@ -344,7 +189,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}
- num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+ num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
if (num) {
if (rq_data_dir(vbr->req) == WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -352,63 +197,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
}
- if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
- mempool_free(vbr, vblk->pool);
- return false;
- }
-
- return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
- struct virtio_blk *vblk = q->queuedata;
- struct request *req;
- unsigned int issued = 0;
-
- while ((req = blk_peek_request(q)) != NULL) {
- BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
- /* If this request fails, stop queue and wait for something to
- finish to restart it. */
- if (!do_req(q, vblk, req)) {
- blk_stop_queue(q);
- break;
- }
- blk_start_request(req);
- issued++;
- }
-
- if (issued)
+ spin_lock_irqsave(&vblk->vq_lock, flags);
+ if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ blk_mq_stop_hw_queue(hctx);
virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
- struct virtio_blk *vblk = q->queuedata;
- struct virtblk_req *vbr;
-
- BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
- vbr = virtblk_alloc_req(vblk, GFP_NOIO);
- if (!vbr) {
- bio_endio(bio, -ENOMEM);
- return;
+ return BLK_MQ_RQ_QUEUE_BUSY;
}
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
- vbr->bio = bio;
- vbr->flags = 0;
- if (bio->bi_rw & REQ_FLUSH)
- vbr->flags |= VBLK_REQ_FLUSH;
- if (bio->bi_rw & REQ_FUA)
- vbr->flags |= VBLK_REQ_FUA;
- if (bio->bi_size)
- vbr->flags |= VBLK_REQ_DATA;
-
- if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
- virtblk_bio_send_flush(vbr);
- else
- virtblk_bio_send_data(vbr);
+ if (last)
+ virtqueue_kick(vblk->vq);
+ return BLK_MQ_RQ_QUEUE_OK;
}
/* return id (s/n) string for *disk to *id_str
@@ -673,12 +473,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR,
virtblk_cache_type_show, virtblk_cache_type_store);
+static struct blk_mq_ops virtio_mq_ops = {
+ .queue_rq = virtio_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .alloc_hctx = blk_mq_alloc_single_hw_queue,
+ .free_hctx = blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+ .ops = &virtio_mq_ops,
+ .nr_hw_queues = 1,
+ .queue_depth = 64,
+ .numa_node = NUMA_NO_NODE,
+ .flags = BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+ struct request *rq, unsigned int nr)
+{
+ struct virtio_blk *vblk = data;
+ struct virtblk_req *vbr = rq->special;
+
+ sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
static int virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
struct request_queue *q;
int err, index;
- int pool_size;
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
@@ -702,17 +525,14 @@ static int virtblk_probe(struct virtio_device *vdev)
/* We need an extra sg elements at head and tail. */
sg_elems += 2;
- vdev->priv = vblk = kmalloc(sizeof(*vblk) +
- sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+ vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) {
err = -ENOMEM;
goto out_free_index;
}
- init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
- sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -721,31 +541,27 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk);
if (err)
goto out_free_vblk;
-
- pool_size = sizeof(struct virtblk_req);
- if (use_bio)
- pool_size += sizeof(struct scatterlist) * sg_elems;
- vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
- if (!vblk->pool) {
- err = -ENOMEM;
- goto out_free_vq;
- }
+ spin_lock_init(&vblk->vq_lock);
/* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS);
if (!vblk->disk) {
err = -ENOMEM;
- goto out_mempool;
+ goto out_free_vq;
}
- q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+ virtio_mq_reg.cmd_size =
+ sizeof(struct virtblk_req) +
+ sizeof(struct scatterlist) * sg_elems;
+
+ q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
}
- if (use_bio)
- blk_queue_make_request(q, virtblk_make_request);
+ blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -848,8 +664,6 @@ out_del_disk:
blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
put_disk(vblk->disk);
-out_mempool:
- mempool_destroy(vblk->pool);
out_free_vq:
vdev->config->del_vqs(vdev);
out_free_vblk:
@@ -881,7 +695,6 @@ static void virtblk_remove(struct virtio_device *vdev)
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk);
- mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev);
kfree(vblk);
@@ -905,10 +718,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
flush_work(&vblk->config_work);
- spin_lock_irq(vblk->disk->queue->queue_lock);
- blk_stop_queue(vblk->disk->queue);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- blk_sync_queue(vblk->disk->queue);
+ blk_mq_stop_hw_queues(vblk->disk->queue);
vdev->config->del_vqs(vdev);
return 0;
@@ -921,11 +731,9 @@ static int virtblk_restore(struct virtio_device *vdev)
vblk->config_enable = true;
ret = init_vq(vdev->priv);
- if (!ret) {
- spin_lock_irq(vblk->disk->queue->queue_lock);
- blk_start_queue(vblk->disk->queue);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- }
+ if (!ret)
+ blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
return ret;
}
#endif
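
[Editor's note] The backpressure design in the conversion above is worth calling out: when the virtqueue has no room, virtio_queue_rq() stops the hardware queue and returns BLK_MQ_RQ_QUEUE_BUSY so blk-mq requeues the request, and the interrupt path restarts the queue once completions free ring space. Condensed from the diff, using the vblk/vbr fields the patch introduces:

	/* submission side, in virtio_queue_rq(): */
	spin_lock_irqsave(&vblk->vq_lock, flags);
	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
		spin_unlock_irqrestore(&vblk->vq_lock, flags);
		blk_mq_stop_hw_queue(hctx);	/* ring full: stop dispatching */
		virtqueue_kick(vblk->vq);	/* flush what is already queued */
		return BLK_MQ_RQ_QUEUE_BUSY;	/* blk-mq resubmits this request */
	}
	spin_unlock_irqrestore(&vblk->vq_lock, flags);

	/* completion side, in virtblk_done(): */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue);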
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index f950c9d29f3..2638417b19a 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -13,15 +13,8 @@ config BCACHE_DEBUG
---help---
Don't select this option unless you're a developer
- Enables extra debugging tools (primarily a fuzz tester)
-
-config BCACHE_EDEBUG
- bool "Extended runtime checks"
- depends on BCACHE
- ---help---
- Don't select this option unless you're a developer
-
- Enables extra runtime checks which significantly affect performance
+ Enables extra debugging tools, allows expensive runtime checks to be
+ turned on.
config BCACHE_CLOSURES_DEBUG
bool "Debug closures"
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index e45f5575fd4..2b46bf1d7e4 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -63,13 +63,12 @@
#include "bcache.h"
#include "btree.h"
+#include <linux/blkdev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <trace/events/bcache.h>
-#define MAX_IN_FLIGHT_DISCARDS 8U
-
/* Bucket heap / gen */
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
@@ -121,75 +120,6 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock);
}
-/* Discard/TRIM */
-
-struct discard {
- struct list_head list;
- struct work_struct work;
- struct cache *ca;
- long bucket;
-
- struct bio bio;
- struct bio_vec bv;
-};
-
-static void discard_finish(struct work_struct *w)
-{
- struct discard *d = container_of(w, struct discard, work);
- struct cache *ca = d->ca;
- char buf[BDEVNAME_SIZE];
-
- if (!test_bit(BIO_UPTODATE, &d->bio.bi_flags)) {
- pr_notice("discard error on %s, disabling",
- bdevname(ca->bdev, buf));
- d->ca->discard = 0;
- }
-
- mutex_lock(&ca->set->bucket_lock);
-
- fifo_push(&ca->free, d->bucket);
- list_add(&d->list, &ca->discards);
- atomic_dec(&ca->discards_in_flight);
-
- mutex_unlock(&ca->set->bucket_lock);
-
- closure_wake_up(&ca->set->bucket_wait);
- wake_up_process(ca->alloc_thread);
-
- closure_put(&ca->set->cl);
-}
-
-static void discard_endio(struct bio *bio, int error)
-{
- struct discard *d = container_of(bio, struct discard, bio);
- schedule_work(&d->work);
-}
-
-static void do_discard(struct cache *ca, long bucket)
-{
- struct discard *d = list_first_entry(&ca->discards,
- struct discard, list);
-
- list_del(&d->list);
- d->bucket = bucket;
-
- atomic_inc(&ca->discards_in_flight);
- closure_get(&ca->set->cl);
-
- bio_init(&d->bio);
-
- d->bio.bi_sector = bucket_to_sector(ca->set, d->bucket);
- d->bio.bi_bdev = ca->bdev;
- d->bio.bi_rw = REQ_WRITE|REQ_DISCARD;
- d->bio.bi_max_vecs = 1;
- d->bio.bi_io_vec = d->bio.bi_inline_vecs;
- d->bio.bi_size = bucket_bytes(ca);
- d->bio.bi_end_io = discard_endio;
- bio_set_prio(&d->bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-
- submit_bio(0, &d->bio);
-}
-
/* Allocation */
static inline bool can_inc_bucket_gen(struct bucket *b)
@@ -280,7 +210,7 @@ static void invalidate_buckets_lru(struct cache *ca)
* multiple times when it can't do anything
*/
ca->invalidate_needs_gc = 1;
- bch_queue_gc(ca->set);
+ wake_up_gc(ca->set);
return;
}
@@ -305,7 +235,7 @@ static void invalidate_buckets_fifo(struct cache *ca)
if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1;
- bch_queue_gc(ca->set);
+ wake_up_gc(ca->set);
return;
}
}
@@ -330,7 +260,7 @@ static void invalidate_buckets_random(struct cache *ca)
if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1;
- bch_queue_gc(ca->set);
+ wake_up_gc(ca->set);
return;
}
}
@@ -398,16 +328,18 @@ static int bch_allocator_thread(void *arg)
else
break;
- allocator_wait(ca, (int) fifo_free(&ca->free) >
- atomic_read(&ca->discards_in_flight));
-
if (ca->discard) {
- allocator_wait(ca, !list_empty(&ca->discards));
- do_discard(ca, bucket);
- } else {
- fifo_push(&ca->free, bucket);
- closure_wake_up(&ca->set->bucket_wait);
+ mutex_unlock(&ca->set->bucket_lock);
+ blkdev_issue_discard(ca->bdev,
+ bucket_to_sector(ca->set, bucket),
+ ca->sb.block_size, GFP_KERNEL, 0);
+ mutex_lock(&ca->set->bucket_lock);
}
+
+ allocator_wait(ca, !fifo_full(&ca->free));
+
+ fifo_push(&ca->free, bucket);
+ wake_up(&ca->set->bucket_wait);
}
/*
@@ -433,16 +365,40 @@ static int bch_allocator_thread(void *arg)
}
}
-long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
+long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
{
- long r = -1;
-again:
+ DEFINE_WAIT(w);
+ struct bucket *b;
+ long r;
+
+ /* fastpath */
+ if (fifo_used(&ca->free) > ca->watermark[watermark]) {
+ fifo_pop(&ca->free, r);
+ goto out;
+ }
+
+ if (!wait)
+ return -1;
+
+ while (1) {
+ if (fifo_used(&ca->free) > ca->watermark[watermark]) {
+ fifo_pop(&ca->free, r);
+ break;
+ }
+
+ prepare_to_wait(&ca->set->bucket_wait, &w,
+ TASK_UNINTERRUPTIBLE);
+
+ mutex_unlock(&ca->set->bucket_lock);
+ schedule();
+ mutex_lock(&ca->set->bucket_lock);
+ }
+
+ finish_wait(&ca->set->bucket_wait, &w);
+out:
wake_up_process(ca->alloc_thread);
- if (fifo_used(&ca->free) > ca->watermark[watermark] &&
- fifo_pop(&ca->free, r)) {
- struct bucket *b = ca->buckets + r;
-#ifdef CONFIG_BCACHE_EDEBUG
+ if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
@@ -455,36 +411,23 @@ again:
BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r);
-#endif
- BUG_ON(atomic_read(&b->pin) != 1);
-
- SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
-
- if (watermark <= WATERMARK_METADATA) {
- SET_GC_MARK(b, GC_MARK_METADATA);
- b->prio = BTREE_PRIO;
- } else {
- SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
- b->prio = INITIAL_PRIO;
- }
-
- return r;
}
- trace_bcache_alloc_fail(ca);
+ b = ca->buckets + r;
- if (cl) {
- closure_wait(&ca->set->bucket_wait, cl);
+ BUG_ON(atomic_read(&b->pin) != 1);
- if (closure_blocking(cl)) {
- mutex_unlock(&ca->set->bucket_lock);
- closure_sync(cl);
- mutex_lock(&ca->set->bucket_lock);
- goto again;
- }
+ SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
+
+ if (watermark <= WATERMARK_METADATA) {
+ SET_GC_MARK(b, GC_MARK_METADATA);
+ b->prio = BTREE_PRIO;
+ } else {
+ SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+ b->prio = INITIAL_PRIO;
}
- return -1;
+ return r;
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
@@ -501,7 +444,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
- struct bkey *k, int n, struct closure *cl)
+ struct bkey *k, int n, bool wait)
{
int i;
@@ -514,7 +457,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
for (i = 0; i < n; i++) {
struct cache *ca = c->cache_by_alloc[i];
- long b = bch_bucket_alloc(ca, watermark, cl);
+ long b = bch_bucket_alloc(ca, watermark, wait);
if (b == -1)
goto err;
@@ -529,22 +472,202 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
return 0;
err:
bch_bucket_free(c, k);
- __bkey_put(c, k);
+ bkey_put(c, k);
return -1;
}
int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
- struct bkey *k, int n, struct closure *cl)
+ struct bkey *k, int n, bool wait)
{
int ret;
mutex_lock(&c->bucket_lock);
- ret = __bch_bucket_alloc_set(c, watermark, k, n, cl);
+ ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
mutex_unlock(&c->bucket_lock);
return ret;
}
+/* Sector allocator */
+
+struct open_bucket {
+ struct list_head list;
+ unsigned last_write_point;
+ unsigned sectors_free;
+ BKEY_PADDED(key);
+};
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ *