aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/aoe/aoecmd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/aoe/aoecmd.c')
-rw-r--r--drivers/block/aoe/aoecmd.c329
1 files changed, 194 insertions, 135 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 25ef5c014fc..422b7d84f68 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
+/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
/*
* aoecmd.c
* Filesystem request handling methods
@@ -35,14 +35,27 @@ module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
-static wait_queue_head_t ktiowq;
-static struct ktstate kts;
+/* The number of online cpus during module initialization gives us a
+ * convenient heuristic cap on the parallelism used for ktio threads
+ * doing I/O completion. It is not important that the cap equal the
+ * actual number of running CPUs at any given time, but because of CPU
+ * hotplug, we take care to use ncpus instead of using
+ * num_online_cpus() after module initialization.
+ */
+static int ncpus;
+
+/* mutex lock used for synchronization while thread spawning */
+static DEFINE_MUTEX(ktio_spawn_lock);
+
+static wait_queue_head_t *ktiowq;
+static struct ktstate *kts;
/* io completion queue */
-static struct {
+struct iocq_ktio {
struct list_head head;
spinlock_t lock;
-} iocq;
+};
+static struct iocq_ktio *iocq;
static struct page *empty_page;
@@ -51,8 +64,9 @@ new_skb(ulong len)
{
struct sk_buff *skb;
- skb = alloc_skb(len, GFP_ATOMIC);
+ skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
if (skb) {
+ skb_reserve(skb, MAX_HEADER);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb->protocol = __constant_htons(ETH_P_AOE);
@@ -182,8 +196,7 @@ aoe_freetframe(struct frame *f)
t = f->t;
f->buf = NULL;
- f->lba = 0;
- f->bv = NULL;
+ memset(&f->iter, 0, sizeof(f->iter));
f->r_skb = NULL;
f->flags = 0;
list_add(&f->head, &t->ffree);
@@ -281,21 +294,14 @@ newframe(struct aoedev *d)
}
static void
-skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
+skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
{
int frag = 0;
- ulong fcnt;
-loop:
- fcnt = bv->bv_len - (off - bv->bv_offset);
- if (fcnt > cnt)
- fcnt = cnt;
- skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
- cnt -= fcnt;
- if (cnt <= 0)
- return;
- bv++;
- off = bv->bv_offset;
- goto loop;
+ struct bio_vec bv;
+
+ __bio_for_each_segment(bv, bio, iter, iter)
+ skb_fill_page_desc(skb, frag++, bv.bv_page,
+ bv.bv_offset, bv.bv_len);
}
static void
@@ -332,12 +338,10 @@ ata_rw_frameinit(struct frame *f)
t->nout++;
f->waited = 0;
f->waited_total = 0;
- if (f->buf)
- f->lba = f->buf->sector;
/* set up ata header */
- ah->scnt = f->bcnt >> 9;
- put_lba(ah, f->lba);
+ ah->scnt = f->iter.bi_size >> 9;
+ put_lba(ah, f->iter.bi_sector);
if (t->d->flags & DEVFL_EXT) {
ah->aflags |= AOEAFL_EXT;
} else {
@@ -346,11 +350,11 @@ ata_rw_frameinit(struct frame *f)
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
}
if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
- skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+ skb_fillup(skb, f->buf->bio, f->iter);
ah->aflags |= AOEAFL_WRITE;
- skb->len += f->bcnt;
- skb->data_len = f->bcnt;
- skb->truesize += f->bcnt;
+ skb->len += f->iter.bi_size;
+ skb->data_len = f->iter.bi_size;
+ skb->truesize += f->iter.bi_size;
t->wpkts++;
} else {
t->rpkts++;
@@ -366,10 +370,8 @@ aoecmd_ata_rw(struct aoedev *d)
{
struct frame *f;
struct buf *buf;
- struct aoetgt *t;
struct sk_buff *skb;
struct sk_buff_head queue;
- ulong bcnt, fbcnt;
buf = nextbuf(d);
if (buf == NULL)
@@ -377,40 +379,22 @@ aoecmd_ata_rw(struct aoedev *d)
f = newframe(d);
if (f == NULL)
return 0;
- t = *d->tgt;
- bcnt = d->maxbcnt;
- if (bcnt == 0)
- bcnt = DEFAULTBCNT;
- if (bcnt > buf->resid)
- bcnt = buf->resid;
- fbcnt = bcnt;
- f->bv = buf->bv;
- f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
- do {
- if (fbcnt < buf->bv_resid) {
- buf->bv_resid -= fbcnt;
- buf->resid -= fbcnt;
- break;
- }
- fbcnt -= buf->bv_resid;
- buf->resid -= buf->bv_resid;
- if (buf->resid == 0) {
- d->ip.buf = NULL;
- break;
- }
- buf->bv++;
- buf->bv_resid = buf->bv->bv_len;
- WARN_ON(buf->bv_resid == 0);
- } while (fbcnt);
/* initialize the headers & frame */
f->buf = buf;
- f->bcnt = bcnt;
- ata_rw_frameinit(f);
+ f->iter = buf->iter;
+ f->iter.bi_size = min_t(unsigned long,
+ d->maxbcnt ?: DEFAULTBCNT,
+ f->iter.bi_size);
+ bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
+
+ if (!buf->iter.bi_size)
+ d->ip.buf = NULL;
/* mark all tracking fields and load out */
buf->nframesout += 1;
- buf->sector += bcnt >> 9;
+
+ ata_rw_frameinit(f);
skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
@@ -471,7 +455,6 @@ resend(struct aoedev *d, struct frame *f)
struct sk_buff *skb;
struct sk_buff_head queue;
struct aoe_hdr *h;
- struct aoe_atahdr *ah;
struct aoetgt *t;
char buf[128];
u32 n;
@@ -486,7 +469,6 @@ resend(struct aoedev *d, struct frame *f)
return;
}
h = (struct aoe_hdr *) skb_mac_header(skb);
- ah = (struct aoe_atahdr *) (h+1);
if (!(f->flags & FFL_PROBE)) {
snprintf(buf, sizeof(buf),
@@ -603,10 +585,7 @@ reassign_frame(struct frame *f)
skb = nf->skb;
nf->skb = f->skb;
nf->buf = f->buf;
- nf->bcnt = f->bcnt;
- nf->lba = f->lba;
- nf->bv = f->bv;
- nf->bv_off = f->bv_off;
+ nf->iter = f->iter;
nf->waited = 0;
nf->waited_total = f->waited_total;
nf->sent = f->sent;
@@ -638,19 +617,19 @@ probe(struct aoetgt *t)
}
f->flags |= FFL_PROBE;
ifrotate(t);
- f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+ f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
ata_rw_frameinit(f);
skb = f->skb;
- for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+ for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
if (n < PAGE_SIZE)
m = n;
else
m = PAGE_SIZE;
skb_fill_page_desc(skb, frag, empty_page, 0, m);
}
- skb->len += f->bcnt;
- skb->data_len = f->bcnt;
- skb->truesize += f->bcnt;
+ skb->len += f->iter.bi_size;
+ skb->data_len = f->iter.bi_size;
+ skb->truesize += f->iter.bi_size;
skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
@@ -887,21 +866,15 @@ rqbiocnt(struct request *r)
static void
bio_pageinc(struct bio *bio)
{
- struct bio_vec *bv;
+ struct bio_vec bv;
struct page *page;
- int i;
+ struct bvec_iter iter;
- bio_for_each_segment(bv, bio, i) {
- page = bv->bv_page;
+ bio_for_each_segment(bv, bio, iter) {
/* Non-zero page count for non-head members of
- * compound pages is no longer allowed by the kernel,
- * but this has never been seen here.
+ * compound pages is no longer allowed by the kernel.
*/
- if (unlikely(PageCompound(page)))
- if (compound_trans_head(page) != page) {
- pr_crit("page tail used for block I/O\n");
- BUG();
- }
+ page = compound_head(bv.bv_page);
atomic_inc(&page->_count);
}
}
@@ -909,27 +882,24 @@ bio_pageinc(struct bio *bio)
static void
bio_pagedec(struct bio *bio)
{
- struct bio_vec *bv;
- int i;
+ struct page *page;
+ struct bio_vec bv;
+ struct bvec_iter iter;
- bio_for_each_segment(bv, bio, i)
- atomic_dec(&bv->bv_page->_count);
+ bio_for_each_segment(bv, bio, iter) {
+ page = compound_head(bv.bv_page);
+ atomic_dec(&page->_count);
+ }
}
static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
- struct bio_vec *bv;
-
memset(buf, 0, sizeof(*buf));
buf->rq = rq;
buf->bio = bio;
- buf->resid = bio->bi_size;
- buf->sector = bio->bi_sector;
+ buf->iter = bio->bi_iter;
bio_pageinc(bio);
- buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
- buf->bv_resid = bv->bv_len;
- WARN_ON(buf->bv_resid == 0);
}
static struct buf *
@@ -1114,24 +1084,18 @@ gettgt(struct aoedev *d, char *addr)
}
static void
-bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
+bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
{
- ulong fcnt;
- char *p;
int soff = 0;
-loop:
- fcnt = bv->bv_len - (off - bv->bv_offset);
- if (fcnt > cnt)
- fcnt = cnt;
- p = page_address(bv->bv_page) + off;
- skb_copy_bits(skb, soff, p, fcnt);
- soff += fcnt;
- cnt -= fcnt;
- if (cnt <= 0)
- return;
- bv++;
- off = bv->bv_offset;
- goto loop;
+ struct bio_vec bv;
+
+ iter.bi_size = cnt;
+
+ __bio_for_each_segment(bv, bio, iter, iter) {
+ char *p = page_address(bv.bv_page) + bv.bv_offset;
+ skb_copy_bits(skb, soff, p, bv.bv_len);
+ soff += bv.bv_len;
+ }
}
void
@@ -1147,7 +1111,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
do {
bio = rq->bio;
bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
- } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size));
+ } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
@@ -1224,7 +1188,15 @@ noskb: if (buf)
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
break;
}
- bvcpy(f->bv, f->bv_off, skb, n);
+ if (n > f->iter.bi_size) {
+ pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n",
+ "aoe: too-large data size in read from",
+ (long) d->aoemajor, d->aoeminor,
+ n, f->iter.bi_size);
+ clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ break;
+ }
+ bvcpy(skb, f->buf->bio, f->iter, n);
case ATA_CMD_PIO_WRITE:
case ATA_CMD_PIO_WRITE_EXT:
spin_lock_irq(&d->lock);
@@ -1267,7 +1239,7 @@ out:
aoe_freetframe(f);
- if (buf && --buf->nframesout == 0 && buf->resid == 0)
+ if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
aoe_end_buf(d, buf);
spin_unlock_irq(&d->lock);
@@ -1279,23 +1251,36 @@ out:
* Returns true iff responses needing processing remain.
*/
static int
-ktio(void)
+ktio(int id)
{
struct frame *f;
struct list_head *pos;
int i;
+ int actual_id;
for (i = 0; ; ++i) {
if (i == MAXIOC)
return 1;
- if (list_empty(&iocq.head))
+ if (list_empty(&iocq[id].head))
return 0;
- pos = iocq.head.next;
+ pos = iocq[id].head.next;
list_del(pos);
- spin_unlock_irq(&iocq.lock);
f = list_entry(pos, struct frame, head);
+ spin_unlock_irq(&iocq[id].lock);
ktiocomplete(f);
- spin_lock_irq(&iocq.lock);
+
+ /* Figure out if extra threads are required. */
+ actual_id = f->t->d->aoeminor % ncpus;
+
+ if (!kts[actual_id].active) {
+ BUG_ON(id != 0);
+ mutex_lock(&ktio_spawn_lock);
+ if (!kts[actual_id].active
+ && aoe_ktstart(&kts[actual_id]) == 0)
+ kts[actual_id].active = 1;
+ mutex_unlock(&ktio_spawn_lock);
+ }
+ spin_lock_irq(&iocq[id].lock);
}
}
@@ -1312,7 +1297,7 @@ kthread(void *vp)
complete(&k->rendez); /* tell spawner we're running */
do {
spin_lock_irq(k->lock);
- more = k->fn();
+ more = k->fn(k->id);
if (!more) {
add_wait_queue(k->waitq, &wait);
__set_current_state(TASK_INTERRUPTIBLE);
@@ -1341,7 +1326,7 @@ aoe_ktstart(struct ktstate *k)
struct task_struct *task;
init_completion(&k->rendez);
- task = kthread_run(kthread, k, k->name);
+ task = kthread_run(kthread, k, "%s", k->name);
if (task == NULL || IS_ERR(task))
return -ENOMEM;
k->task = task;
@@ -1354,13 +1339,24 @@ aoe_ktstart(struct ktstate *k)
static void
ktcomplete(struct frame *f, struct sk_buff *skb)
{
+ int id;
ulong flags;
f->r_skb = skb;
- spin_lock_irqsave(&iocq.lock, flags);
- list_add_tail(&f->head, &iocq.head);
- spin_unlock_irqrestore(&iocq.lock, flags);
- wake_up(&ktiowq);
+ id = f->t->d->aoeminor % ncpus;
+ spin_lock_irqsave(&iocq[id].lock, flags);
+ if (!kts[id].active) {
+ spin_unlock_irqrestore(&iocq[id].lock, flags);
+ /* The thread with id has not been spawned yet,
+ * so delegate the work to the main thread and
+ * try spawning a new thread.
+ */
+ id = 0;
+ spin_lock_irqsave(&iocq[id].lock, flags);
+ }
+ list_add_tail(&f->head, &iocq[id].head);
+ spin_unlock_irqrestore(&iocq[id].lock, flags);
+ wake_up(&ktiowq[id]);
}
struct sk_buff *
@@ -1698,7 +1694,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
{
if (buf == NULL)
return;
- buf->resid = 0;
+ buf->iter.bi_size = 0;
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
@@ -1707,6 +1703,17 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
void
aoe_flush_iocq(void)
{
+ int i;
+
+ for (i = 0; i < ncpus; i++) {
+ if (kts[i].active)
+ aoe_flush_iocq_by_index(i);
+ }
+}
+
+void
+aoe_flush_iocq_by_index(int id)
+{
struct frame *f;
struct aoedev *d;
LIST_HEAD(flist);
@@ -1714,9 +1721,9 @@ aoe_flush_iocq(void)
struct sk_buff *skb;
ulong flags;
- spin_lock_irqsave(&iocq.lock, flags);
- list_splice_init(&iocq.head, &flist);
- spin_unlock_irqrestore(&iocq.lock, flags);
+ spin_lock_irqsave(&iocq[id].lock, flags);
+ list_splice_init(&iocq[id].head, &flist);
+ spin_unlock_irqrestore(&iocq[id].lock, flags);
while (!list_empty(&flist)) {
pos = flist.next;
list_del(pos);
@@ -1739,6 +1746,8 @@ int __init
aoecmd_init(void)
{
void *p;
+ int i;
+ int ret;
/* get_zeroed_page returns page with ref count 1 */
p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
@@ -1746,22 +1755,72 @@ aoecmd_init(void)
return -ENOMEM;
empty_page = virt_to_page(p);
- INIT_LIST_HEAD(&iocq.head);
- spin_lock_init(&iocq.lock);
- init_waitqueue_head(&ktiowq);
- kts.name = "aoe_ktio";
- kts.fn = ktio;
- kts.waitq = &ktiowq;
- kts.lock = &iocq.lock;
- return aoe_ktstart(&kts);
+ ncpus = num_online_cpus();
+
+ iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
+ if (!iocq)
+ return -ENOMEM;
+
+ kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
+ if (!kts) {
+ ret = -ENOMEM;
+ goto kts_fail;
+ }
+
+ ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
+ if (!ktiowq) {
+ ret = -ENOMEM;
+ goto ktiowq_fail;
+ }
+
+ mutex_init(&ktio_spawn_lock);
+
+ for (i = 0; i < ncpus; i++) {
+ INIT_LIST_HEAD(&iocq[i].head);
+ spin_lock_init(&iocq[i].lock);
+ init_waitqueue_head(&ktiowq[i]);
+ snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
+ kts[i].fn = ktio;
+ kts[i].waitq = &ktiowq[i];
+ kts[i].lock = &iocq[i].lock;
+ kts[i].id = i;
+ kts[i].active = 0;
+ }
+ kts[0].active = 1;
+ if (aoe_ktstart(&kts[0])) {
+ ret = -ENOMEM;
+ goto ktstart_fail;
+ }
+ return 0;
+
+ktstart_fail:
+ kfree(ktiowq);
+ktiowq_fail:
+ kfree(kts);
+kts_fail:
+ kfree(iocq);
+
+ return ret;
}
void
aoecmd_exit(void)
{
- aoe_ktstop(&kts);
+ int i;
+
+ for (i = 0; i < ncpus; i++)
+ if (kts[i].active)
+ aoe_ktstop(&kts[i]);
+
aoe_flush_iocq();
+ /* Free up the iocq and thread speicific configuration
+ * allocated during startup.
+ */
+ kfree(iocq);
+ kfree(kts);
+ kfree(ktiowq);
+
free_page((unsigned long) page_address(empty_page));
empty_page = NULL;
}