aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/aoe/aoedev.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/aoe/aoedev.c')
-rw-r--r--drivers/block/aoe/aoedev.c455
1 files changed, 352 insertions, 103 deletions
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 6b5110a4745..e774c50b684 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
/*
* aoedev.c
* AoE device utility functions; maintains device list.
@@ -9,30 +9,140 @@
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/kdev_t.h>
+#include <linux/moduleparam.h>
+#include <linux/string.h>
#include "aoe.h"
static void dummy_timer(ulong);
-static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
+static int aoe_dyndevs = 1;
+module_param(aoe_dyndevs, int, 0644);
+MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
+
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
-struct aoedev *
-aoedev_by_aoeaddr(int maj, int min)
+/* Because some systems will have one, many, or no
+ * - partitions,
+ * - slots per shelf,
+ * - or shelves,
+ * we need some flexibility in the way the minor numbers
+ * are allocated. So they are dynamic.
+ */
+#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
+
+static DEFINE_SPINLOCK(used_minors_lock);
+static DECLARE_BITMAP(used_minors, N_DEVS);
+
+static int
+minor_get_dyn(ulong *sysminor)
{
- struct aoedev *d;
ulong flags;
+ ulong n;
+ int error = 0;
+
+ spin_lock_irqsave(&used_minors_lock, flags);
+ n = find_first_zero_bit(used_minors, N_DEVS);
+ if (n < N_DEVS)
+ set_bit(n, used_minors);
+ else
+ error = -1;
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+
+ *sysminor = n * AOE_PARTITIONS;
+ return error;
+}
- spin_lock_irqsave(&devlist_lock, flags);
+static int
+minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
+{
+ ulong flags;
+ ulong n;
+ int error = 0;
+ enum {
+ /* for backwards compatibility when !aoe_dyndevs,
+ * a static number of supported slots per shelf */
+ NPERSHELF = 16,
+ };
+
+ if (aoemin >= NPERSHELF) {
+ pr_err("aoe: %s %d slots per shelf\n",
+ "static minor device numbers support only",
+ NPERSHELF);
+ error = -1;
+ goto out;
+ }
- for (d=devlist; d; d=d->next)
- if (d->aoemajor == maj && d->aoeminor == min)
- break;
+ n = aoemaj * NPERSHELF + aoemin;
+ if (n >= N_DEVS) {
+ pr_err("aoe: %s with e%ld.%d\n",
+ "cannot use static minor device numbers",
+ aoemaj, aoemin);
+ error = -1;
+ goto out;
+ }
+
+ spin_lock_irqsave(&used_minors_lock, flags);
+ if (test_bit(n, used_minors)) {
+ pr_err("aoe: %s %lu\n",
+ "existing device already has static minor number",
+ n);
+ error = -1;
+ } else
+ set_bit(n, used_minors);
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+ *sysminor = n * AOE_PARTITIONS;
+out:
+ return error;
+}
+
+static int
+minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
+{
+ if (aoe_dyndevs)
+ return minor_get_dyn(sysminor);
+ else
+ return minor_get_static(sysminor, aoemaj, aoemin);
+}
+
+static void
+minor_free(ulong minor)
+{
+ ulong flags;
+
+ minor /= AOE_PARTITIONS;
+ BUG_ON(minor >= N_DEVS);
+ spin_lock_irqsave(&used_minors_lock, flags);
+ BUG_ON(!test_bit(minor, used_minors));
+ clear_bit(minor, used_minors);
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+}
+
+/*
+ * Users who grab a pointer to the device with aoedev_by_aoeaddr
+ * automatically get a reference count and must be responsible
+ * for performing a aoedev_put. With the addition of async
+ * kthread processing I'm no longer confident that we can
+ * guarantee consistency in the face of device flushes.
+ *
+ * For the time being, we only bother to add extra references for
+ * frames sitting on the iocq. When the kthreads finish processing
+ * these frames, they will aoedev_put the device.
+ */
+
+void
+aoedev_put(struct aoedev *d)
+{
+ ulong flags;
+
+ spin_lock_irqsave(&devlist_lock, flags);
+ d->ref--;
spin_unlock_irqrestore(&devlist_lock, flags);
- return d;
}
static void
@@ -47,128 +157,247 @@ dummy_timer(ulong vp)
add_timer(&d->timer);
}
+static void
+aoe_failip(struct aoedev *d)
+{
+ struct request *rq;
+ struct bio *bio;
+ unsigned long n;
+
+ aoe_failbuf(d, d->ip.buf);
+
+ rq = d->ip.rq;
+ if (rq == NULL)
+ return;
+ while ((bio = d->ip.nxbio)) {
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ d->ip.nxbio = bio->bi_next;
+ n = (unsigned long) rq->special;
+ rq->special = (void *) --n;
+ }
+ if ((unsigned long) rq->special == 0)
+ aoe_end_request(d, rq, 0);
+}
+
+static void
+downdev_frame(struct list_head *pos)
+{
+ struct frame *f;
+
+ f = list_entry(pos, struct frame, head);
+ list_del(pos);
+ if (f->buf) {
+ f->buf->nframesout--;
+ aoe_failbuf(f->t->d, f->buf);
+ }
+ aoe_freetframe(f);
+}
+
void
aoedev_downdev(struct aoedev *d)
{
- struct aoetgt **t, **te;
- struct frame *f, *e;
- struct buf *buf;
- struct bio *bio;
+ struct aoetgt *t, **tt, **te;
+ struct list_head *head, *pos, *nx;
+ struct request *rq;
+ int i;
- t = d->targets;
- te = t + NTARGETS;
- for (; t < te && *t; t++) {
- f = (*t)->frames;
- e = f + (*t)->nframes;
- for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) {
- if (f->tag == FREETAG || f->buf == NULL)
- continue;
- buf = f->buf;
- bio = buf->bio;
- if (--buf->nframesout == 0
- && buf != d->inprocess) {
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
- }
- }
- (*t)->maxout = (*t)->nframes;
- (*t)->nout = 0;
+ d->flags &= ~DEVFL_UP;
+
+ /* clean out active and to-be-retransmitted buffers */
+ for (i = 0; i < NFACTIVE; i++) {
+ head = &d->factive[i];
+ list_for_each_safe(pos, nx, head)
+ downdev_frame(pos);
}
- buf = d->inprocess;
- if (buf) {
- bio = buf->bio;
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head)
+ downdev_frame(pos);
+
+ /* reset window dressings */
+ tt = d->targets;
+ te = tt + d->ntargets;
+ for (; tt < te && (t = *tt); tt++) {
+ aoecmd_wreset(t);
+ t->nout = 0;
}
- d->inprocess = NULL;
- d->htgt = NULL;
-
- while (!list_empty(&d->bufq)) {
- buf = container_of(d->bufq.next, struct buf, bufs);
- list_del(d->bufq.next);
- bio = buf->bio;
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
+
+ /* clean out the in-process request (if any) */
+ aoe_failip(d);
+
+ /* fast fail all pending I/O */
+ if (d->blkq) {
+ while ((rq = blk_peek_request(d->blkq))) {
+ blk_start_request(rq);
+ aoe_end_request(d, rq, 1);
+ }
}
if (d->gd)
set_capacity(d->gd, 0);
+}
- d->flags &= ~DEVFL_UP;
+/* return whether the user asked for this particular
+ * device to be flushed
+ */
+static int
+user_req(char *s, size_t slen, struct aoedev *d)
+{
+ const char *p;
+ size_t lim;
+
+ if (!d->gd)
+ return 0;
+ p = kbasename(d->gd->disk_name);
+ lim = sizeof(d->gd->disk_name);
+ lim -= p - d->gd->disk_name;
+ if (slen < lim)
+ lim = slen;
+
+ return !strncmp(s, p, lim);
}
static void
-aoedev_freedev(struct aoedev *d)
+freedev(struct aoedev *d)
{
struct aoetgt **t, **e;
+ int freeing = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->lock, flags);
+ if (d->flags & DEVFL_TKILL
+ && !(d->flags & DEVFL_FREEING)) {
+ d->flags |= DEVFL_FREEING;
+ freeing = 1;
+ }
+ spin_unlock_irqrestore(&d->lock, flags);
+ if (!freeing)
+ return;
- cancel_work_sync(&d->work);
+ del_timer_sync(&d->timer);
if (d->gd) {
+ aoedisk_rm_debugfs(d);
aoedisk_rm_sysfs(d);
del_gendisk(d->gd);
put_disk(d->gd);
+ blk_cleanup_queue(d->blkq);
}
t = d->targets;
- e = t + NTARGETS;
+ e = t + d->ntargets;
for (; t < e && *t; t++)
freetgt(d, *t);
if (d->bufpool)
mempool_destroy(d->bufpool);
skbpoolfree(d);
- blk_cleanup_queue(d->blkq);
- kfree(d);
+ minor_free(d->sysminor);
+
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags |= DEVFL_FREED;
+ spin_unlock_irqrestore(&d->lock, flags);
}
-int
-aoedev_flush(const char __user *str, size_t cnt)
+enum flush_parms {
+ NOT_EXITING = 0,
+ EXITING = 1,
+};
+
+static int
+flush(const char __user *str, size_t cnt, int exiting)
{
ulong flags;
struct aoedev *d, **dd;
- struct aoedev *rmd = NULL;
char buf[16];
int all = 0;
+ int specified = 0; /* flush a specific device */
+ unsigned int skipflags;
+
+ skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;
- if (cnt >= 3) {
+ if (!exiting && cnt >= 3) {
if (cnt > sizeof buf)
cnt = sizeof buf;
if (copy_from_user(buf, str, cnt))
return -EFAULT;
all = !strncmp(buf, "all", 3);
+ if (!all)
+ specified = 1;
}
+ flush_scheduled_work();
+ /* pass one: without sleeping, do aoedev_downdev */
spin_lock_irqsave(&devlist_lock, flags);
- dd = &devlist;
- while ((d = *dd)) {
+ for (d = devlist; d; d = d->next) {
spin_lock(&d->lock);
- if ((!all && (d->flags & DEVFL_UP))
- || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
- || d->nopen) {
- spin_unlock(&d->lock);
- dd = &d->next;
- continue;
- }
- *dd = d->next;
+ if (exiting) {
+ /* unconditionally take each device down */
+ } else if (specified) {
+ if (!user_req(buf, cnt, d))
+ goto cont;
+ } else if ((!all && (d->flags & DEVFL_UP))
+ || d->flags & skipflags
+ || d->nopen
+ || d->ref)
+ goto cont;
+
aoedev_downdev(d);
d->flags |= DEVFL_TKILL;
+cont:
spin_unlock(&d->lock);
- d->next = rmd;
- rmd = d;
}
spin_unlock_irqrestore(&devlist_lock, flags);
- while ((d = rmd)) {
- rmd = d->next;
- del_timer_sync(&d->timer);
- aoedev_freedev(d); /* must be able to sleep */
+
+ /* pass two: call freedev, which might sleep,
+ * for aoedevs marked with DEVFL_TKILL
+ */
+restart:
+ spin_lock_irqsave(&devlist_lock, flags);
+ for (d = devlist; d; d = d->next) {
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_TKILL
+ && !(d->flags & DEVFL_FREEING)) {
+ spin_unlock(&d->lock);
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ freedev(d);
+ goto restart;
+ }
+ spin_unlock(&d->lock);
}
+
+ /* pass three: remove aoedevs marked with DEVFL_FREED */
+ for (dd = &devlist, d = *dd; d; d = *dd) {
+ struct aoedev *doomed = NULL;
+
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_FREED) {
+ *dd = d->next;
+ doomed = d;
+ } else {
+ dd = &d->next;
+ }
+ spin_unlock(&d->lock);
+ if (doomed)
+ kfree(doomed->targets);
+ kfree(doomed);
+ }
+ spin_unlock_irqrestore(&devlist_lock, flags);
+
return 0;
}
-/* I'm not really sure that this is a realistic problem, but if the
-network driver goes gonzo let's just leak memory after complaining. */
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+ return flush(str, cnt, NOT_EXITING);
+}
+
+/* This has been confirmed to occur once with Tms=3*1000 due to the
+ * driver changing link and not processing its transmit ring. The
+ * problem is hard enough to solve by returning an error that I'm
+ * still punting on "solving" this.
+ */
static void
skbfree(struct sk_buff *skb)
{
- enum { Sms = 100, Tms = 3*1000};
+ enum { Sms = 250, Tms = 30 * 1000};
int i = Tms / Sms;
if (skb == NULL)
@@ -182,6 +411,7 @@ skbfree(struct sk_buff *skb)
"cannot free skb -- memory leaked.");
return;
}
+ skb->truesize -= skb->data_len;
skb_shinfo(skb)->nr_frags = skb->data_len = 0;
skb_trim(skb, 0);
dev_kfree_skb(skb);
@@ -198,26 +428,43 @@ skbpoolfree(struct aoedev *d)
__skb_queue_head_init(&d->skbpool);
}
-/* find it or malloc it */
+/* find it or allocate it */
struct aoedev *
-aoedev_by_sysminor_m(ulong sysminor)
+aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
struct aoedev *d;
+ int i;
ulong flags;
+ ulong sysminor = 0;
spin_lock_irqsave(&devlist_lock, flags);
for (d=devlist; d; d=d->next)
- if (d->sysminor == sysminor)
+ if (d->aoemajor == maj && d->aoeminor == min) {
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_TKILL) {
+ spin_unlock(&d->lock);
+ d = NULL;
+ goto out;
+ }
+ d->ref++;
+ spin_unlock(&d->lock);
break;
- if (d)
+ }
+ if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
goto out;
d = kcalloc(1, sizeof *d, GFP_ATOMIC);
if (!d)
goto out;
+ d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
+ if (!d->targets) {
+ kfree(d);
+ d = NULL;
+ goto out;
+ }
+ d->ntargets = NTARGETS;
INIT_WORK(&d->work, aoecmd_sleepwork);
spin_lock_init(&d->lock);
- skb_queue_head_init(&d->sendq);
skb_queue_head_init(&d->skbpool);
init_timer(&d->timer);
d->timer.data = (ulong) d;
@@ -226,11 +473,15 @@ aoedev_by_sysminor_m(ulong sysminor)
add_timer(&d->timer);
d->bufpool = NULL; /* defer to aoeblk_gdalloc */
d->tgt = d->targets;
- INIT_LIST_HEAD(&d->bufq);
+ d->ref = 1;
+ for (i = 0; i < NFACTIVE; i++)
+ INIT_LIST_HEAD(&d->factive[i]);
+ INIT_LIST_HEAD(&d->rexmitq);
d->sysminor = sysminor;
- d->aoemajor = AOEMAJOR(sysminor);
- d->aoeminor = AOEMINOR(sysminor);
- d->mintimer = MINTIMER;
+ d->aoemajor = maj;
+ d->aoeminor = min;
+ d->rttavg = RTTAVG_INIT;
+ d->rttdev = RTTDEV_INIT;
d->next = devlist;
devlist = d;
out:
@@ -241,33 +492,31 @@ aoedev_by_sysminor_m(ulong sysminor)
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
- struct frame *f, *e;
+ struct frame *f;
+ struct list_head *pos, *nx, *head;
+ struct aoeif *ifp;
- f = t->frames;
- e = f + t->nframes;
- for (; f < e; f++)
+ for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
+ if (!ifp->nd)
+ break;
+ dev_put(ifp->nd);
+ }
+
+ head = &t->ffree;
+ list_for_each_safe(pos, nx, head) {
+ list_del(pos);
+ f = list_entry(pos, struct frame, head);
skbfree(f->skb);
- kfree(t->frames);
+ kfree(f);
+ }
kfree(t);
}
void
aoedev_exit(void)
{
- struct aoedev *d;
- ulong flags;
-
- while ((d = devlist)) {
- devlist = d->next;
-
- spin_lock_irqsave(&d->lock, flags);
- aoedev_downdev(d);
- d->flags |= DEVFL_TKILL;
- spin_unlock_irqrestore(&d->lock, flags);
-
- del_timer_sync(&d->timer);
- aoedev_freedev(d);
- }
+ flush_scheduled_work();
+ flush(NULL, 0, EXITING);
}
int __init