diff options
Diffstat (limited to 'drivers/block/aoe')
| -rw-r--r-- | drivers/block/aoe/aoe.h | 78 | ||||
| -rw-r--r-- | drivers/block/aoe/aoeblk.c | 210 | ||||
| -rw-r--r-- | drivers/block/aoe/aoechr.c | 10 | ||||
| -rw-r--r-- | drivers/block/aoe/aoecmd.c | 986 | ||||
| -rw-r--r-- | drivers/block/aoe/aoedev.c | 244 | ||||
| -rw-r--r-- | drivers/block/aoe/aoemain.c | 2 | ||||
| -rw-r--r-- | drivers/block/aoe/aoenet.c | 20 |
7 files changed, 1100 insertions, 450 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index d2ed7f18d1a..9220f8e833d 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "50" +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ +#define VERSION "85" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -10,7 +10,7 @@ #define AOE_PARTITIONS (16) #endif -#define WHITESPACE " \t\v\f\n" +#define WHITESPACE " \t\v\f\n," enum { AOECMD_ATA, @@ -73,45 +73,55 @@ enum { DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */ - DEVFL_KICKME = (1<<4), /* slow polling network card catch */ - DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */ + DEVFL_GD_NOW = (1<<4), /* allocating gendisk */ + DEVFL_KICKME = (1<<5), /* slow polling network card catch */ + DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ + DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ + DEVFL_FREED = (1<<8), /* device has been cleaned up */ }; enum { DEFAULTBCNT = 2 * 512, /* 2 sectors */ MIN_BUFS = 16, - NTARGETS = 8, + NTARGETS = 4, NAOEIFS = 8, NSKBPOOLMAX = 256, NFACTIVE = 61, TIMERTICK = HZ / 10, - MINTIMER = HZ >> 2, - MAXTIMER = HZ << 1, + RTTSCALE = 8, + RTTDSCALE = 3, + RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE, + RTTDEV_INIT = RTTAVG_INIT / 4, + + HARD_SCORN_SECS = 10, /* try another remote port after this */ + MAX_TAINT = 1000, /* cap on aoetgt taint */ }; struct buf { ulong nframesout; - ulong resid; - ulong bv_resid; - sector_t sector; struct bio *bio; - struct bio_vec *bv; + struct bvec_iter iter; struct request *rq; }; +enum frame_flags { + FFL_PROBE = 1, +}; + struct frame { struct list_head head; u32 tag; + struct timeval sent; /* high-res time packet was sent */ + u32 sent_jiffs; /* low-res jiffies-based sent time */ ulong waited; + ulong waited_total; struct aoetgt *t; /* parent target I belong to */ - sector_t lba; struct sk_buff *skb; /* command skb freed on module exit */ struct sk_buff *r_skb; /* response skb for async processing */ struct buf *buf; - struct bio_vec *bv; - ulong bcnt; - ulong bv_off; + struct bvec_iter iter; + char flags; }; struct aoeif { @@ -122,36 +132,40 @@ struct aoeif { struct aoetgt { unsigned char addr[6]; - ushort nframes; + ushort nframes; /* cap on frames to use */ struct aoedev *d; /* parent device I belong to */ struct list_head ffree; /* list of free frames */ struct aoeif ifs[NAOEIFS]; struct aoeif *ifp; /* current aoeif in use */ - ushort nout; - ushort maxout; - ulong falloc; - ulong lastwadj; /* last window adjustment */ + ushort nout; /* number of AoE commands outstanding */ + ushort maxout; /* current value for max outstanding */ + ushort next_cwnd; /* incr maxout after decrementing to zero */ + ushort ssthresh; /* slow start threshold */ + ulong falloc; /* number of allocated frames */ + int taint; /* how much we want to avoid this aoetgt */ int minbcnt; int wpkts, rpkts; + char nout_probes; }; struct aoedev { struct aoedev *next; ulong sysminor; ulong aoemajor; + u32 rttavg; /* scaled AoE round trip time average */ + u32 rttdev; /* scaled round trip time mean deviation */ u16 aoeminor; u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ - u16 rttavg; /* round trip average of requests/responses */ - u16 mintimer; u16 fw_ver; /* version of blade's firmware */ u16 lasttag; /* last tag sent */ u16 useme; ulong ref; struct work_struct work;/* disk create work struct */ struct gendisk *gd; + struct dentry *debugfs; struct request_queue *blkq; - struct hd_geometry geo; + struct hd_geometry geo; sector_t ssize; struct timer_list timer; spinlock_t lock; @@ -164,11 +178,12 @@ struct aoedev { } ip; ulong maxbcnt; struct list_head factive[NFACTIVE]; /* hash of active frames */ - struct aoetgt *targets[NTARGETS]; + struct list_head rexmitq; /* deferred retransmissions */ + struct aoetgt **targets; + ulong ntargets; /* number of allocated aoetgt pointers */ struct aoetgt **tgt; /* target in use when working */ - struct aoetgt *htgt; /* target needing rexmit assistance */ - ulong ntargets; ulong kicked; + char ident[512]; }; /* kthread tracking */ @@ -176,14 +191,17 @@ struct ktstate { struct completion rendez; struct task_struct *task; wait_queue_head_t *waitq; - int (*fn) (void); - char *name; + int (*fn) (int); + char name[12]; spinlock_t *lock; + int id; + int active; }; int aoeblk_init(void); void aoeblk_exit(void); void aoeblk_gdalloc(void *); +void aoedisk_rm_debugfs(struct aoedev *d); void aoedisk_rm_sysfs(struct aoedev *d); int aoechr_init(void); @@ -195,12 +213,14 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); struct sk_buff *aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); void aoecmd_sleepwork(struct work_struct *); +void aoecmd_wreset(struct aoetgt *t); void aoecmd_cleanslate(struct aoedev *); void aoecmd_exit(void); int aoecmd_init(void); struct sk_buff *aoecmd_ata_id(struct aoedev *); void aoe_freetframe(struct frame *); void aoe_flush_iocq(void); +void aoe_flush_iocq_by_index(int); void aoe_end_request(struct aoedev *, struct request *, int); int aoe_ktstart(struct ktstate *k); void aoe_ktstop(struct ktstate *k); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00dfc5008ad..dd73e1ff175 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoeblk.c * block device routines @@ -16,10 +16,20 @@ #include <linux/netdevice.h> #include <linux/mutex.h> #include <linux/export.h> +#include <linux/moduleparam.h> +#include <linux/debugfs.h> +#include <scsi/sg.h> #include "aoe.h" static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; +static struct dentry *aoe_debugfs_dir; + +/* GPFS needs a larger value than the default. */ +static int aoe_maxsectors; +module_param(aoe_maxsectors, int, 0644); +MODULE_PARM_DESC(aoe_maxsectors, + "When nonzero, set the maximum number of sectors per I/O request"); static ssize_t aoedisk_show_state(struct device *dev, struct device_attribute *attr, char *page) @@ -59,7 +69,7 @@ static ssize_t aoedisk_show_netif(struct device *dev, nd = nds; ne = nd + ARRAY_SIZE(nds); t = d->targets; - te = t + NTARGETS; + te = t + d->ntargets; for (; t < te && *t; t++) { ifp = (*t)->ifs; e = ifp + NAOEIFS; @@ -91,6 +101,63 @@ static ssize_t aoedisk_show_fwver(struct device *dev, return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); } +static ssize_t aoedisk_show_payload(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct gendisk *disk = dev_to_disk(dev); + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); +} + +static int aoedisk_debugfs_show(struct seq_file *s, void *ignored) +{ + struct aoedev *d; + struct aoetgt **t, **te; + struct aoeif *ifp, *ife; + unsigned long flags; + char c; + + d = s->private; + seq_printf(s, "rttavg: %d rttdev: %d\n", + d->rttavg >> RTTSCALE, + d->rttdev >> RTTDSCALE); + seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool)); + seq_printf(s, "kicked: %ld\n", d->kicked); + seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt); + seq_printf(s, "ref: %ld\n", d->ref); + + spin_lock_irqsave(&d->lock, flags); + t = d->targets; + te = t + d->ntargets; + for (; t < te && *t; t++) { + c = '\t'; + seq_printf(s, "falloc: %ld\n", (*t)->falloc); + seq_printf(s, "ffree: %p\n", + list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next); + seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout, + (*t)->maxout, (*t)->nframes); + seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh); + seq_printf(s, "\ttaint:%d\n", (*t)->taint); + seq_printf(s, "\tr:%d\n", (*t)->rpkts); + seq_printf(s, "\tw:%d\n", (*t)->wpkts); + ifp = (*t)->ifs; + ife = ifp + ARRAY_SIZE((*t)->ifs); + for (; ifp->nd && ifp < ife; ifp++) { + seq_printf(s, "%c%s", c, ifp->nd->name); + c = ','; + } + seq_puts(s, "\n"); + } + spin_unlock_irqrestore(&d->lock, flags); + + return 0; +} + +static int aoe_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, aoedisk_debugfs_show, inode->i_private); +} static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); @@ -99,12 +166,14 @@ static struct device_attribute dev_attr_firmware_version = { .attr = { .name = "firmware-version", .mode = S_IRUGO }, .show = aoedisk_show_fwver, }; +static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL); static struct attribute *aoe_attrs[] = { &dev_attr_state.attr, &dev_attr_mac.attr, &dev_attr_netif.attr, &dev_attr_firmware_version.attr, + &dev_attr_payload.attr, NULL, }; @@ -112,6 +181,44 @@ static const struct attribute_group attr_group = { .attrs = aoe_attrs, }; +static const struct file_operations aoe_debugfs_fops = { + .open = aoe_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void +aoedisk_add_debugfs(struct aoedev *d) +{ + struct dentry *entry; + char *p; + + if (aoe_debugfs_dir == NULL) + return; + p = strchr(d->gd->disk_name, '/'); + if (p == NULL) + p = d->gd->disk_name; + else + p++; + BUG_ON(*p == '\0'); + entry = debugfs_create_file(p, 0444, aoe_debugfs_dir, d, + &aoe_debugfs_fops); + if (IS_ERR_OR_NULL(entry)) { + pr_info("aoe: cannot create debugfs file for %s\n", + d->gd->disk_name); + return; + } + BUG_ON(d->debugfs); + d->debugfs = entry; +} +void +aoedisk_rm_debugfs(struct aoedev *d) +{ + debugfs_remove(d->debugfs); + d->debugfs = NULL; +} + static int aoedisk_add_sysfs(struct aoedev *d) { @@ -129,9 +236,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode) struct aoedev *d = bdev->bd_disk->private_data; ulong flags; + if (!virt_addr_valid(d)) { + pr_crit("aoe: invalid device pointer in %s\n", + __func__); + WARN_ON(1); + return -ENODEV; + } + if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL) + return -ENODEV; + mutex_lock(&aoeblk_mutex); spin_lock_irqsave(&d->lock, flags); - if (d->flags & DEVFL_UP) { + if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) { d->nopen++; spin_unlock_irqrestore(&d->lock, flags); mutex_unlock(&aoeblk_mutex); @@ -142,7 +258,7 @@ aoeblk_open(struct block_device *bdev, fmode_t mode) return -ENODEV; } -static int +static void aoeblk_release(struct gendisk *disk, fmode_t mode) { struct aoedev *d = disk->private_data; @@ -153,11 +269,9 @@ aoeblk_release(struct gendisk *disk, fmode_t mode) if (--d->nopen == 0) { spin_unlock_irqrestore(&d->lock, flags); aoecmd_cfg(d->aoemajor, d->aoeminor); - return 0; + return; } spin_unlock_irqrestore(&d->lock, flags); - - return 0; } static void @@ -195,9 +309,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +static int +aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg) +{ + struct aoedev *d; + + if (!arg) + return -EINVAL; + + d = bdev->bd_disk->private_data; + if ((d->flags & DEVFL_UP) == 0) { + pr_err("aoe: disk not up\n"); + return -ENODEV; + } + + if (cmd == HDIO_GET_IDENTITY) { + if (!copy_to_user((void __user *) arg, &d->ident, + sizeof(d->ident))) + return 0; + return -EFAULT; + } + + /* udev calls scsi_id, which uses SG_IO, resulting in noise */ + if (cmd != SG_IO) + pr_info("aoe: unknown ioctl 0x%x\n", cmd); + + return -ENOTTY; +} + static const struct block_device_operations aoe_bdops = { .open = aoeblk_open, .release = aoeblk_release, + .ioctl = aoeblk_ioctl, .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; @@ -212,6 +355,18 @@ aoeblk_gdalloc(void *vp) struct request_queue *q; enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; + int late = 0; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_GDALLOC + && !(d->flags & DEVFL_TKILL) + && !(d->flags & DEVFL_GD_NOW)) + d->flags |= DEVFL_GD_NOW; + else + late = 1; + spin_unlock_irqrestore(&d->lock, flags); + if (late) + return; gd = alloc_disk(AOE_PARTITIONS); if (gd == NULL) { @@ -231,23 +386,24 @@ aoeblk_gdalloc(void *vp) if (q == NULL) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); - mempool_destroy(mp); - goto err_disk; + goto err_mempool; } - d->blkq = blk_alloc_queue(GFP_KERNEL); - if (!d->blkq) - goto err_mempool; - d->blkq->backing_dev_info.name = "aoe"; - if (bdi_init(&d->blkq->backing_dev_info)) - goto err_blkq; spin_lock_irqsave(&d->lock, flags); - blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + WARN_ON(!(d->flags & DEVFL_GDALLOC)); + WARN_ON(d->flags & DEVFL_TKILL); + WARN_ON(d->gd); + WARN_ON(d->flags & DEVFL_UP); + blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); + q->backing_dev_info.name = "aoe"; q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; d->gd = gd; + if (aoe_maxsectors) + blk_queue_max_hw_sectors(q, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->fops = &aoe_bdops; @@ -263,24 +419,30 @@ aoeblk_gdalloc(void *vp) add_disk(gd); aoedisk_add_sysfs(d); + aoedisk_add_debugfs(d); + + spin_lock_irqsave(&d->lock, flags); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + d->flags &= ~DEVFL_GD_NOW; + spin_unlock_irqrestore(&d->lock, flags); return; -err_blkq: - blk_cleanup_queue(d->blkq); - d->blkq = NULL; err_mempool: - mempool_destroy(d->bufpool); + mempool_destroy(mp); err_disk: put_disk(gd); err: spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_GDALLOC; + d->flags &= ~DEVFL_GD_NOW; + schedule_work(&d->work); spin_unlock_irqrestore(&d->lock, flags); } void aoeblk_exit(void) { + debugfs_remove_recursive(aoe_debugfs_dir); + aoe_debugfs_dir = NULL; kmem_cache_destroy(buf_pool_cache); } @@ -292,7 +454,11 @@ aoeblk_init(void) 0, 0, NULL); if (buf_pool_cache == NULL) return -ENOMEM; - + aoe_debugfs_dir = debugfs_create_dir("aoe", NULL); + if (IS_ERR_OR_NULL(aoe_debugfs_dir)) { + pr_info("aoe: cannot create debugfs directory\n"); + aoe_debugfs_dir = NULL; + } return 0; } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index ed57a890c64..ab41be625a5 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -39,6 +39,11 @@ struct ErrMsg { }; static DEFINE_MUTEX(aoechr_mutex); + +/* A ring buffer of error messages, to be read through + * "/dev/etherd/err". When no messages are present, + * readers will block waiting for messages to appear. + */ static struct ErrMsg emsgs[NMSG]; static int emsgs_head_idx, emsgs_tail_idx; static struct completion emsgs_comp; @@ -134,13 +139,12 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags); return; } - mp = kmalloc(n, GFP_ATOMIC); + mp = kmemdup(msg, n, GFP_ATOMIC); if (mp == NULL) { printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n); goto bail; } - memcpy(mp, msg, n); em->msg = mp; em->flags |= EMFL_VALID; em->len = n; @@ -282,7 +286,7 @@ aoechr_init(void) int n, i; n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops); - if (n < 0) { + if (n < 0) { printk(KERN_ERR "aoe: can't register char device\n"); return n; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 3804a0af3ef..422b7d84f68 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoecmd.c * Filesystem request handling methods @@ -22,6 +22,7 @@ #define MAXIOC (8192) /* default meant to avoid most soft lockups */ static void ktcomplete(struct frame *, struct sk_buff *); +static int count_targets(struct aoedev *d, int *untainted); static struct buf *nextbuf(struct aoedev *); @@ -29,27 +30,43 @@ static int aoe_deadsecs = 60 * 3; module_param(aoe_deadsecs, int, 0644); MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); -static int aoe_maxout = 16; +static int aoe_maxout = 64; module_param(aoe_maxout, int, 0644); MODULE_PARM_DESC(aoe_maxout, "Only aoe_maxout outstanding packets for every MAC on eX.Y."); -static wait_queue_head_t ktiowq; -static struct ktstate kts; +/* The number of online cpus during module initialization gives us a + * convenient heuristic cap on the parallelism used for ktio threads + * doing I/O completion. It is not important that the cap equal the + * actual number of running CPUs at any given time, but because of CPU + * hotplug, we take care to use ncpus instead of using + * num_online_cpus() after module initialization. + */ +static int ncpus; + +/* mutex lock used for synchronization while thread spawning */ +static DEFINE_MUTEX(ktio_spawn_lock); + +static wait_queue_head_t *ktiowq; +static struct ktstate *kts; /* io completion queue */ -static struct { +struct iocq_ktio { struct list_head head; spinlock_t lock; -} iocq; +}; +static struct iocq_ktio *iocq; + +static struct page *empty_page; static struct sk_buff * new_skb(ulong len) { struct sk_buff *skb; - skb = alloc_skb(len, GFP_ATOMIC); + skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC); if (skb) { + skb_reserve(skb, MAX_HEADER); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); @@ -59,6 +76,23 @@ new_skb(ulong len) } static struct frame * +getframe_deferred(struct aoedev *d, u32 tag) +{ + struct list_head *head, *pos, *nx; + struct frame *f; + + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) { + f = list_entry(pos, struct frame, head); + if (f->tag == tag) { + list_del(pos); + return f; + } + } + return NULL; +} + +static struct frame * getframe(struct aoedev *d, u32 tag) { struct frame *f; @@ -162,8 +196,9 @@ aoe_freetframe(struct frame *f) t = f->t; f->buf = NULL; - f->bv = NULL; + memset(&f->iter, 0, sizeof(f->iter)); f->r_skb = NULL; + f->flags = 0; list_add(&f->head, &t->ffree); } @@ -217,20 +252,25 @@ newframe(struct aoedev *d) struct frame *f; struct aoetgt *t, **tt; int totout = 0; + int use_tainted; + int has_untainted; - if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ + if (!d->targets || !d->targets[0]) { printk(KERN_ERR "aoe: NULL TARGETS!\n"); return NULL; } tt = d->tgt; /* last used target */ - for (;;) { + for (use_tainted = 0, has_untainted = 0;;) { tt++; - if (tt >= &d->targets[NTARGETS] || !*tt) + if (tt >= &d->targets[d->ntargets] || !*tt) tt = d->targets; t = *tt; - totout += t->nout; + if (!t->taint) { + has_untainted = 1; + totout += t->nout; + } if (t->nout < t->maxout - && t != d->htgt + && (use_tainted || !t->taint) && t->ifp->nd) { f = newtframe(d, t); if (f) { @@ -239,8 +279,12 @@ newframe(struct aoedev *d) return f; } } - if (tt == d->tgt) /* we've looped and found nada */ - break; + if (tt == d->tgt) { /* we've looped and found nada */ + if (!use_tainted && !has_untainted) + use_tainted = 1; + else + break; + } } if (totout == 0) { d->kicked++; @@ -250,21 +294,14 @@ newframe(struct aoedev *d) } static void -skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt) +skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter) { int frag = 0; - ulong fcnt; -loop: - fcnt = bv->bv_len - (off - bv->bv_offset); - if (fcnt > cnt) - fcnt = cnt; - skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt); - cnt -= fcnt; - if (cnt <= 0) - return; - bv++; - off = bv->bv_offset; - goto loop; + struct bio_vec bv; + + __bio_for_each_segment(bv, bio, iter, iter) + skb_fill_page_desc(skb, frag++, bv.bv_page, + bv.bv_offset, bv.bv_len); } static void @@ -277,84 +314,47 @@ fhash(struct frame *f) list_add_tail(&f->head, &d->factive[n]); } -static int -aoecmd_ata_rw(struct aoedev *d) +static void +ata_rw_frameinit(struct frame *f) { - struct frame *f; + struct aoetgt *t; struct aoe_hdr *h; struct aoe_atahdr *ah; - struct buf *buf; - struct aoetgt *t; struct sk_buff *skb; - struct sk_buff_head queue; - ulong bcnt, fbcnt; char writebit, extbit; - writebit = 0x10; - extbit = 0x4; - - buf = nextbuf(d); - if (buf == NULL) - return 0; - f = newframe(d); - if (f == NULL) - return 0; - t = *d->tgt; - bcnt = d->maxbcnt; - if (bcnt == 0) - bcnt = DEFAULTBCNT; - if (bcnt > buf->resid) - bcnt = buf->resid; - fbcnt = bcnt; - f->bv = buf->bv; - f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid); - do { - if (fbcnt < buf->bv_resid) { - buf->bv_resid -= fbcnt; - buf->resid -= fbcnt; - break; - } - fbcnt -= buf->bv_resid; - buf->resid -= buf->bv_resid; - if (buf->resid == 0) { - d->ip.buf = NULL; - break; - } - buf->bv++; - buf->bv_resid = buf->bv->bv_len; - WARN_ON(buf->bv_resid == 0); - } while (fbcnt); - - /* initialize the headers & frame */ skb = f->skb; h = (struct aoe_hdr *) skb_mac_header(skb); - ah = (struct aoe_atahdr *) (h+1); - skb_put(skb, sizeof *h + sizeof *ah); + ah = (struct aoe_atahdr *) (h + 1); + skb_put(skb, sizeof(*h) + sizeof(*ah)); memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, t, h); + + writebit = 0x10; + extbit = 0x4; + + t = f->t; + f->tag = aoehdr_atainit(t->d, t, h); fhash(f); t->nout++; f->waited = 0; - f->buf = buf; - f->bcnt = bcnt; - f->lba = buf->sector; + f->waited_total = 0; /* set up ata header */ - ah->scnt = bcnt >> 9; - put_lba(ah, buf->sector); - if (d->flags & DEVFL_EXT) { + ah->scnt = f->iter.bi_size >> 9; + put_lba(ah, f->iter.bi_sector); + if (t->d->flags & DEVFL_EXT) { ah->aflags |= AOEAFL_EXT; } else { extbit = 0; ah->lba3 &= 0x0f; ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ } - if (bio_data_dir(buf->bio) == WRITE) { - skb_fillup(skb, f->bv, f->bv_off, bcnt); + if (f->buf && bio_data_dir(f->buf->bio) == WRITE) { + skb_fillup(skb, f->buf->bio, f->iter); ah->aflags |= AOEAFL_WRITE; - skb->len += bcnt; - skb->data_len = bcnt; - skb->truesize += bcnt; + skb->len += f->iter.bi_size; + skb->data_len = f->iter.bi_size; + skb->truesize += f->iter.bi_size; t->wpkts++; } else { t->rpkts++; @@ -362,14 +362,44 @@ aoecmd_ata_rw(struct aoedev *d) } ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit; + skb->dev = t->ifp->nd; +} + +static int +aoecmd_ata_rw(struct aoedev *d) +{ + struct frame *f; + struct buf *buf; + struct sk_buff *skb; + struct sk_buff_head queue; + + buf = nextbuf(d); + if (buf == NULL) + return 0; + f = newframe(d); + if (f == NULL) + return 0; + + /* initialize the headers & frame */ + f->buf = buf; + f->iter = buf->iter; + f->iter.bi_size = min_t(unsigned long, + d->maxbcnt ?: DEFAULTBCNT, + f->iter.bi_size); + bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size); + + if (!buf->iter.bi_size) + d->ip.buf = NULL; /* mark all tracking fields and load out */ buf->nframesout += 1; - buf->sector += bcnt >> 9; - skb->dev = t->ifp->nd; - skb = skb_clone(skb, GFP_ATOMIC); + ata_rw_frameinit(f); + + skb = skb_clone(f->skb, GFP_ATOMIC); if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -425,7 +455,6 @@ resend(struct aoedev *d, struct frame *f) struct sk_buff *skb; struct sk_buff_head queue; struct aoe_hdr *h; - struct aoe_atahdr *ah; struct aoetgt *t; char buf[128]; u32 n; @@ -440,13 +469,15 @@ resend(struct aoedev *d, struct frame *f) return; } h = (struct aoe_hdr *) skb_mac_header(skb); - ah = (struct aoe_atahdr *) (h+1); - snprintf(buf, sizeof buf, - "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", - "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, - h->src, h->dst, t->nout); - aoechr_error(buf); + if (!(f->flags & FFL_PROBE)) { + snprintf(buf, sizeof(buf), + "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", + "retransmit", d->aoemajor, d->aoeminor, + f->tag, jiffies, n, + h->src, h->dst, t->nout); + aoechr_error(buf); + } f->tag = n; fhash(f); @@ -458,12 +489,46 @@ resend(struct aoedev *d, struct frame *f) skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); } static int +tsince_hr(struct frame *f) +{ + struct timeval now; + int n; + + do_gettimeofday(&now); + n = now.tv_usec - f->sent.tv_usec; + n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC; + + if (n < 0) + n = -n; + + /* For relatively long periods, use jiffies to avoid + * discrepancies caused by updates to the system time. + * + * On system with HZ of 1000, 32-bits is over 49 days + * worth of jiffies, or over 71 minutes worth of usecs. + * + * Jiffies overflow is handled by subtraction of unsigned ints: + * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe + * $3 = 4 + * (gdb) + */ + if (n > USEC_PER_SEC / 4) { + n = ((u32) jiffies) - f->sent_jiffs; + n *= USEC_PER_SEC / HZ; + } + + return n; +} + +static int tsince(u32 tag) { int n; @@ -472,7 +537,7 @@ tsince(u32 tag) n -= tag & 0xffff; if (n < 0) n += 1<<16; - return n; + return jiffies_to_usecs(n + 1); } static struct aoeif * @@ -503,70 +568,186 @@ ejectif(struct aoetgt *t, struct aoeif *ifp) dev_put(nd); } -static int -sthtith(struct aoedev *d) +static struct frame * +reassign_frame(struct frame *f) { - struct frame *f, *nf; - struct list_head *nx, *pos, *head; + struct frame *nf; struct sk_buff *skb; - struct aoetgt *ht = d->htgt; - int i; - for (i = 0; i < NFACTIVE; i++) { - head = &d->factive[i]; - list_for_each_safe(pos, nx, head) { - f = list_entry(pos, struct frame, head); - if (f->t != ht) - continue; + nf = newframe(f->t->d); + if (!nf) + return NULL; + if (nf->t == f->t) { + aoe_freetframe(nf); + return NULL; + } - nf = newframe(d); - if (!nf) - return 0; + skb = nf->skb; + nf->skb = f->skb; + nf->buf = f->buf; + nf->iter = f->iter; + nf->waited = 0; + nf->waited_total = f->waited_total; + nf->sent = f->sent; + nf->sent_jiffs = f->sent_jiffs; + f->skb = skb; + + return nf; +} - /* remove frame from active list */ - list_del(pos); +static void +probe(struct aoetgt *t) +{ + struct aoedev *d; + struct frame *f; + struct sk_buff *skb; + struct sk_buff_head queue; + size_t n, m; + int frag; + + d = t->d; + f = newtframe(d, t); + if (!f) { + pr_err("%s %pm for e%ld.%d: %s\n", + "aoe: cannot probe remote address", + t->addr, + (long) d->aoemajor, d->aoeminor, + "no frame available"); + return; + } + f->flags |= FFL_PROBE; + ifrotate(t); + f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT; + ata_rw_frameinit(f); + skb = f->skb; + for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) { + if (n < PAGE_SIZE) + m = n; + else + m = PAGE_SIZE; + skb_fill_page_desc(skb, frag, empty_page, 0, m); + } + skb->len += f->iter.bi_size; + skb->data_len = f->iter.bi_size; + skb->truesize += f->iter.bi_size; + + skb = skb_clone(f->skb, GFP_ATOMIC); + if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; + __skb_queue_head_init(&queue); + __skb_queue_tail(&queue, skb); + aoenet_xmit(&queue); + } +} + +static long +rto(struct aoedev *d) +{ + long t; + + t = 2 * d->rttavg >> RTTSCALE; + t += 8 * d->rttdev >> RTTDSCALE; + if (t == 0) + t = 1; + + return t; +} + +static void +rexmit_deferred(struct aoedev *d) +{ + struct aoetgt *t; + struct frame *f; + struct frame *nf; + struct list_head *pos, *nx, *head; + int since; + int untainted; - /* reassign all pertinent bits to new outbound frame */ - skb = nf->skb; - nf->skb = f->skb; - nf->buf = f->buf; - nf->bcnt = f->bcnt; - nf->lba = f->lba; - nf->bv = f->bv; - nf->bv_off = f->bv_off; - nf->waited = 0; - f->skb = skb; + count_targets(d, &untainted); + + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) { + f = list_entry(pos, struct frame, head); + t = f->t; + if (t->taint) { + if (!(f->flags & FFL_PROBE)) { + nf = reassign_frame(f); + if (nf) { + if (t->nout_probes == 0 + && untainted > 0) { + probe(t); + t->nout_probes++; + } + list_replace(&f->head, &nf->head); + pos = &nf->head; + aoe_freetframe(f); + f = nf; + t = f->t; + } + } else if (untainted < 1) { + /* don't probe w/o other untainted aoetgts */ + goto stop_probe; + } else if (tsince_hr(f) < t->taint * rto(d)) { + /* reprobe slowly when taint is high */ + continue; + } + } else if (f->flags & FFL_PROBE) { +stop_probe: /* don't probe untainted aoetgts */ + list_del(pos); aoe_freetframe(f); - ht->nout--; - nf->t->nout++; - resend(d, nf); + /* leaving d->kicked, because this is routine */ + f->t->d->flags |= DEVFL_KICKME; + continue; } + if (t->nout >= t->maxout) + continue; + list_del(pos); + t->nout++; + if (f->flags & FFL_PROBE) + t->nout_probes++; + since = tsince_hr(f); + f->waited += since; + f->waited_total += since; + resend(d, f); } - /* We've cleaned up the outstanding so take away his - * interfaces so he won't be used. We should remove him from - * the target array here, but cleaning up a target is - * involved. PUNT! - */ - memset(ht->ifs, 0, sizeof ht->ifs); - d->htgt = NULL; - return 1; } -static inline unsigned char -ata_scnt(unsigned char *packet) { - struct aoe_hdr *h; - struct aoe_atahdr *ah; +/* An aoetgt accumulates demerits quickly, and successful + * probing redeems the aoetgt slowly. + */ +static void +scorn(struct aoetgt *t) +{ + int n; - h = (struct aoe_hdr *) packet; - ah = (struct aoe_atahdr *) (h+1); - return ah->scnt; + n = t->taint++; + t->taint += t->taint * 2; + if (n > t->taint) + t->taint = n; + if (t->taint > MAX_TAINT) + t->taint = MAX_TAINT; +} + +static int +count_targets(struct aoedev *d, int *untainted) +{ + int i, good; + + for (i = good = 0; i < d->ntargets && d->targets[i]; ++i) + if (d->targets[i]->taint == 0) + good++; + + if (untainted) + *untainted = good; + return i; } static void rexmit_timer(ulong vp) { struct aoedev *d; - struct aoetgt *t, **tt, **te; + struct aoetgt *t; struct aoeif *ifp; struct frame *f; struct list_head *head, *pos, *nx; @@ -574,15 +755,18 @@ rexmit_timer(ulong vp) register long timeout; ulong flags, n; int i; + int utgts; /* number of aoetgt descriptors (not slots) */ + int since; d = (struct aoedev *) vp; - /* timeout is always ~150% of the moving average */ - timeout = d->rttavg; - timeout += timeout >> 1; - spin_lock_irqsave(&d->lock, flags); + /* timeout based on observed timings and variations */ + timeout = rto(d); + + utgts = count_targets(d, NULL); + if (d->flags & DEVFL_TKILL) { spin_unlock_irqrestore(&d->lock, flags); return; @@ -593,67 +777,61 @@ rexmit_timer(ulong vp) head = &d->factive[i]; list_for_each_safe(pos, nx, head) { f = list_entry(pos, struct frame, head); - if (tsince(f->tag) < timeout) + if (tsince_hr(f) < timeout) break; /* end of expired frames */ /* move to flist for later processing */ list_move_tail(pos, &flist); } } - /* window check */ - tt = d->targets; - te = tt + d->ntargets; - for (; tt < te && (t = *tt); tt++) { - if (t->nout == t->maxout - && t->maxout < t->nframes - && (jiffies - t->lastwadj)/HZ > 10) { - t->maxout++; - t->lastwadj = jiffies; - } - } - - if (!list_empty(&flist)) { /* retransmissions necessary */ - n = d->rttavg <<= 1; - if (n > MAXTIMER) - d->rttavg = MAXTIMER; - } /* process expired frames */ while (!list_empty(&flist)) { pos = flist.next; f = list_entry(pos, struct frame, head); - n = f->waited += timeout; - n /= HZ; - if (n > aoe_deadsecs) { + since = tsince_hr(f); + n = f->waited_total + since; + n /= USEC_PER_SEC; + if (aoe_deadsecs + && n > aoe_deadsecs + && !(f->flags & FFL_PROBE)) { /* Waited too long. Device failure. * Hang all frames on first hash bucket for downdev * to clean up. */ list_splice(&flist, &d->factive[0]); aoedev_downdev(d); - break; + goto out; } - list_del(pos); t = f->t; - if (n > aoe_deadsecs/2) - d->htgt = t; /* see if another target can help */ - - if (t->nout == t->maxout) { - if (t->maxout > 1) - t->maxout--; - t->lastwadj = jiffies; + n = f->waited + since; + n /= USEC_PER_SEC; + if (aoe_deadsecs && utgts > 0 + && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS)) + scorn(t); /* avoid this target */ + + if (t->maxout != 1) { + t->ssthresh = t->maxout / 2; + t->maxout = 1; } - ifp = getif(t, f->skb->dev); - if (ifp && ++ifp->lost > (t->nframes << 1) - && (ifp != t->ifs || t->ifs[1].nd)) { - ejectif(t, ifp); - ifp = NULL; + if (f->flags & FFL_PROBE) { + t->nout_probes--; + } else { + ifp = getif(t, f->skb->dev); + if (ifp && ++ifp->lost > (t->nframes << 1) + && (ifp != t->ifs || t->ifs[1].nd)) { + ejectif(t, ifp); + ifp = NULL; + } } - resend(d, f); + list_move_tail(pos, &d->rexmitq); + t->nout--; } + rexmit_deferred(d); - if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { +out: + if ((d->flags & DEVFL_KICKME) && d->blkq) { d->flags &= ~DEVFL_KICKME; d->blkq->request_fn(d->blkq); } @@ -688,21 +866,15 @@ rqbiocnt(struct request *r) static void bio_pageinc(struct bio *bio) { - struct bio_vec *bv; + struct bio_vec bv; struct page *page; - int i; + struct bvec_iter iter; - bio_for_each_segment(bv, bio, i) { - page = bv->bv_page; + bio_for_each_segment(bv, bio, iter) { /* Non-zero page count for non-head members of - * compound pages is no longer allowed by the kernel, - * but this has never been seen here. + * compound pages is no longer allowed by the kernel. */ - if (unlikely(PageCompound(page))) - if (compound_trans_head(page) != page) { - pr_crit("page tail used for block I/O\n"); - BUG(); - } + page = compound_head(bv.bv_page); atomic_inc(&page->_count); } } @@ -710,27 +882,24 @@ bio_pageinc(struct bio *bio) static void bio_pagedec(struct bio *bio) { - struct bio_vec *bv; - int i; + struct page *page; + struct bio_vec bv; + struct bvec_iter iter; - bio_for_each_segment(bv, bio, i) - atomic_dec(&bv->bv_page->_count); + bio_for_each_segment(bv, bio, iter) { + page = compound_head(bv.bv_page); + atomic_dec(&page->_count); + } } static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { - struct bio_vec *bv; - memset(buf, 0, sizeof(*buf)); buf->rq = rq; buf->bio = bio; - buf->resid = bio->bi_size; - buf->sector = bio->bi_sector; + buf->iter = bio->bi_iter; bio_pageinc(bio); - buf->bv = bv = &bio->bi_io_vec[bio->bi_idx]; - buf->bv_resid = bv->bv_len; - WARN_ON(buf->bv_resid == 0); } static struct buf * @@ -774,8 +943,7 @@ nextbuf(struct aoedev *d) void aoecmd_work(struct aoedev *d) { - if (d->htgt && !sthtith(d)) - return; + rexmit_deferred(d); while (aoecmd_ata_rw(d)) ; } @@ -809,6 +977,17 @@ aoecmd_sleepwork(struct work_struct *work) } static void +ata_ident_fixstring(u16 *id, int ns) +{ + u16 s; + + while (ns-- > 0) { + s = *id; + *id++ = s >> 8 | s << 8; + } +} + +static void ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) { u64 ssize; @@ -843,6 +1022,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->geo.sectors = get_unaligned_le16(&id[56 << 1]); } + ata_ident_fixstring((u16 *) &id[10<<1], 10); /* serial */ + ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */ + ata_ident_fixstring((u16 *) &id[27<<1], 20); /* model */ + memcpy(d->ident, id, sizeof(d->ident)); + if (d->ssize != ssize) printk(KERN_INFO "aoe: %pm e%ld.%d v%04x has %llu sectors\n", @@ -862,26 +1046,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) } static void -calc_rttavg(struct aoedev *d, int rtt) +calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt) { register long n; n = rtt; - if (n < 0) { - n = -rtt; - if (n < MINTIMER) - n = MINTIMER; - else if (n > MAXTIMER) - n = MAXTIMER; - d->mintimer += (n - d->mintimer) >> 1; - } else if (n < d->mintimer) - n = d->mintimer; - else if (n > MAXTIMER) - n = MAXTIMER; - - /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ - n -= d->rttavg; - d->rttavg += n >> 2; + + /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */ + n -= d->rttavg >> RTTSCALE; + d->rttavg += n; + if (n < 0) + n = -n; + n -= d->rttdev >> RTTDSCALE; + d->rttdev += n; + + if (!t || t->maxout >= t->nframes) + return; + if (t->maxout < t->ssthresh) + t->maxout += 1; + else if (t->nout == t->maxout && t->next_cwnd-- == 0) { + t->maxout += 1; + t->next_cwnd = t->maxout; + } } static struct aoetgt * @@ -890,7 +1076,7 @@ gettgt(struct aoedev *d, char *addr) struct aoetgt **t, **e; t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0) return *t; @@ -898,24 +1084,18 @@ gettgt(struct aoedev *d, char *addr) } static void -bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt) +bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt) { - ulong fcnt; - char *p; int soff = 0; -loop: - fcnt = bv->bv_len - (off - bv->bv_offset); - if (fcnt > cnt) - fcnt = cnt; - p = page_address(bv->bv_page) + off; - skb_copy_bits(skb, soff, p, fcnt); - soff += fcnt; - cnt -= fcnt; - if (cnt <= 0) - return; - bv++; - off = bv->bv_offset; - goto loop; + struct bio_vec bv; + + iter.bi_size = cnt; + + __bio_for_each_segment(bv, bio, iter, iter) { + char *p = page_address(bv.bv_page) + bv.bv_offset; + skb_copy_bits(skb, soff, p, bv.bv_len); + soff += bv.bv_len; + } } void @@ -931,11 +1111,11 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) do { bio = rq->bio; bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); - } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size)); + } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); /* cf. http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) - q->request_fn(q); + __blk_run_queue(q); } static void @@ -966,19 +1146,22 @@ ktiocomplete(struct frame *f) struct aoeif *ifp; struct aoedev *d; long n; + int untainted; if (f == NULL) return; t = f->t; d = t->d; + skb = f->r_skb; + buf = f->buf; + if (f->flags & FFL_PROBE) + goto out; + if (!skb) /* just fail the buf. */ + goto noskb; hout = (struct aoe_hdr *) skb_mac_header(f->skb); ahout = (struct aoe_atahdr *) (hout+1); - buf = f->buf; - skb = f->r_skb; - if (skb == NULL) - goto noskb; /* just fail the buf. */ hin = (struct aoe_hdr *) skb->data; skb_pull(skb, sizeof(*hin)); @@ -988,9 +1171,9 @@ ktiocomplete(struct frame *f) pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); -noskb: if (buf) +noskb: if (buf) clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); - goto badrsp; + goto out; } n = ahout->scnt << 9; @@ -998,25 +1181,35 @@ noskb: if (buf) case ATA_CMD_PIO_READ: case ATA_CMD_PIO_READ_EXT: if (skb->len < n) { - pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n", - skb->len, n); + pr_err("%s e%ld.%d. skb->len=%d need=%ld\n", + "aoe: runt data size in read from", + (long) d->aoemajor, d->aoeminor, + skb->len, n); + clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + break; + } + if (n > f->iter.bi_size) { + pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n", + "aoe: too-large data size in read from", + (long) d->aoemajor, d->aoeminor, + n, f->iter.bi_size); clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); break; } - bvcpy(f->bv, f->bv_off, skb, n); + bvcpy(skb, f->buf->bio, f->iter, n); case ATA_CMD_PIO_WRITE: case ATA_CMD_PIO_WRITE_EXT: spin_lock_irq(&d->lock); ifp = getif(t, skb->dev); if (ifp) ifp->lost = 0; - if (d->htgt == t) /* I'll help myself, thank you. */ - d->htgt = NULL; spin_unlock_irq(&d->lock); break; case ATA_CMD_ID_ATA: if (skb->len < 512) { - pr_info("aoe: runt data size in ataid. skb->len=%d\n", + pr_info("%s e%ld.%d. skb->len=%d need=512\n", + "aoe: runt data size in ataid from", + (long) d->aoemajor, d->aoeminor, skb->len); break; } @@ -1032,16 +1225,23 @@ noskb: if (buf) be16_to_cpu(get_unaligned(&hin->major)), hin->minor); } -badrsp: +out: spin_lock_irq(&d->lock); + if (t->taint > 0 + && --t->taint > 0 + && t->nout_probes == 0) { + count_targets(d, &untainted); + if (untainted > 0) { + probe(t); + t->nout_probes++; + } + } aoe_freetframe(f); - if (buf && --buf->nframesout == 0 && buf->resid == 0) + if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0) aoe_end_buf(d, buf); - aoecmd_work(d); - spin_unlock_irq(&d->lock); aoedev_put(d); dev_kfree_skb(skb); @@ -1051,23 +1251,36 @@ badrsp: * Returns true iff responses needing processing remain. */ static int -ktio(void) +ktio(int id) { struct frame *f; struct list_head *pos; int i; + int actual_id; for (i = 0; ; ++i) { if (i == MAXIOC) return 1; - if (list_empty(&iocq.head)) + if (list_empty(&iocq[id].head)) return 0; - pos = iocq.head.next; + pos = iocq[id].head.next; list_del(pos); - spin_unlock_irq(&iocq.lock); f = list_entry(pos, struct frame, head); + spin_unlock_irq(&iocq[id].lock); ktiocomplete(f); - spin_lock_irq(&iocq.lock); + + /* Figure out if extra threads are required. */ + actual_id = f->t->d->aoeminor % ncpus; + + if (!kts[actual_id].active) { + BUG_ON(id != 0); + mutex_lock(&ktio_spawn_lock); + if (!kts[actual_id].active + && aoe_ktstart(&kts[actual_id]) == 0) + kts[actual_id].active = 1; + mutex_unlock(&ktio_spawn_lock); + } + spin_lock_irq(&iocq[id].lock); } } @@ -1084,7 +1297,7 @@ kthread(void *vp) complete(&k->rendez); /* tell spawner we're running */ do { spin_lock_irq(k->lock); - more = k->fn(); + more = k->fn(k->id); if (!more) { add_wait_queue(k->waitq, &wait); __set_current_state(TASK_INTERRUPTIBLE); @@ -1113,7 +1326,7 @@ aoe_ktstart(struct ktstate *k) struct task_struct *task; init_completion(&k->rendez); - task = kthread_run(kthread, k, k->name); + task = kthread_run(kthread, k, "%s", k->name); if (task == NULL || IS_ERR(task)) return -ENOMEM; k->task = task; @@ -1126,13 +1339,24 @@ aoe_ktstart(struct ktstate *k) static void ktcomplete(struct frame *f, struct sk_buff *skb) { + int id; ulong flags; f->r_skb = skb; - spin_lock_irqsave(&iocq.lock, flags); - list_add_tail(&f->head, &iocq.head); - spin_unlock_irqrestore(&iocq.lock, flags); - wake_up(&ktiowq); + id = f->t->d->aoeminor % ncpus; + spin_lock_irqsave(&iocq[id].lock, flags); + if (!kts[id].active) { + spin_unlock_irqrestore(&iocq[id].lock, flags); + /* The thread with id has not been spawned yet, + * so delegate the work to the main thread and + * try spawning a new thread. + */ + id = 0; + spin_lock_irqsave(&iocq[id].lock, flags); + } + list_add_tail(&f->head, &iocq[id].head); + spin_unlock_irqrestore(&iocq[id].lock, flags); + wake_up(&ktiowq[id]); } struct sk_buff * @@ -1141,7 +1365,6 @@ aoecmd_ata_rsp(struct sk_buff *skb) struct aoedev *d; struct aoe_hdr *h; struct frame *f; - struct aoetgt *t; u32 n; ulong flags; char ebuf[128]; @@ -1162,23 +1385,32 @@ aoecmd_ata_rsp(struct sk_buff *skb) n = be32_to_cpu(get_unaligned(&h->tag)); f = getframe(d, n); - if (f == NULL) { - calc_rttavg(d, -tsince(n)); - spin_unlock_irqrestore(&d->lock, flags); - aoedev_put(d); - snprintf(ebuf, sizeof ebuf, - "%15s e%d.%d tag=%08x@%08lx\n", - "unexpected rsp", - get_unaligned_be16(&h->major), - h->minor, - get_unaligned_be32(&h->tag), - jiffies); - aoechr_error(ebuf); - return skb; + if (f) { + calc_rttavg(d, f->t, tsince_hr(f)); + f->t->nout--; + if (f->flags & FFL_PROBE) + f->t->nout_probes--; + } else { + f = getframe_deferred(d, n); + if (f) { + calc_rttavg(d, NULL, tsince_hr(f)); + } else { + calc_rttavg(d, NULL, tsince(n)); + spin_unlock_irqrestore(&d->lock, flags); + aoedev_put(d); + snprintf(ebuf, sizeof(ebuf), + "%15s e%d.%d tag=%08x@%08lx s=%pm d=%pm\n", + "unexpected rsp", + get_unaligned_be16(&h->major), + h->minor, + get_unaligned_be32(&h->tag), + jiffies, + h->src, + h->dst); + aoechr_error(ebuf); + return skb; + } } - t = f->t; - calc_rttavg(d, tsince(f->tag)); - t->nout--; aoecmd_work(d); spin_unlock_irqrestore(&d->lock, flags); @@ -1201,7 +1433,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) aoecmd_cfg_pkts(aoemajor, aoeminor, &queue); aoenet_xmit(&queue); } - + struct sk_buff * aoecmd_ata_id(struct aoedev *d) { @@ -1227,6 +1459,7 @@ aoecmd_ata_id(struct aoedev *d) fhash(f); t->nout++; f->waited = 0; + f->waited_total = 0; /* set up ata header */ ah->scnt = 1; @@ -1235,41 +1468,69 @@ aoecmd_ata_id(struct aoedev *d) skb->dev = t->ifp->nd; - d->rttavg = MAXTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->timer.function = rexmit_timer; - return skb_clone(skb, GFP_ATOMIC); + skb = skb_clone(skb, GFP_ATOMIC); + if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; + } + + return skb; +} + +static struct aoetgt ** +grow_targets(struct aoedev *d) +{ + ulong oldn, newn; + struct aoetgt **tt; + + oldn = d->ntargets; + newn = oldn * 2; + tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC); + if (!tt) + return NULL; + memmove(tt, d->targets, sizeof(*d->targets) * oldn); + d->tgt = tt + (d->tgt - d->targets); + kfree(d->targets); + d->targets = tt; + d->ntargets = newn; + + return &d->targets[oldn]; } - + static struct aoetgt * addtgt(struct aoedev *d, char *addr, ulong nframes) { struct aoetgt *t, **tt, **te; tt = d->targets; - te = tt + NTARGETS; + te = tt + d->ntargets; for (; tt < te && *tt; tt++) ; if (tt == te) { - printk(KERN_INFO - "aoe: device addtgt failure; too many targets\n"); - return NULL; + tt = grow_targets(d); + if (!tt) + goto nomem; } t = kzalloc(sizeof(*t), GFP_ATOMIC); - if (!t) { - printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); - return NULL; - } - - d->ntargets++; + if (!t) + goto nomem; t->nframes = nframes; t->d = d; memcpy(t->addr, addr, sizeof t->addr); t->ifp = t->ifs; - t->maxout = t->nframes; + aoecmd_wreset(t); + t->maxout = t->nframes / 2; INIT_LIST_HEAD(&t->ffree); return *tt = t; + + nomem: + pr_info("aoe: cannot allocate memory to add target\n"); + return NULL; } static void @@ -1279,7 +1540,7 @@ setdbcnt(struct aoedev *d) int bcnt = 0; t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) if (bcnt == 0 || bcnt > (*t)->minbcnt) bcnt = (*t)->minbcnt; @@ -1373,7 +1634,11 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); t = gettgt(d, h->src); - if (!t) { + if (t) { + t->nframes = n; + if (n < t->maxout) + aoecmd_wreset(t); + } else { t = addtgt(d, h->src, n); if (!t) goto bail; @@ -1402,17 +1667,26 @@ bail: } void +aoecmd_wreset(struct aoetgt *t) +{ + t->maxout = 1; + t->ssthresh = t->nframes / 2; + t->next_cwnd = t->nframes; +} + +void aoecmd_cleanslate(struct aoedev *d) { struct aoetgt **t, **te; - d->mintimer = MINTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->maxbcnt = 0; t = d->targets; - te = t + NTARGETS; + te = t + d->ntargets; for (; t < te && *t; t++) - (*t)->maxout = (*t)->nframes; + aoecmd_wreset(*t); } void @@ -1420,7 +1694,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) { if (buf == NULL) return; - buf->resid = 0; + buf->iter.bi_size = 0; clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); if (buf->nframesout == 0) aoe_end_buf(d, buf); @@ -1429,6 +1703,17 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) void aoe_flush_iocq(void) { + int i; + + for (i = 0; i < ncpus; i++) { + if (kts[i].active) + aoe_flush_iocq_by_index(i); + } +} + +void +aoe_flush_iocq_by_index(int id) +{ struct frame *f; struct aoedev *d; LIST_HEAD(flist); @@ -1436,9 +1721,9 @@ aoe_flush_iocq(void) struct sk_buff *skb; ulong flags; - spin_lock_irqsave(&iocq.lock, flags); - list_splice_init(&iocq.head, &flist); - spin_unlock_irqrestore(&iocq.lock, flags); + spin_lock_irqsave(&iocq[id].lock, flags); + list_splice_init(&iocq[id].head, &flist); + spin_unlock_irqrestore(&iocq[id].lock, flags); while (!list_empty(&flist)) { pos = flist.next; list_del(pos); @@ -1460,19 +1745,82 @@ aoe_flush_iocq(void) int __init aoecmd_init(void) { - INIT_LIST_HEAD(&iocq.head); - spin_lock_init(&iocq.lock); - init_waitqueue_head(&ktiowq); - kts.name = "aoe_ktio"; - kts.fn = ktio; - kts.waitq = &ktiowq; - kts.lock = &iocq.lock; - return aoe_ktstart(&kts); + void *p; + int i; + int ret; + + /* get_zeroed_page returns page with ref count 1 */ + p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + if (!p) + return -ENOMEM; + empty_page = virt_to_page(p); + + ncpus = num_online_cpus(); + + iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL); + if (!iocq) + return -ENOMEM; + + kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL); + if (!kts) { + ret = -ENOMEM; + goto kts_fail; + } + + ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL); + if (!ktiowq) { + ret = -ENOMEM; + goto ktiowq_fail; + } + + mutex_init(&ktio_spawn_lock); + + for (i = 0; i < ncpus; i++) { + INIT_LIST_HEAD(&iocq[i].head); + spin_lock_init(&iocq[i].lock); + init_waitqueue_head(&ktiowq[i]); + snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i); + kts[i].fn = ktio; + kts[i].waitq = &ktiowq[i]; + kts[i].lock = &iocq[i].lock; + kts[i].id = i; + kts[i].active = 0; + } + kts[0].active = 1; + if (aoe_ktstart(&kts[0])) { + ret = -ENOMEM; + goto ktstart_fail; + } + return 0; + +ktstart_fail: + kfree(ktiowq); +ktiowq_fail: + kfree(kts); +kts_fail: + kfree(iocq); + + return ret; } void aoecmd_exit(void) { - aoe_ktstop(&kts); + int i; + + for (i = 0; i < ncpus; i++) + if (kts[i].active) + aoe_ktstop(&kts[i]); + aoe_flush_iocq(); + + /* Free up the iocq and thread speicific configuration + * allocated during startup. + */ + kfree(iocq); + kfree(kts); + kfree(ktiowq); + + free_page((unsigned long) page_address(empty_page)); + empty_page = NULL; } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 90e5b537f94..e774c50b684 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoedev.c * AoE device utility functions; maintains device list. @@ -12,10 +12,10 @@ #include <linux/bitmap.h> #include <linux/kdev_t.h> #include <linux/moduleparam.h> +#include <linux/string.h> #include "aoe.h" static void dummy_timer(ulong); -static void aoedev_freedev(struct aoedev *); static void freetgt(struct aoedev *d, struct aoetgt *t); static void skbpoolfree(struct aoedev *d); @@ -69,25 +69,34 @@ minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin) NPERSHELF = 16, }; + if (aoemin >= NPERSHELF) { + pr_err("aoe: %s %d slots per shelf\n", + "static minor device numbers support only", + NPERSHELF); + error = -1; + goto out; + } + n = aoemaj * NPERSHELF + aoemin; - if (aoemin >= NPERSHELF || n >= N_DEVS) { + if (n >= N_DEVS) { pr_err("aoe: %s with e%ld.%d\n", "cannot use static minor device numbers", aoemaj, aoemin); error = -1; - } else { - spin_lock_irqsave(&used_minors_lock, flags); - if (test_bit(n, used_minors)) { - pr_err("aoe: %s %lu\n", - "existing device already has static minor number", - n); - error = -1; - } else - set_bit(n, used_minors); - spin_unlock_irqrestore(&used_minors_lock, flags); + goto out; } - *sysminor = n; + spin_lock_irqsave(&used_minors_lock, flags); + if (test_bit(n, used_minors)) { + pr_err("aoe: %s %lu\n", + "existing device already has static minor number", + n); + error = -1; + } else + set_bit(n, used_minors); + spin_unlock_irqrestore(&used_minors_lock, flags); + *sysminor = n * AOE_PARTITIONS; +out: return error; } @@ -170,41 +179,50 @@ aoe_failip(struct aoedev *d) aoe_end_request(d, rq, 0); } +static void +downdev_frame(struct list_head *pos) +{ + struct frame *f; + + f = list_entry(pos, struct frame, head); + list_del(pos); + if (f->buf) { + f->buf->nframesout--; + aoe_failbuf(f->t->d, f->buf); + } + aoe_freetframe(f); +} + void aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; - struct frame *f; struct list_head *head, *pos, *nx; struct request *rq; int i; d->flags &= ~DEVFL_UP; - /* clean out active buffers */ + /* clean out active and to-be-retransmitted buffers */ for (i = 0; i < NFACTIVE; i++) { head = &d->factive[i]; - list_for_each_safe(pos, nx, head) { - f = list_entry(pos, struct frame, head); - list_del(pos); - if (f->buf) { - f->buf->nframesout--; - aoe_failbuf(d, f->buf); - } - aoe_freetframe(f); - } + list_for_each_safe(pos, nx, head) + downdev_frame(pos); } + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) + downdev_frame(pos); + /* reset window dressings */ tt = d->targets; - te = tt + NTARGETS; + te = tt + d->ntargets; for (; tt < te && (t = *tt); tt++) { - t->maxout = t->nframes; + aoecmd_wreset(t); t->nout = 0; } /* clean out the in-process request (if any) */ aoe_failip(d); - d->htgt = NULL; /* fast fail all pending I/O */ if (d->blkq) { @@ -218,74 +236,159 @@ aoedev_downdev(struct aoedev *d) set_capacity(d->gd, 0); } +/* return whether the user asked for this particular + * device to be flushed + */ +static int +user_req(char *s, size_t slen, struct aoedev *d) +{ + const char *p; + size_t lim; + + if (!d->gd) + return 0; + p = kbasename(d->gd->disk_name); + lim = sizeof(d->gd->disk_name); + lim -= p - d->gd->disk_name; + if (slen < lim) + lim = slen; + + return !strncmp(s, p, lim); +} + static void -aoedev_freedev(struct aoedev *d) +freedev(struct aoedev *d) { struct aoetgt **t, **e; + int freeing = 0; + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_TKILL + && !(d->flags & DEVFL_FREEING)) { + d->flags |= DEVFL_FREEING; + freeing = 1; + } + spin_unlock_irqrestore(&d->lock, flags); + if (!freeing) + return; - cancel_work_sync(&d->work); + del_timer_sync(&d->timer); if (d->gd) { + aoedisk_rm_debugfs(d); aoedisk_rm_sysfs(d); del_gendisk(d->gd); put_disk(d->gd); blk_cleanup_queue(d->blkq); } t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) freetgt(d, *t); if (d->bufpool) mempool_destroy(d->bufpool); skbpoolfree(d); minor_free(d->sysminor); - kfree(d); + + spin_lock_irqsave(&d->lock, flags); + d->flags |= DEVFL_FREED; + spin_unlock_irqrestore(&d->lock, flags); } -int -aoedev_flush(const char __user *str, size_t cnt) +enum flush_parms { + NOT_EXITING = 0, + EXITING = 1, +}; + +static int +flush(const char __user *str, size_t cnt, int exiting) { ulong flags; struct aoedev *d, **dd; - struct aoedev *rmd = NULL; char buf[16]; int all = 0; + int specified = 0; /* flush a specific device */ + unsigned int skipflags; - if (cnt >= 3) { + skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL; + + if (!exiting && cnt >= 3) { if (cnt > sizeof buf) cnt = sizeof buf; if (copy_from_user(buf, str, cnt)) return -EFAULT; all = !strncmp(buf, "all", 3); + if (!all) + specified = 1; } + flush_scheduled_work(); + /* pass one: without sleeping, do aoedev_downdev */ spin_lock_irqsave(&devlist_lock, flags); - dd = &devlist; - while ((d = *dd)) { + for (d = devlist; d; d = d->next) { spin_lock(&d->lock); - if ((!all && (d->flags & DEVFL_UP)) - || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) + if (exiting) { + /* unconditionally take each device down */ + } else if (specified) { + if (!user_req(buf, cnt, d)) + goto cont; + } else if ((!all && (d->flags & DEVFL_UP)) + || d->flags & skipflags || d->nopen - || d->ref) { - spin_unlock(&d->lock); - dd = &d->next; - continue; - } - *dd = d->next; + || d->ref) + goto cont; + aoedev_downdev(d); d->flags |= DEVFL_TKILL; +cont: spin_unlock(&d->lock); - d->next = rmd; - rmd = d; } spin_unlock_irqrestore(&devlist_lock, flags); - while ((d = rmd)) { - rmd = d->next; - del_timer_sync(&d->timer); - aoedev_freedev(d); /* must be able to sleep */ + + /* pass two: call freedev, which might sleep, + * for aoedevs marked with DEVFL_TKILL + */ +restart: + spin_lock_irqsave(&devlist_lock, flags); + for (d = devlist; d; d = d->next) { + spin_lock(&d->lock); + if (d->flags & DEVFL_TKILL + && !(d->flags & DEVFL_FREEING)) { + spin_unlock(&d->lock); + spin_unlock_irqrestore(&devlist_lock, flags); + freedev(d); + goto restart; + } + spin_unlock(&d->lock); } + + /* pass three: remove aoedevs marked with DEVFL_FREED */ + for (dd = &devlist, d = *dd; d; d = *dd) { + struct aoedev *doomed = NULL; + + spin_lock(&d->lock); + if (d->flags & DEVFL_FREED) { + *dd = d->next; + doomed = d; + } else { + dd = &d->next; + } + spin_unlock(&d->lock); + if (doomed) + kfree(doomed->targets); + kfree(doomed); + } + spin_unlock_irqrestore(&devlist_lock, flags); + return 0; } +int +aoedev_flush(const char __user *str, size_t cnt) +{ + return flush(str, cnt, NOT_EXITING); +} + /* This has been confirmed to occur once with Tms=3*1000 due to the * driver changing link and not processing its transmit ring. The * problem is hard enough to solve by returning an error that I'm @@ -332,13 +435,20 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) struct aoedev *d; int i; ulong flags; - ulong sysminor; + ulong sysminor = 0; spin_lock_irqsave(&devlist_lock, flags); for (d=devlist; d; d=d->next) if (d->aoemajor == maj && d->aoeminor == min) { + spin_lock(&d->lock); + if (d->flags & DEVFL_TKILL) { + spin_unlock(&d->lock); + d = NULL; + goto out; + } d->ref++; + spin_unlock(&d->lock); break; } if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) @@ -346,6 +456,13 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d = kcalloc(1, sizeof *d, GFP_ATOMIC); if (!d) goto out; + d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); + if (!d->targets) { + kfree(d); + d = NULL; + goto out; + } + d->ntargets = NTARGETS; INIT_WORK(&d->work, aoecmd_sleepwork); spin_lock_init(&d->lock); skb_queue_head_init(&d->skbpool); @@ -359,10 +476,12 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d->ref = 1; for (i = 0; i < NFACTIVE; i++) INIT_LIST_HEAD(&d->factive[i]); + INIT_LIST_HEAD(&d->rexmitq); d->sysminor = sysminor; d->aoemajor = maj; d->aoeminor = min; - d->mintimer = MINTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->next = devlist; devlist = d; out: @@ -396,21 +515,8 @@ freetgt(struct aoedev *d, struct aoetgt *t) void aoedev_exit(void) { - struct aoedev *d; - ulong flags; - - aoe_flush_iocq(); - while ((d = devlist)) { - devlist = d->next; - - spin_lock_irqsave(&d->lock, flags); - aoedev_downdev(d); - d->flags |= DEVFL_TKILL; - spin_unlock_irqrestore(&d->lock, flags); - - del_timer_sync(&d->timer); - aoedev_freedev(d); - } + flush_scheduled_work(); + flush(NULL, 0, EXITING); } int __init diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 04793c2c701..4b987c2fefb 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -105,7 +105,7 @@ aoe_init(void) aoechr_exit(); chr_fail: aoedev_exit(); - + printk(KERN_INFO "aoe: initialisation failure.\n"); return ret; } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 162c6471275..63773a90581 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoenet.c * Ethernet portion of AoE driver @@ -31,7 +31,7 @@ enum { static char aoe_iflist[IFLISTSZ]; module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); -MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\""); +MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]"); static wait_queue_head_t txwq; static struct ktstate kts; @@ -52,13 +52,18 @@ static struct sk_buff_head skbtxq; /* enters with txlock held */ static int -tx(void) +tx(int id) __must_hold(&txlock) { struct sk_buff *skb; + struct net_device *ifp; while ((skb = skb_dequeue(&skbtxq))) { spin_unlock_irq(&txlock); - dev_queue_xmit(skb); + ifp = skb->dev; + if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit()) + pr_warn("aoe: packet could not be sent on %s. %s\n", + ifp ? ifp->name : "netif", + "consider increasing tx_queue_len"); spin_lock_irq(&txlock); } return 0; @@ -119,8 +124,8 @@ aoenet_xmit(struct sk_buff_head *queue) } } -/* - * (1) len doesn't include the header by default. I want this. +/* + * (1) len doesn't include the header by default. I want this. */ static int aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) @@ -200,7 +205,8 @@ aoenet_init(void) kts.lock = &txlock; kts.fn = tx; kts.waitq = &txwq; - kts.name = "aoe_tx"; + kts.id = 0; + snprintf(kts.name, sizeof(kts.name), "aoe_tx%d", kts.id); if (aoe_ktstart(&kts)) return -EAGAIN; dev_add_pack(&aoe_pt); |
