diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 20:58:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 20:58:12 -0800 |
commit | 848b81415c42ff3dc9a4204749087b015c37ef66 (patch) | |
tree | 391da3a73aea48632248220d2d6b8d45a88f7eae /drivers | |
parent | 992956189de58cae9f2be40585bc25105cd7c5ad (diff) | |
parent | 6fd59a83b9261fa53eaf98fb5514abba504a3ea3 (diff) |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:
"Incoming:
- lots of misc stuff
- backlight tree updates
- lib/ updates
- Oleg's percpu-rwsem changes
- checkpatch
- rtc
- aoe
- more checkpoint/restart support
I still have a pile of MM stuff pending - Pekka should be merging
later today after which that is good to go. A number of other things
are twiddling thumbs awaiting maintainer merges."
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (180 commits)
scatterlist: don't BUG when we can trivially return a proper error.
docs: update documentation about /proc/<pid>/fdinfo/<fd> fanotify output
fs, fanotify: add @mflags field to fanotify output
docs: add documentation about /proc/<pid>/fdinfo/<fd> output
fs, notify: add procfs fdinfo helper
fs, exportfs: add exportfs_encode_inode_fh() helper
fs, exportfs: escape nil dereference if no s_export_op present
fs, epoll: add procfs fdinfo helper
fs, eventfd: add procfs fdinfo helper
procfs: add ability to plug in auxiliary fdinfo providers
tools/testing/selftests/kcmp/kcmp_test.c: print reason for failure in kcmp_test
breakpoint selftests: print failure status instead of cause make error
kcmp selftests: print fail status instead of cause make error
kcmp selftests: make run_tests fix
mem-hotplug selftests: print failure status instead of cause make error
cpu-hotplug selftests: print failure status instead of cause make error
mqueue selftests: print failure status instead of cause make error
vm selftests: print failure status instead of cause make error
ubifs: use prandom_bytes
mtd: nandsim: use prandom_bytes
...
Diffstat (limited to 'drivers')
59 files changed, 1978 insertions, 710 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index d2ed7f18d1a..175649468c9 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "50" +#define VERSION "81" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -10,7 +10,7 @@ #define AOE_PARTITIONS (16) #endif -#define WHITESPACE " \t\v\f\n" +#define WHITESPACE " \t\v\f\n," enum { AOECMD_ATA, @@ -73,21 +73,29 @@ enum { DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */ - DEVFL_KICKME = (1<<4), /* slow polling network card catch */ - DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */ + DEVFL_GD_NOW = (1<<4), /* allocating gendisk */ + DEVFL_KICKME = (1<<5), /* slow polling network card catch */ + DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ + DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ + DEVFL_FREED = (1<<8), /* device has been cleaned up */ }; enum { DEFAULTBCNT = 2 * 512, /* 2 sectors */ MIN_BUFS = 16, - NTARGETS = 8, + NTARGETS = 4, NAOEIFS = 8, NSKBPOOLMAX = 256, NFACTIVE = 61, TIMERTICK = HZ / 10, - MINTIMER = HZ >> 2, - MAXTIMER = HZ << 1, + RTTSCALE = 8, + RTTDSCALE = 3, + RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE, + RTTDEV_INIT = RTTAVG_INIT / 4, + + HARD_SCORN_SECS = 10, /* try another remote port after this */ + MAX_TAINT = 1000, /* cap on aoetgt taint */ }; struct buf { @@ -100,10 +108,17 @@ struct buf { struct request *rq; }; +enum frame_flags { + FFL_PROBE = 1, +}; + struct frame { struct list_head head; u32 tag; + struct timeval sent; /* high-res time packet was sent */ + u32 sent_jiffs; /* low-res jiffies-based sent time */ ulong waited; + ulong waited_total; struct aoetgt *t; /* parent target I belong to */ sector_t lba; struct sk_buff *skb; /* command skb freed on module exit */ @@ -112,6 +127,7 @@ struct frame { struct bio_vec *bv; ulong bcnt; ulong bv_off; + char flags; }; struct aoeif { @@ -122,28 +138,31 @@ struct aoeif { struct aoetgt { unsigned char addr[6]; - ushort nframes; + ushort nframes; /* cap on frames to use */ struct aoedev *d; /* parent device I belong to */ struct list_head ffree; /* list of free frames */ struct aoeif ifs[NAOEIFS]; struct aoeif *ifp; /* current aoeif in use */ - ushort nout; - ushort maxout; - ulong falloc; - ulong lastwadj; /* last window adjustment */ + ushort nout; /* number of AoE commands outstanding */ + ushort maxout; /* current value for max outstanding */ + ushort next_cwnd; /* incr maxout after decrementing to zero */ + ushort ssthresh; /* slow start threshold */ + ulong falloc; /* number of allocated frames */ + int taint; /* how much we want to avoid this aoetgt */ int minbcnt; int wpkts, rpkts; + char nout_probes; }; struct aoedev { struct aoedev *next; ulong sysminor; ulong aoemajor; + u32 rttavg; /* scaled AoE round trip time average */ + u32 rttdev; /* scaled round trip time mean deviation */ u16 aoeminor; u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ - u16 rttavg; /* round trip average of requests/responses */ - u16 mintimer; u16 fw_ver; /* version of blade's firmware */ u16 lasttag; /* last tag sent */ u16 useme; @@ -151,7 +170,7 @@ struct aoedev { struct work_struct work;/* disk create work struct */ struct gendisk *gd; struct request_queue *blkq; - struct hd_geometry geo; + struct hd_geometry geo; sector_t ssize; struct timer_list timer; spinlock_t lock; @@ -164,11 +183,12 @@ struct aoedev { } ip; ulong maxbcnt; struct list_head factive[NFACTIVE]; /* hash of active frames */ - struct aoetgt *targets[NTARGETS]; + struct list_head rexmitq; /* deferred retransmissions */ + struct aoetgt **targets; + ulong ntargets; /* number of allocated aoetgt pointers */ struct aoetgt **tgt; /* target in use when working */ - struct aoetgt *htgt; /* target needing rexmit assistance */ - ulong ntargets; ulong kicked; + char ident[512]; }; /* kthread tracking */ @@ -195,6 +215,7 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); struct sk_buff *aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); void aoecmd_sleepwork(struct work_struct *); +void aoecmd_wreset(struct aoetgt *t); void aoecmd_cleanslate(struct aoedev *); void aoecmd_exit(void); int aoecmd_init(void); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00dfc5008ad..a129f8c8073 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -16,11 +16,19 @@ #include <linux/netdevice.h> #include <linux/mutex.h> #include <linux/export.h> +#include <linux/moduleparam.h> +#include <scsi/sg.h> #include "aoe.h" static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; +/* GPFS needs a larger value than the default. */ +static int aoe_maxsectors; +module_param(aoe_maxsectors, int, 0644); +MODULE_PARM_DESC(aoe_maxsectors, + "When nonzero, set the maximum number of sectors per I/O request"); + static ssize_t aoedisk_show_state(struct device *dev, struct device_attribute *attr, char *page) { @@ -59,7 +67,7 @@ static ssize_t aoedisk_show_netif(struct device *dev, nd = nds; ne = nd + ARRAY_SIZE(nds); t = d->targets; - te = t + NTARGETS; + te = t + d->ntargets; for (; t < te && *t; t++) { ifp = (*t)->ifs; e = ifp + NAOEIFS; @@ -91,6 +99,14 @@ static ssize_t aoedisk_show_fwver(struct device *dev, return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); } +static ssize_t aoedisk_show_payload(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct gendisk *disk = dev_to_disk(dev); + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); +} static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); @@ -99,12 +115,14 @@ static struct device_attribute dev_attr_firmware_version = { .attr = { .name = "firmware-version", .mode = S_IRUGO }, .show = aoedisk_show_fwver, }; +static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL); static struct attribute *aoe_attrs[] = { &dev_attr_state.attr, &dev_attr_mac.attr, &dev_attr_netif.attr, &dev_attr_firmware_version.attr, + &dev_attr_payload.attr, NULL, }; @@ -129,9 +147,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode) struct aoedev *d = bdev->bd_disk->private_data; ulong flags; + if (!virt_addr_valid(d)) { + pr_crit("aoe: invalid device pointer in %s\n", + __func__); + WARN_ON(1); + return -ENODEV; + } + if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL) + return -ENODEV; + mutex_lock(&aoeblk_mutex); spin_lock_irqsave(&d->lock, flags); - if (d->flags & DEVFL_UP) { + if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) { d->nopen++; spin_unlock_irqrestore(&d->lock, flags); mutex_unlock(&aoeblk_mutex); @@ -195,9 +222,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +static int +aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg) +{ + struct aoedev *d; + + if (!arg) + return -EINVAL; + + d = bdev->bd_disk->private_data; + if ((d->flags & DEVFL_UP) == 0) { + pr_err("aoe: disk not up\n"); + return -ENODEV; + } + + if (cmd == HDIO_GET_IDENTITY) { + if (!copy_to_user((void __user *) arg, &d->ident, + sizeof(d->ident))) + return 0; + return -EFAULT; + } + + /* udev calls scsi_id, which uses SG_IO, resulting in noise */ + if (cmd != SG_IO) + pr_info("aoe: unknown ioctl 0x%x\n", cmd); + + return -ENOTTY; +} + static const struct block_device_operations aoe_bdops = { .open = aoeblk_open, .release = aoeblk_release, + .ioctl = aoeblk_ioctl, .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; @@ -212,6 +268,18 @@ aoeblk_gdalloc(void *vp) struct request_queue *q; enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; + int late = 0; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_GDALLOC + && !(d->flags & DEVFL_TKILL) + && !(d->flags & DEVFL_GD_NOW)) + d->flags |= DEVFL_GD_NOW; + else + late = 1; + spin_unlock_irqrestore(&d->lock, flags); + if (late) + return; gd = alloc_disk(AOE_PARTITIONS); if (gd == NULL) { @@ -231,23 +299,24 @@ aoeblk_gdalloc(void *vp) if (q == NULL) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); - mempool_destroy(mp); - goto err_disk; + goto err_mempool; } - d->blkq = blk_alloc_queue(GFP_KERNEL); - if (!d->blkq) - goto err_mempool; - d->blkq->backing_dev_info.name = "aoe"; - if (bdi_init(&d->blkq->backing_dev_info)) - goto err_blkq; spin_lock_irqsave(&d->lock, flags); - blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + WARN_ON(!(d->flags & DEVFL_GDALLOC)); + WARN_ON(d->flags & DEVFL_TKILL); + WARN_ON(d->gd); + WARN_ON(d->flags & DEVFL_UP); + blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); + q->backing_dev_info.name = "aoe"; q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; d->gd = gd; + if (aoe_maxsectors) + blk_queue_max_hw_sectors(q, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->fops = &aoe_bdops; @@ -263,18 +332,21 @@ aoeblk_gdalloc(void *vp) add_disk(gd); aoedisk_add_sysfs(d); + + spin_lock_irqsave(&d->lock, flags); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + d->flags &= ~DEVFL_GD_NOW; + spin_unlock_irqrestore(&d->lock, flags); return; -err_blkq: - blk_cleanup_queue(d->blkq); - d->blkq = NULL; err_mempool: - mempool_destroy(d->bufpool); + mempool_destroy(mp); err_disk: put_disk(gd); err: spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_GDALLOC; + d->flags &= ~DEVFL_GD_NOW; + schedule_work(&d->work); spin_unlock_irqrestore(&d->lock, flags); } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index ed57a890c64..42e67ad6bd2 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -39,6 +39,11 @@ struct ErrMsg { }; static DEFINE_MUTEX(aoechr_mutex); + +/* A ring buffer of error messages, to be read through + * "/dev/etherd/err". When no messages are present, + * readers will block waiting for messages to appear. + */ static struct ErrMsg emsgs[NMSG]; static int emsgs_head_idx, emsgs_tail_idx; static struct completion emsgs_comp; @@ -282,7 +287,7 @@ aoechr_init(void) int n, i; n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops); - if (n < 0) { + if (n < 0) { printk(KERN_ERR "aoe: can't register char device\n"); return n; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 9fe4f186555..25ef5c014fc 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -22,6 +22,7 @@ #define MAXIOC (8192) /* default meant to avoid most soft lockups */ static void ktcomplete(struct frame *, struct sk_buff *); +static int count_targets(struct aoedev *d, int *untainted); static struct buf *nextbuf(struct aoedev *); @@ -29,7 +30,7 @@ static int aoe_deadsecs = 60 * 3; module_param(aoe_deadsecs, int, 0644); MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); -static int aoe_maxout = 16; +static int aoe_maxout = 64; module_param(aoe_maxout, int, 0644); MODULE_PARM_DESC(aoe_maxout, "Only aoe_maxout outstanding packets for every MAC on eX.Y."); @@ -43,6 +44,8 @@ static struct { spinlock_t lock; } iocq; +static struct page *empty_page; + static struct sk_buff * new_skb(ulong len) { @@ -59,6 +62,23 @@ new_skb(ulong len) } static struct frame * +getframe_deferred(struct aoedev *d, u32 tag) +{ + struct list_head *head, *pos, *nx; + struct frame *f; + + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) { + f = list_entry(pos, struct frame, head); + if (f->tag == tag) { + list_del(pos); + return f; + } + } + return NULL; +} + +static struct frame * getframe(struct aoedev *d, u32 tag) { struct frame *f; @@ -162,8 +182,10 @@ aoe_freetframe(struct frame *f) t = f->t; f->buf = NULL; + f->lba = 0; f->bv = NULL; f->r_skb = NULL; + f->flags = 0; list_add(&f->head, &t->ffree); } @@ -217,20 +239,25 @@ newframe(struct aoedev *d) struct frame *f; struct aoetgt *t, **tt; int totout = 0; + int use_tainted; + int has_untainted; - if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ + if (!d->targets || !d->targets[0]) { printk(KERN_ERR "aoe: NULL TARGETS!\n"); return NULL; } tt = d->tgt; /* last used target */ - for (;;) { + for (use_tainted = 0, has_untainted = 0;;) { tt++; - if (tt >= &d->targets[NTARGETS] || !*tt) + if (tt >= &d->targets[d->ntargets] || !*tt) tt = d->targets; t = *tt; - totout += t->nout; + if (!t->taint) { + has_untainted = 1; + totout += t->nout; + } if (t->nout < t->maxout - && t != d->htgt + && (use_tainted || !t->taint) && t->ifp->nd) { f = newtframe(d, t); if (f) { @@ -239,8 +266,12 @@ newframe(struct aoedev *d) return f; } } - if (tt == d->tgt) /* we've looped and found nada */ - break; + if (tt == d->tgt) { /* we've looped and found nada */ + if (!use_tainted && !has_untainted) + use_tainted = 1; + else + break; + } } if (totout == 0) { d->kicked++; @@ -277,21 +308,68 @@ fhash(struct frame *f) list_add_tail(&f->head, &d->factive[n]); } +static void +ata_rw_frameinit(struct frame *f) +{ + struct aoetgt *t; + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct sk_buff *skb; + char writebit, extbit; + + skb = f->skb; + h = (struct aoe_hdr *) skb_mac_header(skb); + ah = (struct aoe_atahdr *) (h + 1); + skb_put(skb, sizeof(*h) + sizeof(*ah)); + memset(h, 0, skb->len); + + writebit = 0x10; + extbit = 0x4; + + t = f->t; + f->tag = aoehdr_atainit(t->d, t, h); + fhash(f); + t->nout++; + f->waited = 0; + f->waited_total = 0; + if (f->buf) + f->lba = f->buf->sector; + + /* set up ata header */ + ah->scnt = f->bcnt >> 9; + put_lba(ah, f->lba); + if (t->d->flags & DEVFL_EXT) { + ah->aflags |= AOEAFL_EXT; + } else { + extbit = 0; + ah->lba3 &= 0x0f; + ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ + } + if (f->buf && bio_data_dir(f->buf->bio) == WRITE) { + skb_fillup(skb, f->bv, f->bv_off, f->bcnt); + ah->aflags |= AOEAFL_WRITE; + skb->len += f->bcnt; + skb->data_len = f->bcnt; + skb->truesize += f->bcnt; + t->wpkts++; + } else { + t->rpkts++; + writebit = 0; + } + + ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit; + skb->dev = t->ifp->nd; +} + static int aoecmd_ata_rw(struct aoedev *d) { struct frame *f; - struct aoe_hdr *h; - struct aoe_atahdr *ah; struct buf *buf; struct aoetgt *t; struct sk_buff *skb; struct sk_buff_head queue; ulong bcnt, fbcnt; - char writebit, extbit; - - writebit = 0x10; - extbit = 0x4; buf = nextbuf(d); if (buf == NULL) @@ -326,50 +404,18 @@ aoecmd_ata_rw(struct aoedev *d) } while (fbcnt); /* initialize the headers & frame */ - skb = f->skb; - h = (struct aoe_hdr *) skb_mac_header(skb); - ah = (struct aoe_atahdr *) (h+1); - skb_put(skb, sizeof *h + sizeof *ah); - memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, t, h); - fhash(f); - t->nout++; - f->waited = 0; f->buf = buf; f->bcnt = bcnt; - f->lba = buf->sector; - - /* set up ata header */ - ah->scnt = bcnt >> 9; - put_lba(ah, buf->sector); - if (d->flags & DEVFL_EXT) { - ah->aflags |= AOEAFL_EXT; - } else { - extbit = 0; - ah->lba3 &= 0x0f; - ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ - } - if (bio_data_dir(buf->bio) == WRITE) { - skb_fillup(skb, f->bv, f->bv_off, bcnt); - ah->aflags |= AOEAFL_WRITE; - skb->len += bcnt; - skb->data_len = bcnt; - skb->truesize += bcnt; - t->wpkts++; - } else { - t->rpkts++; - writebit = 0; - } - - ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit; + ata_rw_frameinit(f); /* mark all tracking fields and load out */ buf->nframesout += 1; buf->sector += bcnt >> 9; - skb->dev = t->ifp->nd; - skb = skb_clone(skb, GFP_ATOMIC); + skb = skb_clone(f->skb, GFP_ATOMIC); if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -442,11 +488,14 @@ resend(struct aoedev *d, struct frame *f) h = (struct aoe_hdr *) skb_mac_header(skb); ah = (struct aoe_atahdr *) (h+1); - snprintf(buf, sizeof buf, - "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", - "retransmit", |