aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/aoe
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /drivers/block/aoe
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'drivers/block/aoe')
-rw-r--r-- drivers/block/aoe/Makefile 6
-rw-r--r-- drivers/block/aoe/aoe.h 165
-rw-r--r-- drivers/block/aoe/aoeblk.c 267
-rw-r--r-- drivers/block/aoe/aoechr.c 244
-rw-r--r-- drivers/block/aoe/aoecmd.c 629
-rw-r--r-- drivers/block/aoe/aoedev.c 180
-rw-r--r-- drivers/block/aoe/aoemain.c 112
-rw-r--r-- drivers/block/aoe/aoenet.c 172
8 files changed, 1775 insertions, 0 deletions
diff --git a/drivers/block/aoe/Makefile b/drivers/block/aoe/Makefile
new file mode 100644
index 00000000000..e76d997183c
--- /dev/null
+++ b/drivers/block/aoe/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for ATA over Ethernet
+#
+
+# aoe.o is added to the build list selected by CONFIG_ATA_OVER_ETH;
+# aoe-objs names the object files that make up aoe.o.
+obj-$(CONFIG_ATA_OVER_ETH) += aoe.o
+aoe-objs := aoeblk.o aoechr.o aoecmd.o aoedev.o aoemain.o aoenet.o
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
new file mode 100644
index 00000000000..db78f826d40
--- /dev/null
+++ b/drivers/block/aoe/aoe.h
@@ -0,0 +1,165 @@
+/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
+/* Driver version string. */
+#define VERSION "5"
+/* Major number used for both the gendisks (gd->major) and the character device. */
+#define AOE_MAJOR 152
+#define DEVICE_NAME "aoe"
+/* Minor slots per disk (passed to alloc_disk); may be overridden at build time. */
+#ifndef AOE_PARTITIONS
+#define AOE_PARTITIONS 16
+#endif
+/* Fold an AoE major/minor pair into one system minor index (base 10) and back. */
+#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * 10 + (aoeminor))
+#define AOEMAJOR(sysminor) ((sysminor) / 10)
+#define AOEMINOR(sysminor) ((sysminor) % 10)
+#define WHITESPACE " \t\v\f\n"
+
+/* AoE protocol constants: command codes and flag bits. */
+enum {
+ /* command codes for aoe_hdr.cmd (AOECMD_ATA is what the ATA path writes there) */
+ AOECMD_ATA,
+ AOECMD_CFG,
+
+ /* NOTE(review): presumably response/error bits of aoe_hdr.verfl -- confirm against the AoE spec */
+ AOEFL_RSP = (1<<3),
+ AOEFL_ERR = (1<<2),
+
+ /* flag bits for aoe_atahdr.aflags */
+ AOEAFL_EXT = (1<<6),
+ AOEAFL_DEV = (1<<4),
+ AOEAFL_ASYNC = (1<<1),
+ AOEAFL_WRITE = (1<<0),
+
+ /* NOTE(review): presumably sub-commands for aoe_cfghdr.aoeccmd -- confirm */
+ AOECCMD_READ = 0,
+ AOECCMD_TEST,
+ AOECCMD_PTEST,
+ AOECCMD_SET,
+ AOECCMD_FSET,
+
+ /* value written to aoe_hdr.verfl on outgoing ATA frames */
+ AOE_HVER = 0x10,
+};
+
+/*
+ * On-the-wire header at the start of every AoE frame (Ethernet header
+ * followed by the common AoE fields).  Multi-byte fields are kept as byte
+ * arrays and filled with memcpy from big-endian values.
+ */
+struct aoe_hdr {
+ unsigned char dst[6]; /* destination MAC */
+ unsigned char src[6]; /* source MAC */
+ unsigned char type[2]; /* ethertype, ETH_P_AOE in big-endian */
+ unsigned char verfl; /* version/flags; AOE_HVER on outgoing frames */
+ unsigned char err;
+ unsigned char major[2]; /* AoE major address, big-endian */
+ unsigned char minor; /* AoE minor address */
+ unsigned char cmd; /* AOECMD_* */
+ unsigned char tag[4]; /* request tag, big-endian; matched against frame.tag on response */
+};
+
+/* ATA command header that immediately follows struct aoe_hdr in ATA frames. */
+struct aoe_atahdr {
+ unsigned char aflags; /* AOEAFL_* bits */
+ unsigned char errfeat;
+ unsigned char scnt; /* sector count (byte count >> 9) */
+ unsigned char cmdstat; /* ATA command on requests; tested as status on responses */
+ unsigned char lba0; /* LBA, least significant byte first */
+ unsigned char lba1;
+ unsigned char lba2;
+ unsigned char lba3;
+ unsigned char lba4; /* lba4/lba5 are only filled in for DEVFL_EXT devices */
+ unsigned char lba5;
+ unsigned char res[2];
+};
+
+/*
+ * Config command header.
+ * NOTE(review): presumably follows struct aoe_hdr in AOECMD_CFG frames; the
+ * code using it is outside this view -- confirm field meanings there.
+ */
+struct aoe_cfghdr {
+ unsigned char bufcnt[2];
+ unsigned char fwver[2];
+ unsigned char res;
+ unsigned char aoeccmd;
+ unsigned char cslen[2];
+};
+
+/*
+ * Bits for aoedev.flags (DEVFL_*) and buf.flags (BUFFL_*).
+ * Every DEVFL_* bit must be distinct: DEVFL_WORKON used to share (1<<4)
+ * with DEVFL_WC_UPDATE, so setting either one would have looked like the
+ * other.  All DEVFL_* values must fit in the ushort aoedev.flags field.
+ */
+enum {
+	DEVFL_UP = 1,	/* device is installed in system and ready for AoE->ATA commands */
+	DEVFL_TKILL = (1<<1),	/* flag for timer to know when to kill self */
+	DEVFL_EXT = (1<<2),	/* device accepts lba48 commands */
+	DEVFL_CLOSEWAIT = (1<<3),	/* device is waiting for all closes to revalidate */
+	DEVFL_WC_UPDATE = (1<<4),	/* this device needs to update write cache status */
+	DEVFL_WORKON = (1<<5),	/* gendisk allocation work is scheduled or running */
+
+	BUFFL_FAIL = 1,	/* a frame belonging to this buf got an ATA error */
+};
+
+/* Driver-wide limits and sentinels. */
+enum {
+ MAXATADATA = 1024, /* cap on data bytes carried by one ATA frame */
+ NPERSHELF = 10,
+ FREETAG = -1, /* frame.tag value marking an unused frame */
+ MIN_BUFS = 8, /* minimum element count of each device's buf mempool */
+};
+
+/* Per-bio bookkeeping while a bio is being split into AoE frames. */
+struct buf {
+ struct list_head bufs; /* link on aoedev.bufq */
+ ulong flags; /* BUFFL_* */
+ ulong nframesout; /* frames issued for this buf that have not completed yet */
+ char *bufaddr; /* address of the next data to transfer */
+ ulong resid; /* bytes of the bio not yet put into a frame */
+ ulong bv_resid; /* bytes left in the current bio_vec */
+ sector_t sector; /* next sector to issue */
+ struct bio *bio;
+ struct bio_vec *bv; /* current bio_vec */
+};
+
+/* One outstanding (or free) request slot; holds the headers needed to (re)build the packet. */
+struct frame {
+ int tag; /* tag sent in the AoE header, or FREETAG when the slot is unused */
+ ulong waited; /* accumulated timeout; compared (in seconds) against MAXWAIT */
+ struct buf *buf; /* buf this frame carries data for */
+ char *bufaddr; /* data address inside the bio for this frame */
+ int writedatalen; /* payload bytes appended after the headers on writes, else 0 */
+ int ndata; /* number of valid header bytes in data[] */
+
+ /* largest possible */
+ unsigned char data[sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr)];
+};
+
+/* State for one remote AoE device; mutable fields are manipulated under ->lock. */
+struct aoedev {
+ struct aoedev *next;
+ unsigned char addr[6]; /* remote mac addr */
+ ushort flags; /* DEVFL_* */
+ ulong sysminor;
+ ulong aoemajor;
+ ulong aoeminor;
+ ulong nopen; /* (bd_openers isn't available without sleeping) */
+ ulong rttavg; /* round trip average of requests/responses */
+ u16 fw_ver; /* version of blade's firmware */
+ struct work_struct work;/* disk create work struct */
+ struct gendisk *gd;
+ request_queue_t blkq;
+ struct hd_geometry geo;
+ sector_t ssize; /* device size in sectors, from the ATA identify data */
+ struct timer_list timer;
+ spinlock_t lock;
+ struct net_device *ifp; /* interface ed is attached to */
+ struct sk_buff *skblist;/* packets needing to be sent */
+ mempool_t *bufpool; /* for deadlock-free Buf allocation */
+ struct list_head bufq; /* queue of bios to work on */
+ struct buf *inprocess; /* the one we're currently working on */
+ ulong lasttag; /* last tag sent */
+ ulong nframes; /* number of frames below */
+ struct frame *frames;
+};
+
+
+/* aoeblk.c: block device side */
+int aoeblk_init(void);
+void aoeblk_exit(void);
+void aoeblk_gdalloc(void *);
+void aoedisk_rm_sysfs(struct aoedev *d);
+
+/* aoechr.c: character device side */
+int aoechr_init(void);
+void aoechr_exit(void);
+void aoechr_error(char *);
+
+/* aoecmd.c: AoE command construction and response handling */
+void aoecmd_work(struct aoedev *d);
+void aoecmd_cfg(ushort, unsigned char);
+void aoecmd_ata_rsp(struct sk_buff *);
+void aoecmd_cfg_rsp(struct sk_buff *);
+
+/* NOTE(review): presumably defined in aoedev.c -- confirm */
+int aoedev_init(void);
+void aoedev_exit(void);
+struct aoedev *aoedev_bymac(unsigned char *);
+void aoedev_downdev(struct aoedev *d);
+struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, ulong);
+int aoedev_busy(void);
+
+/* NOTE(review): presumably defined in aoenet.c -- confirm */
+int aoenet_init(void);
+void aoenet_exit(void);
+void aoenet_xmit(struct sk_buff *);
+int is_aoe_netif(struct net_device *ifp);
+int set_aoe_iflist(const char __user *str, size_t size);
+
+/* NOTE(review): takes char[6] while callers pass the unsigned char aoedev.addr -- signedness differs */
+u64 mac_addr(char addr[6]);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
new file mode 100644
index 00000000000..63561b280bc
--- /dev/null
+++ b/drivers/block/aoe/aoeblk.c
@@ -0,0 +1,267 @@
+/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoeblk.c
+ * block device routines
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/genhd.h>
+#include <linux/netdevice.h>
+#include "aoe.h"
+
+/* slab cache for struct buf; created in aoeblk_init and backing each device's bufpool */
+static kmem_cache_t *buf_pool_cache;
+
+/* add attributes for our block devices in sysfs */
+/*
+ * sysfs "state" attribute: writes "up" or "down" into page, with
+ * ",closewait" appended while DEVFL_CLOSEWAIT is set.
+ * Returns the snprintf result.
+ */
+static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
+{
+	struct aoedev *d = disk->private_data;
+	const char *updown = "down";
+	const char *closewait = "";
+
+	if (d->flags & DEVFL_UP)
+		updown = "up";
+	if (d->flags & DEVFL_CLOSEWAIT)
+		closewait = ",closewait";
+
+	return snprintf(page, PAGE_SIZE, "%s%s\n", updown, closewait);
+}
+/* sysfs "mac" attribute: the device's MAC address as 12 hex digits. */
+static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
+{
+	struct aoedev *d = disk->private_data;
+	unsigned long long mac;
+
+	mac = (unsigned long long)mac_addr(d->addr);
+	return snprintf(page, PAGE_SIZE, "%012llx\n", mac);
+}
+/* sysfs "netif" attribute: name of the network interface stored in d->ifp. */
+static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
+{
+	struct aoedev *d = disk->private_data;
+	struct net_device *nd = d->ifp;
+
+	return snprintf(page, PAGE_SIZE, "%s\n", nd->name);
+}
+
+/* Read-only (S_IRUGO) sysfs attributes; each one is backed by the matching aoedisk_show_* routine. */
+static struct disk_attribute disk_attr_state = {
+ .attr = {.name = "state", .mode = S_IRUGO },
+ .show = aoedisk_show_state
+};
+static struct disk_attribute disk_attr_mac = {
+ .attr = {.name = "mac", .mode = S_IRUGO },
+ .show = aoedisk_show_mac
+};
+static struct disk_attribute disk_attr_netif = {
+ .attr = {.name = "netif", .mode = S_IRUGO },
+ .show = aoedisk_show_netif
+};
+
+/*
+ * Create the "state", "mac" and "netif" attribute files under the
+ * gendisk's kobject.  The function cannot return an error, so a failed
+ * creation is logged instead of being silently dropped; the remaining
+ * attributes are still attempted.
+ */
+static void
+aoedisk_add_sysfs(struct aoedev *d)
+{
+	struct disk_attribute *attrs[] = {
+		&disk_attr_state,
+		&disk_attr_mac,
+		&disk_attr_netif,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(attrs); ++i) {
+		if (sysfs_create_file(&d->gd->kobj, &attrs[i]->attr))
+			printk(KERN_ERR "aoe: aoedisk_add_sysfs: cannot "
+				"create sysfs file %s for %ld.%ld\n",
+				attrs[i]->attr.name,
+				d->aoemajor, d->aoeminor);
+	}
+}
+/*
+ * Remove the attribute files made by aoedisk_add_sysfs.  They were created
+ * with sysfs_create_file, so they are removed with the matching
+ * sysfs_remove_file rather than sysfs_remove_link.
+ */
+void
+aoedisk_rm_sysfs(struct aoedev *d)
+{
+	sysfs_remove_file(&d->gd->kobj, &disk_attr_state.attr);
+	sysfs_remove_file(&d->gd->kobj, &disk_attr_mac.attr);
+	sysfs_remove_file(&d->gd->kobj, &disk_attr_netif.attr);
+}
+
+/*
+ * Block-device open: count the opener if the device is up.
+ * Returns 0 on success, -ENODEV when DEVFL_UP is not set.
+ */
+static int
+aoeblk_open(struct inode *inode, struct file *filp)
+{
+	struct aoedev *d = inode->i_bdev->bd_disk->private_data;
+	ulong flags;
+	int ret = -ENODEV;
+
+	spin_lock_irqsave(&d->lock, flags);
+	if (d->flags & DEVFL_UP) {
+		d->nopen++;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	return ret;
+}
+
+/*
+ * Block-device release: drop one opener.  When the last opener goes away
+ * while DEVFL_CLOSEWAIT is set, the flag is cleared and a config query is
+ * sent for this device (after the lock has been dropped).  Always returns 0.
+ */
+static int
+aoeblk_release(struct inode *inode, struct file *filp)
+{
+	struct aoedev *d = inode->i_bdev->bd_disk->private_data;
+	ulong flags;
+	int requery;
+
+	spin_lock_irqsave(&d->lock, flags);
+	requery = (--d->nopen == 0) && (d->flags & DEVFL_CLOSEWAIT);
+	if (requery)
+		d->flags &= ~DEVFL_CLOSEWAIT;
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	if (requery)
+		aoecmd_cfg(d->aoemajor, d->aoeminor);
+
+	return 0;
+}
+
+/*
+ * make_request function for the AoE queue: wraps the bio in a struct buf,
+ * queues it on the device and kicks aoecmd_work to turn it into frames.
+ * Always returns 0; failures are reported through bio_endio.
+ */
+static int
+aoeblk_make_request(request_queue_t *q, struct bio *bio)
+{
+ struct aoedev *d;
+ struct buf *buf;
+ struct sk_buff *sl;
+ ulong flags;
+
+ blk_queue_bounce(q, &bio);
+
+ d = bio->bi_bdev->bd_disk->private_data;
+ buf = mempool_alloc(d->bufpool, GFP_NOIO);
+ if (buf == NULL) {
+ printk(KERN_INFO "aoe: aoeblk_make_request: buf allocation "
+ "failure\n");
+ bio_endio(bio, bio->bi_size, -ENOMEM);
+ return 0;
+ }
+ /* start the buf at the first bio_vec of the bio */
+ memset(buf, 0, sizeof(*buf));
+ INIT_LIST_HEAD(&buf->bufs);
+ buf->bio = bio;
+ buf->resid = bio->bi_size;
+ buf->sector = bio->bi_sector;
+ buf->bv = buf->bio->bi_io_vec;
+ buf->bv_resid = buf->bv->bv_len;
+ buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ if ((d->flags & DEVFL_UP) == 0) {
+ printk(KERN_INFO "aoe: aoeblk_make_request: device %ld.%ld is not up\n",
+ d->aoemajor, d->aoeminor);
+ spin_unlock_irqrestore(&d->lock, flags);
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, bio->bi_size, -ENXIO);
+ return 0;
+ }
+
+ list_add_tail(&buf->bufs, &d->bufq);
+ aoecmd_work(d);
+
+ /* detach the packets built under the lock; they are transmitted after unlocking */
+ sl = d->skblist;
+ d->skblist = NULL;
+
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ aoenet_xmit(sl);
+ return 0;
+}
+
+/* This ioctl implementation expects userland to have the device node
+ * permissions set so that only privileged users can open an aoe
+ * block device directly.
+ *
+ * Only HDIO_GETGEO is supported.  The geometry is copied into a local
+ * struct before the partition start is filled in, so concurrent callers
+ * (possibly on different partitions) no longer overwrite each other's
+ * start value in the shared d->geo.
+ *
+ * Returns 0 on success, -EINVAL for a NULL arg or unknown command,
+ * -ENODEV if the device is not up, -EFAULT if the copy to userland fails.
+ */
+static int
+aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg)
+{
+	struct aoedev *d;
+	struct hd_geometry geo;
+
+	if (!arg)
+		return -EINVAL;
+
+	d = inode->i_bdev->bd_disk->private_data;
+	if ((d->flags & DEVFL_UP) == 0) {
+		printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n");
+		return -ENODEV;
+	}
+
+	if (cmd == HDIO_GETGEO) {
+		geo = d->geo;
+		geo.start = get_start_sect(inode->i_bdev);
+		if (copy_to_user((void __user *) arg, &geo, sizeof geo))
+			return -EFAULT;
+		return 0;
+	}
+	printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd);
+	return -EINVAL;
+}
+
+/* Block device operations installed as gd->fops for every AoE gendisk. */
+static struct block_device_operations aoe_bdops = {
+ .open = aoeblk_open,
+ .release = aoeblk_release,
+ .ioctl = aoeblk_ioctl,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Work function (vp is the struct aoedev) that allocates and registers the
+ * gendisk and the buf mempool for a device.
+ *
+ * alloc_disk and add_disk can sleep, which is why this runs from a work
+ * queue.  On success DEVFL_UP is set and DEVFL_WORKON cleared; on any
+ * allocation failure only DEVFL_WORKON is cleared.
+ */
+void
+aoeblk_gdalloc(void *vp)
+{
+	struct aoedev *d = vp;
+	struct gendisk *gd;
+	ulong flags;
+
+	gd = alloc_disk(AOE_PARTITIONS);
+	if (gd == NULL) {
+		printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate disk "
+			"structure for %ld.%ld\n", d->aoemajor, d->aoeminor);
+		goto err;
+	}
+
+	d->bufpool = mempool_create(MIN_BUFS,
+				    mempool_alloc_slab, mempool_free_slab,
+				    buf_pool_cache);
+	if (d->bufpool == NULL) {
+		printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate bufpool "
+			"for %ld.%ld\n", d->aoemajor, d->aoeminor);
+		goto err_disk;
+	}
+
+	spin_lock_irqsave(&d->lock, flags);
+	blk_queue_make_request(&d->blkq, aoeblk_make_request);
+	gd->major = AOE_MAJOR;
+	gd->first_minor = d->sysminor * AOE_PARTITIONS;
+	gd->fops = &aoe_bdops;
+	gd->private_data = d;
+	gd->capacity = d->ssize;
+	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld",
+		d->aoemajor, d->aoeminor);
+
+	gd->queue = &d->blkq;
+	d->gd = gd;
+	d->flags &= ~DEVFL_WORKON;
+	d->flags |= DEVFL_UP;
+
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	add_disk(gd);
+	aoedisk_add_sysfs(d);
+
+	/* %llu needs an unsigned argument */
+	printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu "
+		"sectors\n", (unsigned long long)mac_addr(d->addr),
+		d->aoemajor, d->aoeminor,
+		d->fw_ver, (unsigned long long)d->ssize);
+	return;
+
+err_disk:
+	put_disk(gd);
+err:
+	/* allow a later identify response to schedule this work again */
+	spin_lock_irqsave(&d->lock, flags);
+	d->flags &= ~DEVFL_WORKON;
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+/* Tear down what aoeblk_init set up: destroy the struct buf slab cache. */
+void
+aoeblk_exit(void)
+{
+ kmem_cache_destroy(buf_pool_cache);
+}
+
+/*
+ * Create the "aoe_bufs" slab cache used for struct buf allocations.
+ * Returns 0 on success or -ENOMEM if the cache cannot be created.
+ */
+int __init
+aoeblk_init(void)
+{
+	buf_pool_cache = kmem_cache_create("aoe_bufs", sizeof(struct buf),
+					   0, 0, NULL, NULL);
+
+	return buf_pool_cache ? 0 : -ENOMEM;
+}
+
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
new file mode 100644
index 00000000000..14aeca3e2e8
--- /dev/null
+++ b/drivers/block/aoe/aoechr.c
@@ -0,0 +1,244 @@
+/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoechr.c
+ * AoE character device driver
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include "aoe.h"
+
+/* Character device minor numbers and message-queue sizing. */
+enum {
+ //MINOR_STAT = 1, (moved to sysfs)
+ MINOR_ERR = 2, /* readable: queued error messages */
+ MINOR_DISCOVER, /* writable: triggers discover() */
+ MINOR_INTERFACES, /* writable: passes the written data to set_aoe_iflist */
+ MSGSZ = 2048,
+ NARGS = 10,
+ NMSG = 100, /* message backlog to retain */
+};
+
+/* One character device node: its minor number and the name registered with the class. */
+struct aoe_chardev {
+ ulong minor;
+ char name[32];
+};
+
+/* ErrMsg.flags bit: the slot holds a message that has not been read yet */
+enum { EMFL_VALID = 1 };
+
+/* One slot of the error-message ring. */
+struct ErrMsg {
+ short flags; /* EMFL_* */
+ short len; /* length of msg in bytes (the text is stored without a terminating NUL) */
+ char *msg; /* kmalloc'ed copy of the message text */
+};
+
+/* Ring of pending error messages: aoechr_error fills at tail, aoechr_read drains at head. */
+static struct ErrMsg emsgs[NMSG];
+static int emsgs_head_idx, emsgs_tail_idx;
+static struct semaphore emsgs_sema; /* readers sleep on this while the ring is empty */
+static spinlock_t emsgs_lock; /* taken around accesses to the ring and its indices */
+static int nblocked_emsgs_readers; /* readers currently in (or about to enter) down_interruptible */
+static struct class_simple *aoe_class;
+/* device nodes registered by aoechr_init and accepted by aoechr_open */
+static struct aoe_chardev chardevs[] = {
+ { MINOR_ERR, "err" },
+ { MINOR_DISCOVER, "discover" },
+ { MINOR_INTERFACES, "interfaces" },
+};
+
+/*
+ * Handler for writes to the "discover" device: issues a config command
+ * with major 0xffff and minor 0xff.  Always returns 0.
+ * NOTE(review): those values are presumably the AoE broadcast addresses -- confirm.
+ */
+static int
+discover(void)
+{
+ aoecmd_cfg(0xffff, 0xff);
+ return 0;
+}
+
+/*
+ * Handler for writes to the "interfaces" device: hands the user buffer to
+ * set_aoe_iflist.  Returns 0 on success; any failure is logged and
+ * reported as -EINVAL.
+ */
+static int
+interfaces(const char __user *str, size_t size)
+{
+	if (set_aoe_iflist(str, size) == 0)
+		return 0;
+
+	printk(KERN_CRIT
+	       "%s: could not set interface list: %s\n",
+	       __FUNCTION__, "too many interfaces");
+	return -EINVAL;
+}
+
+/*
+ * Queue a copy of msg (without its terminating NUL) for readers of the
+ * "err" character device.
+ *
+ * The message is dropped when the ring is full (the tail slot is still
+ * valid) or when the atomic allocation fails.  A blocked reader, if any,
+ * is woken after the lock is released; the reader count is sampled while
+ * the lock is still held, since that is where readers update it.
+ */
+void
+aoechr_error(char *msg)
+{
+	struct ErrMsg *em;
+	char *mp;
+	ulong flags, n;
+	int wake = 0;
+
+	n = strlen(msg);
+
+	spin_lock_irqsave(&emsgs_lock, flags);
+
+	em = emsgs + emsgs_tail_idx;
+	if (em->flags & EMFL_VALID)
+		goto out;	/* ring full: drop the message */
+
+	mp = kmalloc(n, GFP_ATOMIC);
+	if (mp == NULL) {
+		printk(KERN_CRIT "aoe: aoechr_error: allocation failure, len=%lu\n", n);
+		goto out;
+	}
+
+	memcpy(mp, msg, n);
+	em->msg = mp;
+	em->flags |= EMFL_VALID;
+	em->len = n;
+
+	emsgs_tail_idx++;
+	emsgs_tail_idx %= ARRAY_SIZE(emsgs);
+
+	wake = nblocked_emsgs_readers;
+out:
+	spin_unlock_irqrestore(&emsgs_lock, flags);
+
+	if (wake)
+		up(&emsgs_sema);
+}
+
+/*
+ * write() for the AoE character devices.  The minor number stored in
+ * private_data by aoechr_open selects the action.  Returns cnt when the
+ * handler succeeds, otherwise the handler's error (or -EINVAL for a device
+ * that does not accept writes).
+ */
+static ssize_t
+aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp)
+{
+	unsigned long minor = (unsigned long) filp->private_data;
+	int ret;
+
+	if (minor == MINOR_DISCOVER) {
+		ret = discover();
+	} else if (minor == MINOR_INTERFACES) {
+		ret = interfaces(buf, cnt);
+	} else {
+		printk(KERN_INFO "aoe: aoechr_write: can't write to that file.\n");
+		ret = -EINVAL;
+	}
+
+	if (ret == 0)
+		ret = cnt;
+	return ret;
+}
+
+/*
+ * open() for the AoE character devices: remember the minor number in
+ * private_data and accept only minors listed in chardevs[].
+ * Returns 0 for a known minor, -EINVAL otherwise.
+ */
+static int
+aoechr_open(struct inode *inode, struct file *filp)
+{
+	int n = MINOR(inode->i_rdev);
+	int i = 0;
+
+	filp->private_data = (void *) (unsigned long) n;
+
+	while (i < ARRAY_SIZE(chardevs)) {
+		if (chardevs[i].minor == n)
+			return 0;
+		++i;
+	}
+	return -EINVAL;
+}
+
+/* release() for the AoE character devices: nothing to undo, always returns 0. */
+static int
+aoechr_rel(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+/*
+ * read() for the AoE character devices.  Only the "err" device is
+ * readable: each call returns one whole queued message.
+ *
+ * Returns the message length on success, -EAGAIN if the ring is empty and
+ * the file is O_NDELAY or if cnt is smaller than the message, -ERESTARTSYS
+ * if the sleep is interrupted, and -EFAULT if the copy to userland fails
+ * or the device is not the "err" device.
+ */
+static ssize_t
+aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
+{
+ unsigned long n;
+ char *mp;
+ struct ErrMsg *em;
+ ssize_t len;
+ ulong flags;
+
+ n = (unsigned long) filp->private_data;
+ switch (n) {
+ case MINOR_ERR:
+ spin_lock_irqsave(&emsgs_lock, flags);
+loop:
+ em = emsgs + emsgs_head_idx;
+ if ((em->flags & EMFL_VALID) == 0) {
+ if (filp->f_flags & O_NDELAY) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ /* register as a blocked reader before dropping the lock so aoechr_error knows to call up() */
+ nblocked_emsgs_readers++;
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+
+ n = down_interruptible(&emsgs_sema);
+
+ spin_lock_irqsave(&emsgs_lock, flags);
+
+ nblocked_emsgs_readers--;
+
+ if (n) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -ERESTARTSYS;
+ }
+ /* re-check the head slot after waking */
+ goto loop;
+ }
+ /* the message stays queued when the caller's buffer is too small */
+ if (em->len > cnt) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ /* take ownership of the text and free the slot before copying, so the copy runs unlocked */
+ mp = em->msg;
+ len = em->len;
+ em->msg = NULL;
+ em->flags &= ~EMFL_VALID;
+
+ emsgs_head_idx++;
+ emsgs_head_idx %= ARRAY_SIZE(emsgs);
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+
+ /* the message has already been dequeued; it is freed even if the copy fails */
+ n = copy_to_user(buf, mp, len);
+ kfree(mp);
+ return n == 0 ? len : -EFAULT;
+ default:
+ return -EFAULT;
+ }
+}
+
+/* File operations shared by all AoE character device minors; registered in aoechr_init. */
+static struct file_operations aoe_fops = {
+ .write = aoechr_write,
+ .read = aoechr_read,
+ .open = aoechr_open,
+ .release = aoechr_rel,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Register the character device major, initialise the error-message
+ * synchronisation objects, and create the "aoe" class with one device per
+ * chardevs[] entry.  Returns 0 on success or a negative errno; the chrdev
+ * registration is undone if the class cannot be created.
+ */
+int __init
+aoechr_init(void)
+{
+	struct aoe_chardev *cd;
+	int err;
+
+	err = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
+	if (err < 0) {
+		printk(KERN_ERR "aoe: aoechr_init: can't register char device\n");
+		return err;
+	}
+
+	sema_init(&emsgs_sema, 0);
+	spin_lock_init(&emsgs_lock);
+
+	aoe_class = class_simple_create(THIS_MODULE, "aoe");
+	if (IS_ERR(aoe_class)) {
+		unregister_chrdev(AOE_MAJOR, "aoechr");
+		return PTR_ERR(aoe_class);
+	}
+
+	for (cd = chardevs; cd < chardevs + ARRAY_SIZE(chardevs); ++cd)
+		class_simple_device_add(aoe_class,
+					MKDEV(AOE_MAJOR, cd->minor),
+					NULL, cd->name);
+
+	return 0;
+}
+
+/*
+ * Undo aoechr_init: remove the class devices, the class and the chrdev
+ * registration, then free any error messages that were queued but never
+ * read (they would otherwise be leaked) and reset the ring to empty.
+ */
+void
+aoechr_exit(void)
+{
+	ulong flags;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+		class_simple_device_remove(MKDEV(AOE_MAJOR, chardevs[i].minor));
+	class_simple_destroy(aoe_class);
+	unregister_chrdev(AOE_MAJOR, "aoechr");
+
+	spin_lock_irqsave(&emsgs_lock, flags);
+	for (i = 0; i < ARRAY_SIZE(emsgs); ++i) {
+		if (emsgs[i].flags & EMFL_VALID) {
+			kfree(emsgs[i].msg);
+			emsgs[i].msg = NULL;
+			emsgs[i].flags &= ~EMFL_VALID;
+		}
+	}
+	emsgs_head_idx = 0;
+	emsgs_tail_idx = 0;
+	spin_unlock_irqrestore(&emsgs_lock, flags);
+}
+
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
new file mode 100644
index 00000000000..fb6d942a456
--- /dev/null
+++ b/drivers/block/aoe/aoecmd.c
@@ -0,0 +1,629 @@
+/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoecmd.c
+ * Filesystem request handling methods
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "aoe.h"
+
+/* All values except MAXWAIT are in jiffies. */
+#define TIMERTICK (HZ / 10) /* period of the per-device retransmit timer */
+#define MINTIMER (2 * TIMERTICK) /* lower clamp for a round-trip sample */
+#define MAXTIMER (HZ << 1) /* upper clamp for a round-trip sample and for rttavg back-off */
+#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
+
+/*
+ * Allocate (GFP_ATOMIC) an skb of len bytes set up for transmission as an
+ * AoE frame on if_dev.  Returns NULL if the allocation fails.
+ */
+static struct sk_buff *
+new_skb(struct net_device *if_dev, ulong len)
+{
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (skb == NULL)
+		return NULL;
+
+	skb->mac.raw = skb->data;
+	skb->nh.raw = skb->mac.raw;
+	skb->dev = if_dev;
+	skb->protocol = __constant_htons(ETH_P_AOE);
+	skb->priority = 0;
+	skb_put(skb, len);
+	skb->next = NULL;
+	skb->prev = NULL;
+
+	/* tell the network layer not to perform IP checksums
+	 * or to get the NIC to do it
+	 */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+}
+
+/*
+ * Build the packet for frame f: the saved headers from f->data followed,
+ * for writes, by f->writedatalen bytes taken from f->bufaddr.
+ * Returns the new skb, or NULL (after logging) if none could be allocated.
+ */
+static struct sk_buff *
+skb_prepare(struct aoedev *d, struct frame *f)
+{
+	struct sk_buff *skb = new_skb(d->ifp, f->ndata + f->writedatalen);
+	char *dst;
+
+	if (skb == NULL) {
+		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
+		return NULL;
+	}
+
+	dst = skb->mac.raw;
+	memcpy(dst, f->data, f->ndata);
+
+	if (f->writedatalen != 0) {
+		/* payload goes right behind the AoE + ATA headers */
+		dst += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
+		memcpy(dst, f->bufaddr, f->writedatalen);
+	}
+
+	return skb;
+}
+
+/*
+ * Linear search of the device's frame array for the first frame whose tag
+ * equals tag (pass FREETAG to find an unused frame).  Returns NULL if none.
+ */
+static struct frame *
+getframe(struct aoedev *d, int tag)
+{
+	ulong i;
+
+	for (i = 0; i < d->nframes; i++) {
+		if (d->frames[i].tag == tag)
+			return &d->frames[i];
+	}
+	return NULL;
+}
+
+/*
+ * Produce the tag for the next outgoing frame and advance d->lasttag.
+ *
+ * Layout: bits 0-15 hold the low 16 bits of jiffies at transmit time (used
+ * for rexmit/rttavg processing); bits 16-30 hold a per-device sequence
+ * number.  The top bit stays clear so userland has tagspace of its own.
+ * This driver reserves tag -1 to mean "unused frame."
+ */
+static int
+newtag(struct aoedev *d)
+{
+	ulong tick = jiffies & 0xffff;
+	ulong seq = ++d->lasttag & 0x7fff;
+
+	return tick | (seq << 16);
+}
+
+/*
+ * Fill in the AoE header h for an ATA command to device d: addresses,
+ * ethertype, version, device major/minor, command and a fresh tag.
+ * Returns the new tag in host byte order.
+ */
+static int
+aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
+{
+	u32 host_tag = newtag(d);
+	u32 be_tag = __cpu_to_be32(host_tag);
+	u16 be_major = __cpu_to_be16(d->aoemajor);
+	u16 be_type = __constant_cpu_to_be16(ETH_P_AOE);
+
+	/* ethernet part */
+	memcpy(h->dst, d->addr, sizeof h->dst);
+	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
+	memcpy(h->type, &be_type, sizeof be_type);
+
+	/* AoE part */
+	h->verfl = AOE_HVER;
+	memcpy(h->major, &be_major, sizeof be_major);
+	h->minor = d->aoeminor;
+	h->cmd = AOECMD_ATA;
+	memcpy(h->tag, &be_tag, sizeof be_tag);
+
+	return host_tag;
+}
+
+/*
+ * Build one ATA read/write frame in f for the next chunk (at most
+ * MAXATADATA bytes, never crossing a bio_vec) of d->inprocess, advance the
+ * buf's position, and push the resulting packet onto d->skblist.
+ * The frame stays marked in use even when no skb could be allocated.
+ */
+static void
+aoecmd_ata_rw(struct aoedev *d, struct frame *f)
+{
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ struct buf *buf;
+ struct sk_buff *skb;
+ ulong bcnt;
+ register sector_t sector;
+ char writebit, extbit;
+
+ /* bits OR'ed into WIN_READ below to select the write and/or ext variant of the command */
+ writebit = 0x10;
+ extbit = 0x4;
+
+ buf = d->inprocess;
+
+ sector = buf->sector;
+ bcnt = buf->bv_resid;
+ if (bcnt > MAXATADATA)
+ bcnt = MAXATADATA;
+
+ /* initialize the headers & frame */
+ h = (struct aoe_hdr *) f->data;
+ ah = (struct aoe_atahdr *) (h+1);
+ f->ndata = sizeof *h + sizeof *ah;
+ memset(h, 0, f->ndata);
+ f->tag = aoehdr_atainit(d, h);
+ f->waited = 0;
+ f->buf = buf;
+ f->bufaddr = buf->bufaddr;
+
+ /* set up ata header */
+ ah->scnt = bcnt >> 9;
+ /* each assignment stores the low byte, then shifts sector down for the next one */
+ ah->lba0 = sector;
+ ah->lba1 = sector >>= 8;
+ ah->lba2 = sector >>= 8;
+ ah->lba3 = sector >>= 8;
+ if (d->flags & DEVFL_EXT) {
+ ah->aflags |= AOEAFL_EXT;
+ ah->lba4 = sector >>= 8;
+ ah->lba5 = sector >>= 8;
+ } else {
+ extbit = 0;
+ ah->lba3 &= 0x0f;
+ ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
+ }
+
+ if (bio_data_dir(buf->bio) == WRITE) {
+ ah->aflags |= AOEAFL_WRITE;
+ f->writedatalen = bcnt;
+ } else {
+ writebit = 0;
+ f->writedatalen = 0;
+ }
+
+ ah->cmdstat = WIN_READ | writebit | extbit;
+
+ /* mark all tracking fields and load out */
+ buf->nframesout += 1;
+ buf->bufaddr += bcnt;
+ buf->bv_resid -= bcnt;
+/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
+ buf->resid -= bcnt;
+ buf->sector += bcnt >> 9;
+ if (buf->resid == 0) {
+ /* whole bio has been issued; aoecmd_work will pick the next buf */
+ d->inprocess = NULL;
+ } else if (buf->bv_resid == 0) {
+ /* current bio_vec exhausted: move on to the next one */
+ buf->bv++;
+ buf->bv_resid = buf->bv->bv_len;
+ buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+ }
+
+ skb = skb_prepare(d, f);
+ if (skb) {
+ skb->next = d->skblist;
+ d->skblist = skb;
+ }
+}
+
+/*
+ * Turn queued bufs into frames for as long as a free frame is available
+ * and there is data left to send.
+ *
+ * enters with d->lock held
+ */
+void
+aoecmd_work(struct aoedev *d)
+{
+	struct frame *f;
+	struct list_head *pos;
+
+	while ((f = getframe(d, FREETAG)) != NULL) {
+		if (d->inprocess == NULL) {
+			if (list_empty(&d->bufq))
+				break;
+			/* dequeue the oldest buf and make it current */
+			pos = d->bufq.next;
+			list_del(pos);
+			d->inprocess = container_of(pos, struct buf, bufs);
+		}
+		aoecmd_ata_rw(d, f);
+	}
+}
+
+/*
+ * Retransmit frame f under a fresh tag: report the retransmission through
+ * aoechr_error, rewrite the tag in the frame and its saved header, and
+ * push a newly built packet onto d->skblist (if one could be allocated).
+ */
+static void
+rexmit(struct aoedev *d, struct frame *f)
+{
+	struct aoe_hdr *h = (struct aoe_hdr *) f->data;
+	struct sk_buff *skb;
+	char msg[128];
+	u32 tag, net_tag;
+
+	tag = newtag(d);
+
+	snprintf(msg, sizeof msg,
+		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
+		"retransmit",
+		d->aoemajor, d->aoeminor, f->tag, jiffies, tag);
+	aoechr_error(msg);
+
+	f->tag = tag;
+	net_tag = __cpu_to_be32(tag);
+	memcpy(h->tag, &net_tag, sizeof net_tag);
+
+	skb = skb_prepare(d, f);
+	if (skb == NULL)
+		return;
+
+	skb->next = d->skblist;
+	d->skblist = skb;
+}
+
+/*
+ * Jiffies elapsed since the frame carrying tag was sent, computed modulo
+ * 2^16 from the transmit tick stored in the tag's low 16 bits.
+ */
+static int
+tsince(int tag)
+{
+	int now = jiffies & 0xffff;
+	int sent = tag & 0xffff;
+	int elapsed = now - sent;
+
+	if (elapsed < 0)
+		elapsed += 1<<16;
+	return elapsed;
+}
+
+/*
+ * Per-device timer callback (vp is the struct aoedev).  Retransmits every
+ * outstanding frame that has waited at least 150% of the round-trip
+ * average, fails the device once a frame has waited more than MAXWAIT
+ * seconds, and re-arms itself TIMERTICK jiffies ahead.
+ *
+ * d->rttavg is now sampled after d->lock is taken: it is updated under that
+ * lock by the response path, so reading it beforehand could race.
+ */
+static void
+rexmit_timer(ulong vp)
+{
+	struct aoedev *d;
+	struct frame *f, *e;
+	struct sk_buff *sl;
+	register long timeout;
+	ulong flags, n;
+
+	d = (struct aoedev *) vp;
+	sl = NULL;
+
+	spin_lock_irqsave(&d->lock, flags);
+
+	/* the timer has been asked to stop: do not re-arm */
+	if (d->flags & DEVFL_TKILL)
+		goto tdie;
+
+	/* timeout is always ~150% of the moving average */
+	timeout = d->rttavg;
+	timeout += timeout >> 1;
+
+	f = d->frames;
+	e = f + d->nframes;
+	for (; f<e; f++) {
+		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
+			n = f->waited += timeout;
+			n /= HZ;
+			if (n > MAXWAIT) { /* waited too long.  device failure. */
+				aoedev_downdev(d);
+				goto tdie;
+			}
+			rexmit(d, f);
+		}
+	}
+
+	sl = d->skblist;
+	d->skblist = NULL;
+	if (sl) {
+		/* something had to be resent: back off by doubling the average, capped at MAXTIMER */
+		n = d->rttavg <<= 1;
+		if (n > MAXTIMER)
+			d->rttavg = MAXTIMER;
+	}
+
+	d->timer.expires = jiffies + TIMERTICK;
+	add_timer(&d->timer);
+
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	aoenet_xmit(sl);
+	return;
+
+tdie:
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+/*
+ * Read a little-endian value of nbytes bytes that starts at 16-bit word
+ * index `word` of the ATA identify data.  Assembling it byte by byte works
+ * for any alignment of id and on either host byte order.
+ */
+static u64
+ataid_le(const unsigned char *id, int word, int nbytes)
+{
+	u64 v = 0;
+	int i;
+
+	for (i = nbytes - 1; i >= 0; i--)
+		v = (v << 8) | id[(word << 1) + i];
+	return v;
+}
+
+/*
+ * Process the 512 bytes of ATA identify data in id: record whether the
+ * device does LBA48, its size and geometry, then either update the
+ * existing gendisk or schedule aoeblk_gdalloc to create one.
+ *
+ * id points into the received packet, so no alignment is assumed; the
+ * fields are read with ataid_le instead of through cast pointers.
+ */
+static void
+ataid_complete(struct aoedev *d, unsigned char *id)
+{
+	u64 ssize;
+	u16 n;
+
+	/* word 83: command set supported */
+	n = ataid_le(id, 83, 2);
+
+	/* word 86: command set/feature enabled */
+	n |= ataid_le(id, 86, 2);
+
+	if (n & (1<<10)) {	/* bit 10: LBA 48 */
+		d->flags |= DEVFL_EXT;
+
+		/* word 100: number lba48 sectors */
+		ssize = ataid_le(id, 100, 8);
+
+		/* set as in ide-disk.c:init_idedisk_capacity */
+		/* NOTE(review): if geo.cylinders is narrower than ssize, the value is
+		 * truncated by this assignment before the division -- confirm intent */
+		d->geo.cylinders = ssize;
+		d->geo.cylinders /= (255 * 63);
+		d->geo.heads = 255;
+		d->geo.sectors = 63;
+	} else {
+		d->flags &= ~DEVFL_EXT;
+
+		/* number lba28 sectors */
+		ssize = ataid_le(id, 60, 4);
+
+		/* NOTE: obsolete in ATA 6 */
+		d->geo.cylinders = ataid_le(id, 54, 2);
+		d->geo.heads = ataid_le(id, 55, 2);
+		d->geo.sectors = ataid_le(id, 56, 2);
+	}
+	d->ssize = ssize;
+	d->geo.start = 0;
+	if (d->gd != NULL) {
+		/* disk already exists: just refresh its capacity and mark it up */
+		d->gd->capacity = ssize;
+		d->flags |= DEVFL_UP;
+		return;
+	}
+	if (d->flags & DEVFL_WORKON) {
+		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
+			"(This really shouldn't happen).\n");
+		return;
+	}
+	INIT_WORK(&d->work, aoeblk_gdalloc, d);
+	schedule_work(&d->work);
+	d->flags |= DEVFL_WORKON;
+}
+
+/*
+ * Fold a new round-trip sample (in jiffies) into d->rttavg.  The sample is
+ * clamped to [MINTIMER, MAXTIMER] and the average moves a quarter of the
+ * way toward it.
+ */
+static void
+calc_rttavg(struct aoedev *d, int rtt)
+{
+	long sample = rtt;
+	long delta;
+
+	if (sample < MINTIMER)
+		sample = MINTIMER;
+	else if (sample > MAXTIMER)
+		sample = MAXTIMER;
+
+	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
+	delta = sample - d->rttavg;	/* kept signed so the shift below is arithmetic */
+	d->rttavg += delta >> 2;
+}
+
+/*
+ * Handle a received AoE ATA response.
+ *
+ * Finds the device by source MAC and the frame by tag, updates the
+ * round-trip average, copies read data into the bio (or processes identify
+ * data), completes the bio once all of its frames are back, frees the
+ * frame, and sends whatever new work became possible.
+ *
+ * Frames shorter than the AoE + ATA headers are dropped up front.  Without
+ * that check the unsigned "skb->len - headers" arithmetic in the runt tests
+ * below would wrap for a short frame and let it through.  A dropped or runt
+ * response leaves the frame outstanding, so the retransmit timer resends it.
+ */
+void
+aoecmd_ata_rsp(struct sk_buff *skb)
+{
+	struct aoedev *d;
+	struct aoe_hdr *hin;
+	struct aoe_atahdr *ahin, *ahout;
+	struct frame *f;
+	struct buf *buf;
+	struct sk_buff *sl;
+	register long n;
+	ulong flags;
+	char ebuf[128];
+
+	if (skb->len < sizeof *hin + sizeof *ahin) {
+		printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt frame.  "
+			"skb->len=%u\n", skb->len);
+		return;
+	}
+
+	hin = (struct aoe_hdr *) skb->mac.raw;
+	d = aoedev_bymac(hin->src);
+	if (d == NULL) {
+		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
+			"for unknown device %d.%d\n",
+			 __be16_to_cpu(*((u16 *) hin->major)),
+			hin->minor);
+		aoechr_error(ebuf);
+		return;
+	}
+
+	spin_lock_irqsave(&d->lock, flags);
+
+	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
+	if (f == NULL) {
+		spin_unlock_irqrestore(&d->lock, flags);
+		snprintf(ebuf, sizeof ebuf,
+			"%15s e%d.%d    tag=%08x@%08lx\n",
+			"unexpected rsp",
+			__be16_to_cpu(*((u16 *) hin->major)),
+			hin->minor,
+			__be32_to_cpu(*((u32 *) hin->tag)),
+			jiffies);
+		aoechr_error(ebuf);
+		return;
+	}
+
+	calc_rttavg(d, tsince(f->tag));
+
+	ahin = (struct aoe_atahdr *) (hin+1);
+	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
+	buf = f->buf;
+
+	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
+		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
+			"stat=%2.2Xh from e%ld.%ld\n",
+			ahout->cmdstat, ahin->cmdstat,
+			d->aoemajor, d->aoeminor);
+		if (buf)
+			buf->flags |= BUFFL_FAIL;
+	} else {
+		/* dispatch on the command we sent, taken from the saved request header */
+		switch (ahout->cmdstat) {
+		case WIN_READ:
+		case WIN_READ_EXT:
+			n = ahout->scnt << 9;
+			if (skb->len - sizeof *hin - sizeof *ahin < n) {
+				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
+					"ata data size in read.  skb->len=%d\n",
+					skb->len);
+				/* fail frame f?  just returning will rexmit. */
+				spin_unlock_irqrestore(&d->lock, flags);
+				return;
+			}
+			memcpy(f->bufaddr, ahin+1, n);
+			/* fallthrough */
+		case WIN_WRITE:
+		case WIN_WRITE_EXT:
+			break;
+		case WIN_IDENTIFY:
+			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
+				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
+					"in ataid.  skb->len=%d\n", skb->len);
+				spin_unlock_irqrestore(&d->lock, flags);
+				return;
+			}
+			ataid_complete(d, (unsigned char *) (ahin+1));
+			/* d->flags |= DEVFL_WC_UPDATE; */
+			break;
+		default:
+			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
+			       "outbound ata command %2.2Xh for %d.%d\n",
+			       ahout->cmdstat,
+			       __be16_to_cpu(*((u16 *) hin->major)),
+			       hin->minor);
+		}
+	}
+
+	if (buf) {
+		buf->nframesout -= 1;
+		/* finish the bio once every byte has been issued and every frame answered */
+		if (buf->nframesout == 0 && buf->resid == 0) {
+			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
+			bio_endio(buf->bio, buf->bio->bi_size, n);
+			mempool_free(buf, d->bufpool);
+		}
+	}
+
+	/* release the frame so aoecmd_work can reuse it */
+	f->buf = NULL;
+	f->tag = FREETAG;
+
+	aoecmd_work(d);
+
+	sl = d->skblist;
+	d->skblist = NULL;
+
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	aoenet_xmit(sl);
+}
+
+void
+aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
+{
+ struct aoe_hdr *h;
+ struct aoe_cfghdr *ch;
+ struct sk_buff *skb, *sl;