diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-04 08:00:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-04 08:00:54 -0800 |
commit | 93890b71a34f9490673a6edd56b61c2124215e46 (patch) | |
tree | c5d82620f2cb69f0bf43639e63f54b0c0e2eb744 | |
parent | f5bb3a5e9dcdb8435471562b6cada89525cf4df1 (diff) | |
parent | 6b35e40767c6c1ac783330109ae8e0c09ea6bc82 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (25 commits)
virtio: balloon driver
virtio: Use PCI revision field to indicate virtio PCI ABI version
virtio: PCI device
virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz
virtio_blk: Dont waste major numbers
virtio_blk: provide getgeo
virtio_net: parametrize the napi_weight for virtio receive queue.
virtio: free transmit skbs when notified, not on next xmit.
virtio: flush buffers on open
virtnet: remove double ether_setup
virtio: Allow virtio to be modular and used by modules
virtio: Use the sg_phys convenience function.
virtio: Put the virtio under the virtualization menu
virtio: handle interrupts after callbacks turned off
virtio: reset function
virtio: populate network rings in the probe routine, not open
virtio: Tweak virtio_net defines
virtio: Net header needs hdr_len
virtio: remove unused id field from struct virtio_blk_outhdr
virtio: clarify NO_NOTIFY flag usage
...
-rw-r--r-- | Documentation/lguest/lguest.c | 231 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 1 | ||||
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/Kconfig | 3 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 106 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 4 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 146 | ||||
-rw-r--r-- | drivers/net/Kconfig | 3 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 155 | ||||
-rw-r--r-- | drivers/virtio/Kconfig | 31 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 65 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 284 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 446 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 51 | ||||
-rw-r--r-- | include/linux/lguest_launcher.h | 9 | ||||
-rw-r--r-- | include/linux/skbuff.h | 1 | ||||
-rw-r--r-- | include/linux/virtio.h | 19 | ||||
-rw-r--r-- | include/linux/virtio_balloon.h | 18 | ||||
-rw-r--r-- | include/linux/virtio_blk.h | 22 | ||||
-rw-r--r-- | include/linux/virtio_config.h | 104 | ||||
-rw-r--r-- | include/linux/virtio_net.h | 32 | ||||
-rw-r--r-- | include/linux/virtio_pci.h | 57 | ||||
-rw-r--r-- | include/linux/virtio_ring.h | 14 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 8 | ||||
-rw-r--r-- | net/core/skbuff.c | 29 |
26 files changed, 1422 insertions, 421 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 6c8a2386cd5..0f23d67f958 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -34,6 +34,8 @@ #include <zlib.h> #include <assert.h> #include <sched.h> +#include <limits.h> +#include <stddef.h> #include "linux/lguest_launcher.h" #include "linux/virtio_config.h" #include "linux/virtio_net.h" @@ -99,13 +101,11 @@ struct device_list /* The descriptor page for the devices. */ u8 *descpage; - /* The tail of the last descriptor. */ - unsigned int desc_used; - /* A single linked list of devices. */ struct device *dev; - /* ... And an end pointer so we can easily append new devices */ - struct device **lastdev; + /* And a pointer to the last device for easy append and also for + * configuration appending. */ + struct device *lastdev; }; /* The list of Guest devices, based on command line arguments. */ @@ -191,7 +191,14 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, #define cpu_to_le64(v64) (v64) #define le16_to_cpu(v16) (v16) #define le32_to_cpu(v32) (v32) -#define le64_to_cpu(v32) (v64) +#define le64_to_cpu(v64) (v64) + +/* The device virtqueue descriptors are followed by feature bitmasks. */ +static u8 *get_feature_bits(struct device *dev) +{ + return (u8 *)(dev->desc + 1) + + dev->desc->num_vq * sizeof(struct lguest_vqconfig); +} /*L:100 The Launcher code itself takes us out into userspace, that scary place * where pointers run wild and free! Unfortunately, like most userspace @@ -914,21 +921,58 @@ static void enable_fd(int fd, struct virtqueue *vq) write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); } +/* Resetting a device is fairly easy. */ +static void reset_device(struct device *dev) +{ + struct virtqueue *vq; + + verbose("Resetting device %s\n", dev->name); + /* Clear the status. */ + dev->desc->status = 0; + + /* Clear any features they've acked. 
*/ + memset(get_feature_bits(dev) + dev->desc->feature_len, 0, + dev->desc->feature_len); + + /* Zero out the virtqueues. */ + for (vq = dev->vq; vq; vq = vq->next) { + memset(vq->vring.desc, 0, + vring_size(vq->config.num, getpagesize())); + vq->last_avail_idx = 0; + } +} + /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ static void handle_output(int fd, unsigned long addr) { struct device *i; struct virtqueue *vq; - /* Check each virtqueue. */ + /* Check each device and virtqueue. */ for (i = devices.dev; i; i = i->next) { + /* Notifications to device descriptors reset the device. */ + if (from_guest_phys(addr) == i->desc) { + reset_device(i); + return; + } + + /* Notifications to virtqueues mean output has occurred. */ for (vq = i->vq; vq; vq = vq->next) { - if (vq->config.pfn == addr/getpagesize() - && vq->handle_output) { - verbose("Output to %s\n", vq->dev->name); - vq->handle_output(fd, vq); + if (vq->config.pfn != addr/getpagesize()) + continue; + + /* Guest should acknowledge (and set features!) before + * using the device. */ + if (i->desc->status == 0) { + warnx("%s gave early output", i->name); return; } + + if (strcmp(vq->dev->name, "console") != 0) + verbose("Output to %s\n", vq->dev->name); + if (vq->handle_output) + vq->handle_output(fd, vq); + return; } } @@ -986,54 +1030,44 @@ static void handle_input(int fd) * * All devices need a descriptor so the Guest knows it exists, and a "struct * device" so the Launcher can keep track of it. We have common helper - * routines to allocate them. - * - * This routine allocates a new "struct lguest_device_desc" from descriptor - * table just above the Guest's normal memory. It returns a pointer to that - * descriptor. */ -static struct lguest_device_desc *new_dev_desc(u16 type) -{ - struct lguest_device_desc *d; + * routines to allocate and manage them. */ - /* We only have one page for all the descriptors. 
*/ - if (devices.desc_used + sizeof(*d) > getpagesize()) - errx(1, "Too many devices"); - - /* We don't need to set config_len or status: page is 0 already. */ - d = (void *)devices.descpage + devices.desc_used; - d->type = type; - devices.desc_used += sizeof(*d); - - return d; +/* The layout of the device page is a "struct lguest_device_desc" followed by a + * number of virtqueue descriptors, then two sets of feature bits, then an + * array of configuration bytes. This routine returns the configuration + * pointer. */ +static u8 *device_config(const struct device *dev) +{ + return (void *)(dev->desc + 1) + + dev->desc->num_vq * sizeof(struct lguest_vqconfig) + + dev->desc->feature_len * 2; } -/* Each device descriptor is followed by some configuration information. - * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. - * - * This routine adds a new field to an existing device's descriptor. It only - * works for the last device, but that's OK because that's how we use it. */ -static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c) +/* This routine allocates a new "struct lguest_device_desc" from descriptor + * table page just above the Guest's normal memory. It returns a pointer to + * that descriptor. */ +static struct lguest_device_desc *new_dev_desc(u16 type) { - /* This is the last descriptor, right? */ - assert(devices.descpage + devices.desc_used - == (u8 *)(dev->desc + 1) + dev->desc->config_len); + struct lguest_device_desc d = { .type = type }; + void *p; - /* We only have one page of device descriptions. */ - if (devices.desc_used + 2 + len > getpagesize()) - errx(1, "Too many devices"); + /* Figure out where the next device config is, based on the last one. */ + if (devices.lastdev) + p = device_config(devices.lastdev) + + devices.lastdev->desc->config_len; + else + p = devices.descpage; - /* Copy in the new config header: type then length. 
*/ - devices.descpage[devices.desc_used++] = type; - devices.descpage[devices.desc_used++] = len; - memcpy(devices.descpage + devices.desc_used, c, len); - devices.desc_used += len; + /* We only have one page for all the descriptors. */ + if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) + errx(1, "Too many devices"); - /* Update the device descriptor length: two byte head then data. */ - dev->desc->config_len += 2 + len; + /* p might not be aligned, so we memcpy in. */ + return memcpy(p, &d, sizeof(d)); } -/* This routine adds a virtqueue to a device. We specify how many descriptors - * the virtqueue is to have. */ +/* Each device descriptor is followed by the description of its virtqueues. We + * specify how many descriptors the virtqueue is to have. */ static void add_virtqueue(struct device *dev, unsigned int num_descs, void (*handle_output)(int fd, struct virtqueue *me)) { @@ -1059,9 +1093,15 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, /* Initialize the vring. */ vring_init(&vq->vring, num_descs, p, getpagesize()); - /* Add the configuration information to this device's descriptor. */ - add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, - sizeof(vq->config), &vq->config); + /* Append virtqueue to this device's descriptor. We use + * device_config() to get the end of the device's current virtqueues; + * we check that we haven't added any config or feature information + * yet, otherwise we'd be overwriting them. */ + assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); + memcpy(device_config(dev), &vq->config, sizeof(vq->config)); + dev->desc->num_vq++; + + verbose("Virtqueue page %#lx\n", to_guest_phys(p)); /* Add to tail of list, so dev->vq is first vq, dev->vq->next is * second. */ @@ -1072,11 +1112,41 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, * virtqueue. 
*/ vq->handle_output = handle_output; - /* Set the "Don't Notify Me" flag if we don't have a handler */ + /* As an optimization, set the advisory "Don't Notify Me" flag if we + * don't have a handler */ if (!handle_output) vq->vring.used->flags = VRING_USED_F_NO_NOTIFY; } +/* The first half of the feature bitmask is for us to advertise features. The + * second half is for the Guest to accept features. */ +static void add_feature(struct device *dev, unsigned bit) +{ + u8 *features = get_feature_bits(dev); + + /* We can't extend the feature bits once we've added config bytes */ + if (dev->desc->feature_len <= bit / CHAR_BIT) { + assert(dev->desc->config_len == 0); + dev->desc->feature_len = (bit / CHAR_BIT) + 1; + } + + features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); +} + +/* This routine sets the configuration fields for an existing device's + * descriptor. It only works for the last device, but that's OK because that's + * how we use it. */ +static void set_config(struct device *dev, unsigned len, const void *conf) +{ + /* Check we haven't overflowed our single page. */ + if (device_config(dev) + len > devices.descpage + getpagesize()) + errx(1, "Too many devices"); + + /* Copy in the config information, and store the length. */ + memcpy(device_config(dev), conf, len); + dev->desc->config_len = len; +} + /* This routine does all the creation and setup of a new device, including * calling new_dev_desc() to allocate the descriptor and device memory. */ static struct device *new_device(const char *name, u16 type, int fd, @@ -1084,14 +1154,6 @@ static struct device *new_device(const char *name, u16 type, int fd, { struct device *dev = malloc(sizeof(*dev)); - /* Append to device list. Prepending to a single-linked list is - * easier, but the user expects the devices to be arranged on the bus - * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. 
*/ - *devices.lastdev = dev; - dev->next = NULL; - devices.lastdev = &dev->next; - /* Now we populate the fields one at a time. */ dev->fd = fd; /* If we have an input handler for this file descriptor, then we add it @@ -1102,6 +1164,17 @@ static struct device *new_device(const char *name, u16 type, int fd, dev->handle_input = handle_input; dev->name = name; dev->vq = NULL; + + /* Append to device list. Prepending to a single-linked list is + * easier, but the user expects the devices to be arranged on the bus + * in command-line order. The first network device on the command line + * is eth0, the first block device /dev/vda, etc. */ + if (devices.lastdev) + devices.lastdev->next = dev; + else + devices.dev = dev; + devices.lastdev = dev; + return dev; } @@ -1226,7 +1299,7 @@ static void setup_tun_net(const char *arg) int netfd, ipfd; u32 ip; const char *br_name = NULL; - u8 hwaddr[6]; + struct virtio_net_config conf; /* We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell @@ -1265,12 +1338,13 @@ static void setup_tun_net(const char *arg) ip = str2ip(arg); /* Set up the tun device, and get the mac address for the interface. */ - configure_device(ipfd, ifr.ifr_name, ip, hwaddr); + configure_device(ipfd, ifr.ifr_name, ip, conf.mac); /* Tell Guest what MAC address to use. */ - add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr); + add_feature(dev, VIRTIO_NET_F_MAC); + set_config(dev, sizeof(conf), &conf); - /* We don't seed the socket any more; setup is done. */ + /* We don't need the socket any more; setup is done. */ close(ipfd); verbose("device %u: tun net %u.%u.%u.%u\n", @@ -1458,8 +1532,7 @@ static void setup_block_file(const char *filename) struct device *dev; struct vblk_info *vblk; void *stack; - u64 cap; - unsigned int val; + struct virtio_blk_config conf; /* This is the pipe the I/O thread will use to tell us I/O is done. 
*/ pipe(p); @@ -1477,14 +1550,18 @@ static void setup_block_file(const char *filename) vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); vblk->len = lseek64(vblk->fd, 0, SEEK_END); + /* We support barriers. */ + add_feature(dev, VIRTIO_BLK_F_BARRIER); + /* Tell Guest how many sectors this device has. */ - cap = cpu_to_le64(vblk->len / 512); - add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap); + conf.capacity = cpu_to_le64(vblk->len / 512); /* Tell Guest not to put in too many descriptors at once: two are used * for the in and out elements. */ - val = cpu_to_le32(VIRTQUEUE_NUM - 2); - add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val); + add_feature(dev, VIRTIO_BLK_F_SEG_MAX); + conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); + + set_config(dev, sizeof(conf), &conf); /* The I/O thread writes to this end of the pipe when done. */ vblk->done_fd = p[1]; @@ -1505,7 +1582,7 @@ static void setup_block_file(const char *filename) close(vblk->workpipe[0]); verbose("device %u: virtblock %llu sectors\n", - devices.device_num, cap); + devices.device_num, le64_to_cpu(conf.capacity)); } /* That's the end of device setup. :*/ @@ -1610,12 +1687,12 @@ int main(int argc, char *argv[]) /* First we initialize the device list. Since console and network * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the - * list. We also keep a pointer to the last device, for easy appending - * to the list. Finally, we keep the next interrupt number to hand out - * (1: remember that 0 is used by the timer). */ + * list. We also keep a pointer to the last device. Finally, we keep + * the next interrupt number to hand out (1: remember that 0 is used by + * the timer). 
*/ FD_ZERO(&devices.infds); devices.max_infd = -1; - devices.lastdev = &devices.dev; + devices.lastdev = NULL; devices.next_irq = 1; cpu_id = 0; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index c83e1c9b512..41962e793c0 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -53,5 +53,6 @@ config KVM_AMD # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/lguest/Kconfig +source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/drivers/Kconfig b/drivers/Kconfig index 08d4ae20159..3f8a231fe75 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -91,6 +91,4 @@ source "drivers/dca/Kconfig" source "drivers/auxdisplay/Kconfig" source "drivers/uio/Kconfig" - -source "drivers/virtio/Kconfig" endmenu diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f2122855d4e..64e5148d82b 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -440,6 +440,7 @@ config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO ---help--- - This is the virtual block driver for lguest. Say Y or M. + This is the virtual block driver for virtio. It can be used with + lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. 
endif # BLK_DEV diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 924ddd8bccd..3b1a68d6edd 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -7,8 +7,10 @@ #include <linux/scatterlist.h> #define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) +#define PART_BITS 4 + +static int major, index; -static unsigned char virtblk_index = 'a'; struct virtio_blk { spinlock_t lock; @@ -36,7 +38,7 @@ struct virtblk_req struct virtio_blk_inhdr in_hdr; }; -static bool blk_done(struct virtqueue *vq) +static void blk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; struct virtblk_req *vbr; @@ -65,7 +67,6 @@ static bool blk_done(struct virtqueue *vq) /* In case queue is stopped waiting for more buffers. */ blk_start_queue(vblk->disk->queue); spin_unlock_irqrestore(&vblk->lock, flags); - return true; } static bool do_req(struct request_queue *q, struct virtio_blk *vblk, @@ -153,20 +154,37 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp, (void __user *)data); } +/* We provide getgeo only to please some old bootloader/partitioning tools */ +static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values, similar to sd */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + static struct block_device_operations virtblk_fops = { - .ioctl = virtblk_ioctl, - .owner = THIS_MODULE, + .ioctl = virtblk_ioctl, + .owner = THIS_MODULE, + .getgeo = virtblk_getgeo, }; +static int index_to_minor(int index) +{ + return index << PART_BITS; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - int err, major; - void *token; - unsigned int len; + int err; u64 cap; u32 v; + if (index_to_minor(index) >= 1 << MINORBITS) + return -ENOSPC; + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); if (!vblk) { err = -ENOMEM; @@ -178,7 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) 
vblk->vdev = vdev; /* We expect one virtqueue, for output. */ - vblk->vq = vdev->config->find_vq(vdev, blk_done); + vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); if (IS_ERR(vblk->vq)) { err = PTR_ERR(vblk->vq); goto out_free_vblk; @@ -190,17 +208,11 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_free_vq; } - major = register_blkdev(0, "virtblk"); - if (major < 0) { - err = major; - goto out_mempool; - } - /* FIXME: How many partitions? How long is a piece of string? */ - vblk->disk = alloc_disk(1 << 4); + vblk->disk = alloc_disk(1 << PART_BITS); if (!vblk->disk) { err = -ENOMEM; - goto out_unregister_blkdev; + goto out_mempool; } vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); @@ -209,22 +221,32 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_put_disk; } - sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++); + if (index < 26) { + sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); + } else if (index < (26 + 1) * 26) { + sprintf(vblk->disk->disk_name, "vd%c%c", + 'a' + index / 26 - 1, 'a' + index % 26); + } else { + const unsigned int m1 = (index / 26 - 1) / 26 - 1; + const unsigned int m2 = (index / 26 - 1) % 26; + const unsigned int m3 = index % 26; + sprintf(vblk->disk->disk_name, "vd%c%c%c", + 'a' + m1, 'a' + m2, 'a' + m3); + } + vblk->disk->major = major; - vblk->disk->first_minor = 0; + vblk->disk->first_minor = index_to_minor(index); vblk->disk->private_data = vblk; vblk->disk->fops = &virtblk_fops; + index++; /* If barriers are supported, tell block layer that queue is ordered */ - token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len); - if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER)) + if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER)) blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap); - if (err) { - dev_err(&vdev->dev, "Bad/missing capacity in config\n"); - goto out_cleanup_queue; - 
} + /* Host must always specify the capacity. */ + __virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity), + &cap); /* If capacity is too big, truncate with warning. */ if ((sector_t)cap != cap) { @@ -234,31 +256,25 @@ static int virtblk_probe(struct virtio_device *vdev) } set_capacity(vblk->disk, cap); - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v); + /* Host can optionally specify maximum segment size and number of + * segments. */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, + offsetof(struct virtio_blk_config, size_max), + &v); if (!err) blk_queue_max_segment_size(vblk->disk->queue, v); - else if (err != -ENOENT) { - dev_err(&vdev->dev, "Bad SIZE_MAX in config\n"); - goto out_cleanup_queue; - } - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v); + err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, + offsetof(struct virtio_blk_config, seg_max), + &v); if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); - else if (err != -ENOENT) { - dev_err(&vdev->dev, "Bad SEG_MAX in config\n"); - goto out_cleanup_queue; - } add_disk(vblk->disk); return 0; -out_cleanup_queue: - blk_cleanup_queue(vblk->disk->queue); out_put_disk: put_disk(vblk->disk); -out_unregister_blkdev: - unregister_blkdev(major, "virtblk"); out_mempool: mempool_destroy(vblk->pool); out_free_vq: @@ -274,12 +290,16 @@ static void virtblk_remove(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; int major = vblk->disk->major; + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); + + /* Stop all the virtqueues. 
*/ + vdev->config->reset(vdev); + blk_cleanup_queue(vblk->disk->queue); put_disk(vblk->disk); unregister_blkdev(major, "virtblk"); mempool_destroy(vblk->pool); - /* There should be nothing in the queue now, so no need to shutdown */ vdev->config->del_vq(vblk->vq); kfree(vblk); } @@ -299,11 +319,15 @@ static struct virtio_driver virtio_blk = { static int __init init(void) { + major = register_blkdev(0, "virtblk"); + if (major < 0) + return major; return register_virtio_driver(&virtio_blk); } static void __exit fini(void) { + unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); } module_init(init); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e34da5c9719..dc17fe3a88b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -158,13 +158,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, NULL); + in_vq = vdev->config->find_vq(vdev, 0, NULL); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; } - out_vq = vdev->config->find_vq(vdev, NULL); + out_vq = vdev->config->find_vq(vdev, 1, NULL); if (IS_ERR(out_vq)) { err = PTR_ERR(out_vq); goto free_in_vq; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e2eec38c83c..84f85e23cca 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -52,57 +52,82 @@ struct lguest_device { /*D:130 * Device configurations * - * The configuration information for a device consists of a series of fields. - * We don't really care what they are: the Launcher set them up, and the driver - * will look at them during setup. + * The configuration information for a device consists of one or more + * virtqueues, a feature bitmask, and some configuration bytes. 
The + * configuration bytes don't really matter to us: the Launcher sets them up, and + * the driver will look at them during setup. * - * For us these fields come immediately after that device's descriptor in the - * lguest_devices page. - * - * Each field starts with a "type" byte, a "length" byte, then that number of - * bytes of configuration information. The device descriptor tells us the - * total configuration length so we know when we've reached the last field. */ + * A convenient routine to return the device's virtqueue config array: + * immediately after the descriptor. */ +static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) +{ + return (void *)(desc + 1); +} -/* type + length bytes */ -#define FHDR_LEN 2 +/* The features come immediately after the virtqueues. */ +static u8 *lg_features(const struct lguest_device_desc *desc) +{ + return (void *)(lg_vq(desc) + desc->num_vq); +} -/* This finds the first field of a given type for a device's configuration. */ -static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) +/* The config space comes after the two feature bitmasks. */ +static u8 *lg_config(const struct lguest_device_desc *desc) { - struct lguest_device_desc *desc = to_lgdev(vdev)->desc; - int i; - - for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) { - if (desc->config[i] == type) { - /* Mark it used, so Host can know we looked at it, and - * also so we won't find the same one twice. */ - desc->config[i] |= 0x80; - /* Remember, the second byte is the length. */ - *len = desc->config[i+1]; - /* We return a pointer to the field header. */ - return desc->config + i; - } - } + return lg_features(desc) + desc->feature_len * 2; +} - /* Not found: return NULL for failure. */ - return NULL; +/* The total size of the config page used by this device (incl. 
desc) */ +static unsigned desc_size(const struct lguest_device_desc *desc) +{ + return sizeof(*desc) + + desc->num_vq * sizeof(struct lguest_vqconfig) + + desc->feature_len * 2 + + desc->config_len; +} + +/* This tests (and acknowledges) a feature bit. */ +static bool lg_feature(struct virtio_device *vdev, unsigned fbit) +{ + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + u8 *features; + + /* Obviously if they ask for a feature off the end of our feature + * bitmap, it's not set. */ + if (fbit / 8 > desc->feature_len) + return false; + + /* The feature bitmap comes after the virtqueues. */ + features = lg_features(desc); + if (!(features[fbit / 8] & (1 << (fbit % 8)))) + return false; + + /* We set the matching bit in the other half of the bitmap to tell the + * Host we want to use this feature. We don't use this yet, but we + * could in future. */ + features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8)); + return true; } /* Once they've found a field, getting a copy of it is easy. */ -static void lg_get(struct virtio_device *vdev, void *token, +static void lg_get(struct virtio_device *vdev, unsigned int offset, void *buf, unsigned len) { - /* Check they didn't ask for more than the length of the field! */ - BUG_ON(len > ((u8 *)token)[1]); - memcpy(buf, token + FHDR_LEN, len); + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + + /* Check they didn't ask for more than the length of the config! */ + BUG_ON(offset + len > desc->config_len); + memcpy(buf, lg_config(desc) + offset, len); } /* Setting the contents is also trivial. */ -static void lg_set(struct virtio_device *vdev, void *token, +static void lg_set(struct virtio_device *vdev, unsigned int offset, const void *buf, unsigned len) { - BUG_ON(len > ((u8 *)token)[1]); - memcpy(token + FHDR_LEN, buf, len); + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + + /* Check they didn't ask for more than the length of the config! 
*/ + BUG_ON(offset + len > desc->config_len); + memcpy(lg_config(desc) + offset, buf, len); } /* The operations to get and set the status word just access the status field @@ -114,9 +139,20 @@ static u8 lg_get_status(struct virtio_device *vdev) static void lg_set_status(struct virtio_device *vdev, u8 status) { + BUG_ON(!status); to_lgdev(vdev)->desc->status = status; } +/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor + * address of the device. The Host will zero the status and all the + * features. */ +static void lg_reset(struct virtio_device *vdev) +{ + unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; + + hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0); +} + /* * Virtqueues * @@ -165,39 +201,29 @@ static void lg_notify(struct virtqueue *vq) * * So we provide devices with a "find virtqueue and set it up" function. */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, - bool (*callback)(struct virtqueue *vq)) + unsigned index, + void (*callback)(struct virtqueue *vq)) { + struct lguest_device *ldev = to_lgdev(vdev); struct lguest_vq_info *lvq; struct virtqueue *vq; - unsigned int len; - void *token; int err; - /* Look for a field of the correct type to mark a virtqueue. |