diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-04 08:00:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-02-04 08:00:54 -0800 |
commit | 93890b71a34f9490673a6edd56b61c2124215e46 (patch) | |
tree | c5d82620f2cb69f0bf43639e63f54b0c0e2eb744 /drivers | |
parent | f5bb3a5e9dcdb8435471562b6cada89525cf4df1 (diff) | |
parent | 6b35e40767c6c1ac783330109ae8e0c09ea6bc82 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (25 commits)
virtio: balloon driver
virtio: Use PCI revision field to indicate virtio PCI ABI version
virtio: PCI device
virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz
virtio_blk: Dont waste major numbers
virtio_blk: provide getgeo
virtio_net: parametrize the napi_weight for virtio receive queue.
virtio: free transmit skbs when notified, not on next xmit.
virtio: flush buffers on open
virtnet: remove double ether_setup
virtio: Allow virtio to be modular and used by modules
virtio: Use the sg_phys convenience function.
virtio: Put the virtio under the virtualization menu
virtio: handle interrupts after callbacks turned off
virtio: reset function
virtio: populate network rings in the probe routine, not open
virtio: Tweak virtio_net defines
virtio: Net header needs hdr_len
virtio: remove unused id field from struct virtio_blk_outhdr
virtio: clarify NO_NOTIFY flag usage
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/Kconfig | 3 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 106 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 4 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 146 | ||||
-rw-r--r-- | drivers/net/Kconfig | 3 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 155 | ||||
-rw-r--r-- | drivers/virtio/Kconfig | 31 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 65 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 284 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 446 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 51 |
13 files changed, 1055 insertions, 243 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 08d4ae20159..3f8a231fe75 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -91,6 +91,4 @@ source "drivers/dca/Kconfig" source "drivers/auxdisplay/Kconfig" source "drivers/uio/Kconfig" - -source "drivers/virtio/Kconfig" endmenu diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f2122855d4e..64e5148d82b 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -440,6 +440,7 @@ config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO ---help--- - This is the virtual block driver for lguest. Say Y or M. + This is the virtual block driver for virtio. It can be used with + lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. endif # BLK_DEV diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 924ddd8bccd..3b1a68d6edd 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -7,8 +7,10 @@ #include <linux/scatterlist.h> #define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) +#define PART_BITS 4 + +static int major, index; -static unsigned char virtblk_index = 'a'; struct virtio_blk { spinlock_t lock; @@ -36,7 +38,7 @@ struct virtblk_req struct virtio_blk_inhdr in_hdr; }; -static bool blk_done(struct virtqueue *vq) +static void blk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; struct virtblk_req *vbr; @@ -65,7 +67,6 @@ static bool blk_done(struct virtqueue *vq) /* In case queue is stopped waiting for more buffers. */ blk_start_queue(vblk->disk->queue); spin_unlock_irqrestore(&vblk->lock, flags); - return true; } static bool do_req(struct request_queue *q, struct virtio_blk *vblk, @@ -153,20 +154,37 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp, (void __user *)data); } +/* We provide getgeo only to please some old bootloader/partitioning tools */ +static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values, similar to sd */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + static struct block_device_operations virtblk_fops = { - .ioctl = virtblk_ioctl, - .owner = THIS_MODULE, + .ioctl = virtblk_ioctl, + .owner = THIS_MODULE, + .getgeo = virtblk_getgeo, }; +static int index_to_minor(int index) +{ + return index << PART_BITS; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; - int err, major; - void *token; - unsigned int len; + int err; u64 cap; u32 v; + if (index_to_minor(index) >= 1 << MINORBITS) + return -ENOSPC; + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); if (!vblk) { err = -ENOMEM; @@ -178,7 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; /* We expect one virtqueue, for output. */ - vblk->vq = vdev->config->find_vq(vdev, blk_done); + vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); if (IS_ERR(vblk->vq)) { err = PTR_ERR(vblk->vq); goto out_free_vblk; @@ -190,17 +208,11 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_free_vq; } - major = register_blkdev(0, "virtblk"); - if (major < 0) { - err = major; - goto out_mempool; - } - /* FIXME: How many partitions? How long is a piece of string? */ - vblk->disk = alloc_disk(1 << 4); + vblk->disk = alloc_disk(1 << PART_BITS); if (!vblk->disk) { err = -ENOMEM; - goto out_unregister_blkdev; + goto out_mempool; } vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); @@ -209,22 +221,32 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_put_disk; } - sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++); + if (index < 26) { + sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); + } else if (index < (26 + 1) * 26) { + sprintf(vblk->disk->disk_name, "vd%c%c", + 'a' + index / 26 - 1, 'a' + index % 26); + } else { + const unsigned int m1 = (index / 26 - 1) / 26 - 1; + const unsigned int m2 = (index / 26 - 1) % 26; + const unsigned int m3 = index % 26; + sprintf(vblk->disk->disk_name, "vd%c%c%c", + 'a' + m1, 'a' + m2, 'a' + m3); + } + vblk->disk->major = major; - vblk->disk->first_minor = 0; + vblk->disk->first_minor = index_to_minor(index); vblk->disk->private_data = vblk; vblk->disk->fops = &virtblk_fops; + index++; /* If barriers are supported, tell block layer that queue is ordered */ - token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len); - if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER)) + if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER)) blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap); - if (err) { - dev_err(&vdev->dev, "Bad/missing capacity in config\n"); - goto out_cleanup_queue; - } + /* Host must always specify the capacity. */ + __virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity), + &cap); /* If capacity is too big, truncate with warning. */ if ((sector_t)cap != cap) { @@ -234,31 +256,25 @@ static int virtblk_probe(struct virtio_device *vdev) } set_capacity(vblk->disk, cap); - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v); + /* Host can optionally specify maximum segment size and number of + * segments. */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, + offsetof(struct virtio_blk_config, size_max), + &v); if (!err) blk_queue_max_segment_size(vblk->disk->queue, v); - else if (err != -ENOENT) { - dev_err(&vdev->dev, "Bad SIZE_MAX in config\n"); - goto out_cleanup_queue; - } - err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v); + err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, + offsetof(struct virtio_blk_config, seg_max), + &v); if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); - else if (err != -ENOENT) { - dev_err(&vdev->dev, "Bad SEG_MAX in config\n"); - goto out_cleanup_queue; - } add_disk(vblk->disk); return 0; -out_cleanup_queue: - blk_cleanup_queue(vblk->disk->queue); out_put_disk: put_disk(vblk->disk); -out_unregister_blkdev: - unregister_blkdev(major, "virtblk"); out_mempool: mempool_destroy(vblk->pool); out_free_vq: @@ -274,12 +290,16 @@ static void virtblk_remove(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; int major = vblk->disk->major; + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); + + /* Stop all the virtqueues. */ + vdev->config->reset(vdev); + blk_cleanup_queue(vblk->disk->queue); put_disk(vblk->disk); unregister_blkdev(major, "virtblk"); mempool_destroy(vblk->pool); - /* There should be nothing in the queue now, so no need to shutdown */ vdev->config->del_vq(vblk->vq); kfree(vblk); } @@ -299,11 +319,15 @@ static struct virtio_driver virtio_blk = { static int __init init(void) { + major = register_blkdev(0, "virtblk"); + if (major < 0) + return major; return register_virtio_driver(&virtio_blk); } static void __exit fini(void) { + unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); } module_init(init); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e34da5c9719..dc17fe3a88b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -158,13 +158,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, NULL); + in_vq = vdev->config->find_vq(vdev, 0, NULL); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; } - out_vq = vdev->config->find_vq(vdev, NULL); + out_vq = vdev->config->find_vq(vdev, 1, NULL); if (IS_ERR(out_vq)) { err = PTR_ERR(out_vq); goto free_in_vq; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e2eec38c83c..84f85e23cca 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -52,57 +52,82 @@ struct lguest_device { /*D:130 * Device configurations * - * The configuration information for a device consists of a series of fields. - * We don't really care what they are: the Launcher set them up, and the driver - * will look at them during setup. + * The configuration information for a device consists of one or more + * virtqueues, a feature bitmaks, and some configuration bytes. The + * configuration bytes don't really matter to us: the Launcher sets them up, and + * the driver will look at them during setup. * - * For us these fields come immediately after that device's descriptor in the - * lguest_devices page. - * - * Each field starts with a "type" byte, a "length" byte, then that number of - * bytes of configuration information. The device descriptor tells us the - * total configuration length so we know when we've reached the last field. */ + * A convenient routine to return the device's virtqueue config array: + * immediately after the descriptor. */ +static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) +{ + return (void *)(desc + 1); +} -/* type + length bytes */ -#define FHDR_LEN 2 +/* The features come immediately after the virtqueues. */ +static u8 *lg_features(const struct lguest_device_desc *desc) +{ + return (void *)(lg_vq(desc) + desc->num_vq); +} -/* This finds the first field of a given type for a device's configuration. */ -static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len) +/* The config space comes after the two feature bitmasks. */ +static u8 *lg_config(const struct lguest_device_desc *desc) { - struct lguest_device_desc *desc = to_lgdev(vdev)->desc; - int i; - - for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) { - if (desc->config[i] == type) { - /* Mark it used, so Host can know we looked at it, and - * also so we won't find the same one twice. */ - desc->config[i] |= 0x80; - /* Remember, the second byte is the length. */ - *len = desc->config[i+1]; - /* We return a pointer to the field header. */ - return desc->config + i; - } - } + return lg_features(desc) + desc->feature_len * 2; +} - /* Not found: return NULL for failure. */ - return NULL; +/* The total size of the config page used by this device (incl. desc) */ +static unsigned desc_size(const struct lguest_device_desc *desc) +{ + return sizeof(*desc) + + desc->num_vq * sizeof(struct lguest_vqconfig) + + desc->feature_len * 2 + + desc->config_len; +} + +/* This tests (and acknowleges) a feature bit. */ +static bool lg_feature(struct virtio_device *vdev, unsigned fbit) +{ + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + u8 *features; + + /* Obviously if they ask for a feature off the end of our feature + * bitmap, it's not set. */ + if (fbit / 8 > desc->feature_len) + return false; + + /* The feature bitmap comes after the virtqueues. */ + features = lg_features(desc); + if (!(features[fbit / 8] & (1 << (fbit % 8)))) + return false; + + /* We set the matching bit in the other half of the bitmap to tell the + * Host we want to use this feature. We don't use this yet, but we + * could in future. */ + features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8)); + return true; } /* Once they've found a field, getting a copy of it is easy. */ -static void lg_get(struct virtio_device *vdev, void *token, +static void lg_get(struct virtio_device *vdev, unsigned int offset, void *buf, unsigned len) { - /* Check they didn't ask for more than the length of the field! */ - BUG_ON(len > ((u8 *)token)[1]); - memcpy(buf, token + FHDR_LEN, len); + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + + /* Check they didn't ask for more than the length of the config! */ + BUG_ON(offset + len > desc->config_len); + memcpy(buf, lg_config(desc) + offset, len); } /* Setting the contents is also trivial. */ -static void lg_set(struct virtio_device *vdev, void *token, +static void lg_set(struct virtio_device *vdev, unsigned int offset, const void *buf, unsigned len) { - BUG_ON(len > ((u8 *)token)[1]); - memcpy(token + FHDR_LEN, buf, len); + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; + + /* Check they didn't ask for more than the length of the config! */ + BUG_ON(offset + len > desc->config_len); + memcpy(lg_config(desc) + offset, buf, len); } /* The operations to get and set the status word just access the status field @@ -114,9 +139,20 @@ static u8 lg_get_status(struct virtio_device *vdev) static void lg_set_status(struct virtio_device *vdev, u8 status) { + BUG_ON(!status); to_lgdev(vdev)->desc->status = status; } +/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor + * address of the device. The Host will zero the status and all the + * features. */ +static void lg_reset(struct virtio_device *vdev) +{ + unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; + + hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0); +} + /* * Virtqueues * @@ -165,39 +201,29 @@ static void lg_notify(struct virtqueue *vq) * * So we provide devices with a "find virtqueue and set it up" function. */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, - bool (*callback)(struct virtqueue *vq)) + unsigned index, + void (*callback)(struct virtqueue *vq)) { + struct lguest_device *ldev = to_lgdev(vdev); struct lguest_vq_info *lvq; struct virtqueue *vq; - unsigned int len; - void *token; int err; - /* Look for a field of the correct type to mark a virtqueue. Note that - * if this succeeds, then the type will be changed so it won't be found - * again, and future lg_find_vq() calls will find the next - * virtqueue (if any). */ - token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len); - if (!token) + /* We must have this many virtqueues. */ + if (index >= ldev->desc->num_vq) return ERR_PTR(-ENOENT); lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); if (!lvq) return ERR_PTR(-ENOMEM); - /* Note: we could use a configuration space inside here, just like we - * do for the device. This would allow expansion in future, because - * our configuration system is designed to be expansible. But this is - * way easier. */ - if (len != sizeof(lvq->config)) { - dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len); - err = -EIO; - goto free_lvq; - } - /* Make a copy of the "struct lguest_vqconfig" field. We need a copy - * because the config space might not be aligned correctly. */ - vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config)); + /* Make a copy of the "struct lguest_vqconfig" entry, which sits after + * the descriptor. We need a copy because the config space might not + * be aligned correctly. */ + memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); + printk("Mapping virtqueue %i addr %lx\n", index, + (unsigned long)lvq->config.pfn << PAGE_SHIFT); /* Figure out how many pages the ring will take, and map that memory */ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, DIV_ROUND_UP(vring_size(lvq->config.num, @@ -259,11 +285,12 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { - .find = lg_find, + .feature = lg_feature, .get = lg_get, .set = lg_set, .get_status = lg_get_status, .set_status = lg_set_status, + .reset = lg_reset, .find_vq = lg_find_vq, .del_vq = lg_del_vq, }; @@ -329,13 +356,14 @@ static void scan_devices(void) struct lguest_device_desc *d; /* We start at the page beginning, and skip over each entry. */ - for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) { + for (i = 0; i < PAGE_SIZE; i += desc_size(d)) { d = lguest_devices + i; /* Once we hit a zero, stop. */ if (d->type == 0) break; + printk("Device at %i has size %u\n", i, desc_size(d)); add_lguest_device(d); } } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 55d224c8a0b..f234ba3f040 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3114,6 +3114,7 @@ config VIRTIO_NET tristate "Virtio network driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO ---help--- - This is the virtual network driver for lguest. Say Y or M. + This is the virtual network driver for virtio. It can be used with + lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. endif # NETDEVICES diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5413dbf3d4a..e66de0c12fc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -24,6 +24,13 @@ #include <linux/virtio_net.h> #include <linux/scatterlist.h> +static int napi_weight = 128; +module_param(napi_weight, int, 0444); + +static int csum = 1, gso = 1; +module_param(csum, bool, 0444); +module_param(gso, bool, 0444); + /* FIXME: MTU in config. */ #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) @@ -52,13 +59,14 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } -static bool skb_xmit_done(struct virtqueue *rvq) +static void skb_xmit_done(struct virtqueue *svq) { - struct virtnet_info *vi = rvq->vdev->priv; + struct virtnet_info *vi = svq->vdev->priv; - /* In case we were waiting for output buffers. */ + /* Suppress further interrupts. */ + svq->vq_ops->disable_cb(svq); + /* We were waiting for more output buffers. */ netif_wake_queue(vi->dev); - return true; } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -83,28 +91,16 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = hdr->csum_start; - skb->csum_offset = hdr->csum_offset; - if (skb->csum_start > skb->len - 2 - || skb->csum_offset > skb->len - 2) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: csum=%u/%u len=%u\n", - dev->name, skb->csum_start, - skb->csum_offset, skb->len); + if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset)) goto frame_err; - } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); - switch (hdr->gso_type) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; - case VIRTIO_NET_HDR_GSO_TCPV4_ECN: - skb_shinfo(skb)->gso_type = SKB_GSO_TCP_ECN; - break; case VIRTIO_NET_HDR_GSO_UDP: skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; @@ -118,6 +114,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto frame_err; } + if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_size = hdr->gso_size; if (skb_shinfo(skb)->gso_size == 0) { if (net_ratelimit()) @@ -170,12 +169,14 @@ static void try_fill_recv(struct virtnet_info *vi) vi->rvq->vq_ops->kick(vi->rvq); } -static bool skb_recv_done(struct virtqueue *rvq) +static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; - netif_rx_schedule(vi->dev, &vi->napi); - /* Suppress further interrupts. */ - return false; + /* Schedule NAPI, Suppress further interrupts if successful. */ + if (netif_rx_schedule_prep(vi->dev, &vi->napi)) { + rvq->vq_ops->disable_cb(rvq); + __netif_rx_schedule(vi->dev, &vi->napi); + } } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -201,7 +202,7 @@ again: /* Out of packets? */ if (received < budget) { netif_rx_complete(vi->dev, napi); - if (unlikely(!vi->rvq->vq_ops->restart(vi->rvq)) + if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq)) && netif_rx_reschedule(vi->dev, napi)) goto again; } @@ -236,8 +237,6 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev) pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest)); - free_old_xmit_skbs(vi); - /* Encode metadata header at front. */ hdr = skb_vnet_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -250,10 +249,9 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_is_gso(skb)) { + hdr->hdr_len = skb_transport_header(skb) - skb->data; hdr->gso_size = skb_shinfo(skb)->gso_size; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; @@ -261,19 +259,34 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev) hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else { hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->gso_size = 0; + hdr->gso_size = hdr->hdr_len = 0; } vnet_hdr_to_sg(sg, skb); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; __skb_queue_head(&vi->send, skb); + +again: + /* Free up any pending old buffers before queueing new ones. */ + free_old_xmit_skbs(vi); err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); if (err) { pr_debug("%s: virtio not prepared to send\n", dev->name); - skb_unlink(skb, &vi->send); netif_stop_queue(dev); + + /* Activate callback for using skbs: if this fails it + * means some were used in the meantime. */ + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + printk("Unlikely: restart svq failed\n"); + netif_start_queue(dev); + goto again; + } + __skb_unlink(skb, &vi->send); + return NETDEV_TX_BUSY; } vi->svq->vq_ops->kick(vi->svq); @@ -285,45 +298,31 @@ static int virtnet_open(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); - try_fill_recv(vi); + napi_enable(&vi->napi); - /* If we didn't even get one input buffer, we're useless. */ - if (vi->num == 0) - return -ENOMEM; + /* If all buffers were filled by other side before we napi_enabled, we + * won't get another interrupt, so process any outstanding packets + * now. virtnet_poll wants re-enable the queue, so we disable here. */ + vi->rvq->vq_ops->disable_cb(vi->rvq); + netif_rx_schedule(vi->dev, &vi->napi); - napi_enable(&vi->napi); return 0; } static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); - struct sk_buff *skb; napi_disable(&vi->napi); - /* networking core has neutered skb_xmit_done/skb_recv_done, so don't - * worry about races vs. get(). */ - vi->rvq->vq_ops->shutdown(vi->rvq); - while ((skb = __skb_dequeue(&vi->recv)) != NULL) { - kfree_skb(skb); - vi->num--; - } - vi->svq->vq_ops->shutdown(vi->svq); - while ((skb = __skb_dequeue(&vi->send)) != NULL) - kfree_skb(skb); - - BUG_ON(vi->num != 0); return 0; } static int virtnet_probe(struct virtio_device *vdev) { int err; - unsigned int len; struct net_device *dev; struct virtnet_info *vi; - void *token; /* Allocate ourselves a network device with room for our info */ dev = alloc_etherdev(sizeof(struct virtnet_info)); @@ -331,7 +330,6 @@ static int virtnet_probe(struct virtio_device *vdev) return -ENOMEM; /* Set up network device as normal. */ - ether_setup(dev); dev->open = virtnet_open; dev->stop = virtnet_close; dev->hard_start_xmit = start_xmit; @@ -339,42 +337,37 @@ static int virtnet_probe(struct virtio_device *vdev) SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ - token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_F, &len); - if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_NO_CSUM)) { + if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; - if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4)) - dev->features |= NETIF_F_TSO; - if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_UFO)) - dev->features |= NETIF_F_UFO; - if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4_ECN)) - dev->features |= NETIF_F_TSO_ECN; - if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO6)) - dev->features |= NETIF_F_TSO6; + if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) { + dev->features |= NETIF_F_TSO | NETIF_F_UFO + | NETIF_F_TSO_ECN | NETIF_F_TSO6; + } } /* Configuration may specify what MAC to use. Otherwise random. */ - token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_MAC_F, &len); - if (token) { - dev->addr_len = len; - vdev->config->get(vdev, token, dev->dev_addr, len); + if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) { + vdev->config->get(vdev, + offsetof(struct virtio_net_config, mac), + dev->dev_addr, dev->addr_len); } else random_ether_addr(dev->dev_addr); /* Set up our device-specific information */ vi = netdev_priv(dev); - netif_napi_add(dev, &vi->napi, virtnet_poll, 16); + netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight); vi->dev = dev; vi->vdev = vdev; /* We expect two virtqueues, receive then send. */ - vi->rvq = vdev->config->find_vq(vdev, skb_recv_done); + vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { err = PTR_ERR(vi->rvq); goto free; } - vi->svq = vdev->config->find_vq(vdev, skb_xmit_done); + vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done); if (IS_ERR(vi->svq)) { err = PTR_ERR(vi->svq); goto free_recv; @@ -389,10 +382,22 @@ static int virtnet_probe(struct virtio_device *vdev) pr_debug("virtio_net: registering device failed\n"); goto free_send; } + + /* Last of all, set up some receive buffers. */ + try_fill_recv(vi); + + /* If we didn't even get one input buffer, we're useless. */ + if (vi->num == 0) { + err = -ENOMEM; + goto unregister; + } + pr_debug("virtnet: registered device %s\n", dev->name); vdev->priv = vi; return 0; +unregister: + unregister_netdev(dev); free_send: vdev->config->del_vq(vi->svq); free_recv: @@ -405,6 +410,20 @@ free: static void virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; + struct sk_buff *skb; + + /* Stop all the virtqueues. */ + vdev->config->reset(vdev); + + /* Free our skbs in send and recv queues, if any. */ + while ((skb = __skb_dequeue(&vi->recv)) != NULL) { + kfree_skb(skb); + vi->num--; + } + while ((skb = __skb_dequeue(&vi->send)) != NULL) + kfree_skb(skb); + + BUG_ON(vi->num != 0); vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 9e33fc4da87..3dd6294d10b 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,8 +1,35 @@ # Virtio always gets selected by whoever wants it. config VIRTIO - bool + tristate # Similarly the virtio ring implementation. config VIRTIO_RING - bool + tristate depends on VIRTIO + +config VIRTIO_PCI + tristate "PCI driver for virtio devices (EXPERIMENTAL)" + depends on PCI && EXPERIMENTAL + select VIRTIO + select VIRTIO_RING + ---help--- + This drivers provides support for virtio based paravirtual device + drivers over PCI. This requires that your VMM has appropriate PCI + virtio backends. Most QEMU based VMMs should support these devices + (like KVM or Xen). + + Currently, the ABI is not considered stable so there is no guarantee + that this version of the driver will work with your VMM. + + If unsure, say M. + +config VIRTIO_BALLOON + tristate "Virtio balloon driver (EXPERIMENTAL)" + select VIRTIO + select VIRTIO_RING + ---help--- + This driver supports increasing and decreasing the amount + of memory within a KVM guest. + + If unsure, say M. + diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index f70e40971dd..6738c446c19 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,2 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o +obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 69d7ea02cd4..b535483bc55 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -102,9 +102,13 @@ static int virtio_dev_remove(struct device *_d) struct virtio_driver *drv = container_of(dev->dev.driver, struct virtio_driver, driver); - dev->config->set_status(dev, dev->config->get_status(dev) - & ~VIRTIO_CONFIG_S_DRIVER); drv->remove(dev); + + /* Driver should have reset device. */ + BUG_ON(dev->config->get_status(dev)); + + /* Acknowledge the device's existence again. */ + add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); return 0; } @@ -130,6 +134,10 @@ int register_virtio_device(struct virtio_device *dev) dev->dev.bus = &virtio_bus; sprintf(dev->dev.bus_id, "%u", dev->index); + /* We always start by resetting the device, in case a previous + * driver messed it up. This also tests that code path a little. */ + dev->config->reset(dev); + /* Acknowledge that we've seen the device. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); @@ -148,55 +156,18 @@ void unregister_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(unregister_virtio_device); < |