Diffstat (limited to 'drivers/virtio')
-rw-r--r--  drivers/virtio/Kconfig          |  42
-rw-r--r--  drivers/virtio/Makefile         |   4
-rw-r--r--  drivers/virtio/config.c         |   1
-rw-r--r--  drivers/virtio/virtio.c         |  84
-rw-r--r--  drivers/virtio/virtio_balloon.c | 462
-rw-r--r--  drivers/virtio/virtio_mmio.c    | 666
-rw-r--r--  drivers/virtio/virtio_pci.c     | 260
-rw-r--r--  drivers/virtio/virtio_ring.c    | 635
8 files changed, 1836 insertions(+), 318 deletions(-)
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 3dd6294d10b..c6683f2e396 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -1,17 +1,16 @@
-# Virtio always gets selected by whoever wants it.
 config VIRTIO
 	tristate
+	---help---
+	  This option is selected by any driver which implements the virtio
+	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
+	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
-# Similarly the virtio ring implementation.
-config VIRTIO_RING
-	tristate
-	depends on VIRTIO
+menu "Virtio drivers"
 
 config VIRTIO_PCI
-	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
-	depends on PCI && EXPERIMENTAL
+	tristate "PCI driver for virtio devices"
+	depends on PCI
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This driver provides support for virtio based paravirtual device
 	  drivers over PCI.  This requires that your VMM has appropriate PCI
@@ -24,12 +23,33 @@ config VIRTIO_PCI
 	  If unsure, say M.
 
 config VIRTIO_BALLOON
-	tristate "Virtio balloon driver (EXPERIMENTAL)"
-	select VIRTIO
-	select VIRTIO_RING
+	tristate "Virtio balloon driver"
+	depends on VIRTIO
 	---help---
 	  This driver supports increasing and decreasing the amount
 	  of memory within a KVM guest.
 
 	  If unsure, say M.
 
+config VIRTIO_MMIO
+	tristate "Platform bus driver for memory mapped virtio devices"
+	depends on HAS_IOMEM
+	select VIRTIO
+	---help---
+	  This driver provides support for memory mapped virtio
+	  platform devices.
+
+	  If unsure, say N.
+
+config VIRTIO_MMIO_CMDLINE_DEVICES
+	bool "Memory mapped virtio devices parameter parsing"
+	depends on VIRTIO_MMIO
+	---help---
+	  Allow virtio-mmio devices instantiation via the kernel command line
+	  or module parameters. Be aware that using incorrect parameters (base
+	  address in particular) can crash your system - you have been warned.
+	  See Documentation/kernel-parameters.txt for details.
+
+	  If unsure, say 'N'.
+
+endmenu
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 6738c446c19..9076635697b 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c
index 983d482fba4..f70bcd2ff98 100644
--- a/drivers/virtio/config.c
+++ b/drivers/virtio/config.c
@@ -9,5 +9,4 @@
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/bug.h>
-#include <asm/system.h>
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3a43ebf83a4..fed0ce198ae 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,40 +1,49 @@
 #include <linux/virtio.h>
 #include <linux/spinlock.h>
 #include <linux/virtio_config.h>
+#include <linux/module.h>
+#include <linux/idr.h>
 
 /* Unique numbering for virtio devices.
*/ -static unsigned int dev_index; +static DEFINE_IDA(virtio_index_ida); static ssize_t device_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - return sprintf(buf, "%hu", dev->id.device); + struct virtio_device *dev = dev_to_virtio(_d); + return sprintf(buf, "0x%04x\n", dev->id.device); } +static DEVICE_ATTR_RO(device); + static ssize_t vendor_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - return sprintf(buf, "%hu", dev->id.vendor); + struct virtio_device *dev = dev_to_virtio(_d); + return sprintf(buf, "0x%04x\n", dev->id.vendor); } +static DEVICE_ATTR_RO(vendor); + static ssize_t status_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - return sprintf(buf, "0x%08x", dev->config->get_status(dev)); + struct virtio_device *dev = dev_to_virtio(_d); + return sprintf(buf, "0x%08x\n", dev->config->get_status(dev)); } +static DEVICE_ATTR_RO(status); + static ssize_t modalias_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "virtio:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } +static DEVICE_ATTR_RO(modalias); + static ssize_t features_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d, struct virtio_device, dev); + struct virtio_device *dev = dev_to_virtio(_d); unsigned int i; ssize_t len = 0; @@ -46,14 +55,17 @@ static ssize_t features_show(struct device *_d, len += sprintf(buf+len, "\n"); return len; } -static struct device_attribute virtio_dev_attrs[] = { - __ATTR_RO(device), - __ATTR_RO(vendor), - __ATTR_RO(status), - __ATTR_RO(modalias), - __ATTR_RO(features), - __ATTR_NULL +static DEVICE_ATTR_RO(features); + +static struct attribute *virtio_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_status.attr, + &dev_attr_modalias.attr, + &dev_attr_features.attr, + NULL, }; +ATTRIBUTE_GROUPS(virtio_dev); static inline int virtio_id_match(const struct virtio_device *dev, const struct virtio_device_id *id) @@ -69,10 +81,10 @@ static inline int virtio_id_match(const struct virtio_device *dev, static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) { unsigned int i; - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); const struct virtio_device_id *ids; - ids = container_of(_dr, struct virtio_driver, driver)->id_table; + ids = drv_to_virtio(_dr)->id_table; for (i = 0; ids[i].device; i++) if (virtio_id_match(dev, &ids[i])) return 1; @@ -81,7 +93,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) { - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X", dev->id.device, dev->id.vendor); @@ -96,8 +108,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit) { unsigned int i; - struct virtio_driver *drv = container_of(vdev->dev.driver, - struct virtio_driver, driver); + struct virtio_driver *drv = 
drv_to_virtio(vdev->dev.driver); for (i = 0; i < drv->feature_table_size; i++) if (drv->feature_table[i] == fbit) @@ -109,9 +120,8 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); static int virtio_dev_probe(struct device *_d) { int err, i; - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); u32 device_features; /* We have a driver! */ @@ -139,22 +149,24 @@ static int virtio_dev_probe(struct device *_d) err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); - else + else { add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + if (drv->scan) + drv->scan(dev); + } return err; } static int virtio_dev_remove(struct device *_d) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); drv->remove(dev); /* Driver should have reset device. */ - BUG_ON(dev->config->get_status(dev)); + WARN_ON_ONCE(dev->config->get_status(dev)); /* Acknowledge the device's existence again. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); @@ -164,7 +176,7 @@ static int virtio_dev_remove(struct device *_d) static struct bus_type virtio_bus = { .name = "virtio", .match = virtio_dev_match, - .dev_attrs = virtio_dev_attrs, + .dev_groups = virtio_dev_groups, .uevent = virtio_uevent, .probe = virtio_dev_probe, .remove = virtio_dev_remove, @@ -192,7 +204,11 @@ int register_virtio_device(struct virtio_device *dev) dev->dev.bus = &virtio_bus; /* Assign a unique device index and hence name. */ - dev->index = dev_index++; + err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); + if (err < 0) + goto out; + + dev->index = err; dev_set_name(&dev->dev, "virtio%u", dev->index); /* We always start by resetting the device, in case a previous @@ -207,6 +223,7 @@ int register_virtio_device(struct virtio_device *dev) /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); +out: if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -215,7 +232,10 @@ EXPORT_SYMBOL_GPL(register_virtio_device); void unregister_virtio_device(struct virtio_device *dev) { + int index = dev->index; /* save for after device release */ + device_unregister(&dev->dev); + ida_simple_remove(&virtio_index_ida, index); } EXPORT_SYMBOL_GPL(unregister_virtio_device); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9dd58804288..25ebe8eecdb 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1,4 +1,5 @@ -/* Virtio balloon implementation, inspired by Dor Loar and Marcelo +/* + * Virtio balloon implementation, inspired by Dor Laor and Marcelo * Tosatti's implementations. 
* * Copyright 2008 Rusty Russell IBM Corporation @@ -17,18 +18,29 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -//#define DEBUG + #include <linux/virtio.h> #include <linux/virtio_balloon.h> #include <linux/swap.h> #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/delay.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/balloon_compaction.h> + +/* + * Balloon device works in 4K page units. So each page is pointed to by + * multiple balloon pages. All memory counters in this driver are in balloon + * page units. + */ +#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) +#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; /* Where the ballooning thread waits for config to change. */ wait_queue_head_t config_change; @@ -37,18 +49,28 @@ struct virtio_balloon struct task_struct *thread; /* Waiting for host to ack the pages we released. */ - struct completion acked; + wait_queue_head_t acked; - /* Do we have to tell Host *before* we reuse pages? */ - bool tell_host_first; - - /* The pages we've told the Host we're not using. */ + /* Number of balloon pages we've told the Host we're not using. */ unsigned int num_pages; - struct list_head pages; + /* + * The pages we've told the Host we're not using are enqueued + * at vb_dev_info->pages list. + * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE + * to num_pages above. + */ + struct balloon_dev_info *vb_dev_info; + + /* Synchronize access/update to this struct virtio_balloon elements */ + struct mutex balloon_lock; /* The array of pfns we tell the Host about. */ unsigned int num_pfns; - u32 pfns[256]; + u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; + + /* Memory statistics */ + int need_stats_update; + struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; }; static struct virtio_device_id id_table[] = { @@ -62,96 +84,181 @@ static u32 page_to_balloon_pfn(struct page *page) BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); /* Convert pfn from Linux page size to balloon page size. */ - return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT); + return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static struct page *balloon_pfn_to_page(u32 pfn) +{ + BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE); + return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE); } static void balloon_ack(struct virtqueue *vq) { - struct virtio_balloon *vb; - unsigned int len; + struct virtio_balloon *vb = vq->vdev->priv; - vb = vq->vq_ops->get_buf(vq, &len); - if (vb) - complete(&vb->acked); + wake_up(&vb->acked); } static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) { struct scatterlist sg; + unsigned int len; sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); - init_completion(&vb->acked); - /* We should always be able to add one buffer to an empty queue. */ - if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0) - BUG(); - vq->vq_ops->kick(vq); + virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); + virtqueue_kick(vq); /* When host has read buffer, this completes via balloon_ack */ - wait_for_completion(&vb->acked); + wait_event(vb->acked, virtqueue_get_buf(vq, &len)); +} + +static void set_page_pfns(u32 pfns[], struct page *page) +{ + unsigned int i; + + /* Set balloon pfns pointing at this page. 
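+	 * (A worked example, assuming a 64K-page kernel: PAGE_SHIFT is 16
+	 * and VIRTIO_BALLOON_PFN_SHIFT is 12, so VIRTIO_BALLOON_PAGES_PER_PAGE
+	 * is 16 and a single struct page is reported to the host as 16
+	 * consecutive 4K balloon pfns.)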
+ * Note that the first pfn points at start of the page. */ + for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) + pfns[i] = page_to_balloon_pfn(page) + i; } static void fill_balloon(struct virtio_balloon *vb, size_t num) { + struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; + /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); - for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { - struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + mutex_lock(&vb->balloon_lock); + for (vb->num_pfns = 0; vb->num_pfns < num; + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_page_enqueue(vb_dev_info); + if (!page) { - if (printk_ratelimit()) - dev_printk(KERN_INFO, &vb->vdev->dev, - "Out of puff! Can't get %zu pages\n", - num); + dev_info_ratelimited(&vb->vdev->dev, + "Out of puff! Can't get %u pages\n", + VIRTIO_BALLOON_PAGES_PER_PAGE); /* Sleep for at least 1/5 of a second before retry. */ msleep(200); break; } - vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); - totalram_pages--; - vb->num_pages++; - list_add(&page->lru, &vb->pages); + set_page_pfns(vb->pfns + vb->num_pfns, page); + vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; + adjust_managed_page_count(page, -1); } - /* Didn't get any? Oh well. */ - if (vb->num_pfns == 0) - return; - - tell_host(vb, vb->inflate_vq); + /* Did we get any? */ + if (vb->num_pfns != 0) + tell_host(vb, vb->inflate_vq); + mutex_unlock(&vb->balloon_lock); } static void release_pages_by_pfn(const u32 pfns[], unsigned int num) { unsigned int i; - for (i = 0; i < num; i++) { - __free_page(pfn_to_page(pfns[i])); - totalram_pages++; + /* Find pfns pointing at start of each page, get pages and free them. */ + for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_pfn_to_page(pfns[i]); + balloon_page_free(page); + adjust_managed_page_count(page, 1); } } static void leak_balloon(struct virtio_balloon *vb, size_t num) { struct page *page; + struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; /* We can only do one array worth at a time. 
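	 * (One array worth is VIRTIO_BALLOON_ARRAY_PFNS_MAX == 256 pfns,
	 * i.e. at most 256 / VIRTIO_BALLOON_PAGES_PER_PAGE kernel pages
	 * per pass.)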
*/ num = min(num, ARRAY_SIZE(vb->pfns)); - for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { - page = list_first_entry(&vb->pages, struct page, lru); - list_del(&page->lru); - vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); - vb->num_pages--; + mutex_lock(&vb->balloon_lock); + for (vb->num_pfns = 0; vb->num_pfns < num; + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { + page = balloon_page_dequeue(vb_dev_info); + if (!page) + break; + set_page_pfns(vb->pfns + vb->num_pfns, page); + vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; } - if (vb->tell_host_first) { + /* + * Note that if + * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + * is true, we *have* to do it in this order + */ + if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); - release_pages_by_pfn(vb->pfns, vb->num_pfns); - } else { - release_pages_by_pfn(vb->pfns, vb->num_pfns); - tell_host(vb, vb->deflate_vq); - } + mutex_unlock(&vb->balloon_lock); + release_pages_by_pfn(vb->pfns, vb->num_pfns); +} + +static inline void update_stat(struct virtio_balloon *vb, int idx, + u16 tag, u64 val) +{ + BUG_ON(idx >= VIRTIO_BALLOON_S_NR); + vb->stats[idx].tag = tag; + vb->stats[idx].val = val; +} + +#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) + +static void update_balloon_stats(struct virtio_balloon *vb) +{ + unsigned long events[NR_VM_EVENT_ITEMS]; + struct sysinfo i; + int idx = 0; + + all_vm_events(events); + si_meminfo(&i); + + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, + pages_to_bytes(events[PSWPIN])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, + pages_to_bytes(events[PSWPOUT])); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, + pages_to_bytes(i.freeram)); + update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, + pages_to_bytes(i.totalram)); +} + +/* + * While most virtqueues communicate guest-initiated requests to the hypervisor, + * the stats queue operates in reverse. The driver initializes the virtqueue + * with a single buffer. From that point forward, all conversations consist of + * a hypervisor request (a call to this function) which directs us to refill + * the virtqueue with a fresh stats buffer. Since stats collection can sleep, + * we notify our kthread which does the actual work via stats_handle_request(). 
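+ *
+ * One round trip, sketched with the names used below:
+ *
+ *	host consumes the stats buffer and kicks stats_vq
+ *	  -> stats_request() callback: need_stats_update = 1,
+ *	     wake_up(&vb->config_change)
+ *	  -> balloon thread runs stats_handle_request():
+ *	     virtqueue_get_buf() reclaims the old buffer,
+ *	     update_balloon_stats() refreshes vb->stats, and
+ *	     virtqueue_add_outbuf() + virtqueue_kick() hand it back.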
+ */
+static void stats_request(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb = vq->vdev->priv;
+
+	vb->need_stats_update = 1;
+	wake_up(&vb->config_change);
+}
+
+static void stats_handle_request(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq;
+	struct scatterlist sg;
+	unsigned int len;
+
+	vb->need_stats_update = 0;
+	update_balloon_stats(vb);
+
+	vq = vb->stats_vq;
+	if (!virtqueue_get_buf(vq, &len))
+		return;
+	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+	virtqueue_kick(vq);
+}
 
 static void virtballoon_changed(struct virtio_device *vdev)
@@ -163,20 +270,21 @@
 
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
-	u32 v;
-	vb->vdev->config->get(vb->vdev,
-			      offsetof(struct virtio_balloon_config, num_pages),
-			      &v, sizeof(v));
-	return (s64)v - vb->num_pages;
+	__le32 v;
+	s64 target;
+
+	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v);
+
+	target = le32_to_cpu(v);
+	return target - vb->num_pages;
 }
 
 static void update_balloon_size(struct virtio_balloon *vb)
 {
 	__le32 actual = cpu_to_le32(vb->num_pages);
 
-	vb->vdev->config->set(vb->vdev,
-			      offsetof(struct virtio_balloon_config, actual),
-			      &actual, sizeof(actual));
+	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
+		      &actual);
 }
 
 static int balloon(void *_vballoon)
@@ -190,23 +298,139 @@
 		try_to_freeze();
 		wait_event_interruptible(vb->config_change,
 					 (diff = towards_target(vb)) != 0
+					 || vb->need_stats_update
 					 || kthread_should_stop()
 					 || freezing(current));
+		if (vb->need_stats_update)
+			stats_handle_request(vb);
 		if (diff > 0)
 			fill_balloon(vb, diff);
 		else if (diff < 0)
 			leak_balloon(vb, -diff);
 		update_balloon_size(vb);
+
+		/*
+		 * For large balloon changes, we could spend a lot of time
+		 * and always have work to do.  Be nice if preempt disabled.
+		 */
+		cond_resched();
 	}
 	return 0;
 }
+
+static int init_vqs(struct virtio_balloon *vb)
+{
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
+	const char *names[] = { "inflate", "deflate", "stats" };
+	int err, nvqs;
+
+	/*
+	 * We expect two virtqueues: inflate and deflate, and
+	 * optionally stat.
+	 */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later (it can't be broken yet!).
+		 */
+		sg_init_one(&sg, vb->stats, sizeof vb->stats);
+		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
+		    < 0)
+			BUG();
+		virtqueue_kick(vb->stats_vq);
+	}
+	return 0;
+}
+
+static const struct address_space_operations virtio_balloon_aops;
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * virtballoon_migratepage - perform the balloon page migration on behalf of
+ *			     a compaction thread.  (called under page lock)
+ * @mapping: the page->mapping which will be assigned to the new migrated page.
+ * @newpage: page that will replace the isolated page after migration finishes.
+ * @page   : the isolated (old) page that is about to be migrated to newpage.
+ * @mode   : compaction mode -- not used for balloon page migration.
+ *
+ * After a ballooned page gets isolated by compaction procedures, this is the
+ * function that performs the page migration on behalf of a compaction thread.
+ * The page migration for virtio balloon is done in a simple swap fashion which
+ * follows these two macro steps:
+ *  1) insert newpage into vb->pages list and update the host about it;
+ *  2) update the host about the old page removed from vb->pages list;
+ *
+ * This function performs the balloon page migration task.
+ * Called through balloon_mapping->a_ops->migratepage
+ */
+static int virtballoon_migratepage(struct address_space *mapping,
+		struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+	struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
+	struct virtio_balloon *vb;
+	unsigned long flags;
+
+	BUG_ON(!vb_dev_info);
+
+	vb = vb_dev_info->balloon_device;
+
+	/*
+	 * In order to avoid lock contention while migrating pages concurrently
+	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
+	 * this turn, as it is easier to retry the page migration later.
+	 * This also prevents fill_balloon() getting stuck into a mutex
+	 * recursion in the case it ends up triggering memory compaction
+	 * while it is attempting to inflate the balloon.
+	 */
+	if (!mutex_trylock(&vb->balloon_lock))
+		return -EAGAIN;
+
+	/* balloon's page migration 1st step -- inflate "newpage" */
+	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
+	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
+	vb_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
+	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+	set_page_pfns(vb->pfns, newpage);
+	tell_host(vb, vb->inflate_vq);
+
+	/*
+	 * balloon's page migration 2nd step -- deflate "page"
+	 *
+	 * It's safe to delete page->lru here because this page is at
+	 * an isolated migration list, and this step is expected to happen here
+	 */
+	balloon_page_delete(page);
+	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+	set_page_pfns(vb->pfns, page);
+	tell_host(vb, vb->deflate_vq);
+
+	mutex_unlock(&vb->balloon_lock);
+
+	return MIGRATEPAGE_BALLOON_SUCCESS;
+}
+
+/* define the balloon_mapping->a_ops callback to allow balloon page migration */
+static const struct address_space_operations virtio_balloon_aops = {
+	.migratepage = virtballoon_migratepage,
+};
+#endif /* CONFIG_BALLOON_COMPACTION */
+
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
+	struct address_space *vb_mapping;
+	struct balloon_dev_info *vb_devinfo;
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -215,18 +439,37 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		goto out;
 	}
 
-	INIT_LIST_HEAD(&vb->pages);
 	vb->num_pages = 0;
+	mutex_init(&vb->balloon_lock);
 	init_waitqueue_head(&vb->config_change);
+	init_waitqueue_head(&vb->acked);
 	vb->vdev = vdev;
+	vb->need_stats_update = 0;
 
-	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
-	if (err)
+	vb_devinfo = balloon_devinfo_alloc(vb);
+	if (IS_ERR(vb_devinfo)) {
+		err = PTR_ERR(vb_devinfo);
 		goto out_free_vb;
+	}
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
+	vb_mapping = balloon_mapping_alloc(vb_devinfo,
+					   (balloon_compaction_check()) ?
+ &virtio_balloon_aops : NULL); + if (IS_ERR(vb_mapping)) { + /* + * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP + * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off. + */ + err = PTR_ERR(vb_mapping); + if (err != -EOPNOTSUPP) + goto out_free_vb_devinfo; + } + + vb->vb_dev_info = vb_devinfo; + + err = init_vqs(vb); + if (err) + goto out_free_vb_mapping; vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { @@ -234,61 +477,94 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } - vb->tell_host_first - = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); - return 0; out_del_vqs: vdev->config->del_vqs(vdev); +out_free_vb_mapping: + balloon_mapping_free(vb_mapping); +out_free_vb_devinfo: + balloon_devinfo_free(vb_devinfo); out_free_vb: kfree(vb); out: return err; } -static void __devexit virtballoon_remove(struct virtio_device *vdev) +static void remove_common(struct virtio_balloon *vb) { - struct virtio_balloon *vb = vdev->priv; - - kthread_stop(vb->thread); - /* There might be pages left in the balloon: free them. */ while (vb->num_pages) leak_balloon(vb, vb->num_pages); + update_balloon_size(vb); /* Now we reset the device so we can clean up the queues. */ - vdev->config->reset(vdev); + vb->vdev->config->reset(vb->vdev); - vdev->config->del_vqs(vdev); + vb->vdev->config->del_vqs(vb->vdev); +} + +static void virtballoon_remove(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + kthread_stop(vb->thread); + remove_common(vb); + balloon_mapping_free(vb->vb_dev_info->mapping); + balloon_devinfo_free(vb->vb_dev_info); kfree(vb); } -static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST }; +#ifdef CONFIG_PM_SLEEP +static int virtballoon_freeze(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + /* + * The kthread is already frozen by the PM core before this + * function is called. + */ + + remove_common(vb); + return 0; +} + +static int virtballoon_restore(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + int ret; + + ret = init_vqs(vdev->priv); + if (ret) + return ret; + + fill_balloon(vb, towards_target(vb)); + update_balloon_size(vb); + return 0; +} +#endif + +static unsigned int features[] = { + VIRTIO_BALLOON_F_MUST_TELL_HOST, + VIRTIO_BALLOON_F_STATS_VQ, +}; -static struct virtio_driver virtio_balloon = { +static struct virtio_driver virtio_balloon_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtballoon_probe, - .remove = __devexit_p(virtballoon_remove), + .remove = virtballoon_remove, .config_changed = virtballoon_changed, +#ifdef CONFIG_PM_SLEEP + .freeze = virtballoon_freeze, + .restore = virtballoon_restore, +#endif }; -static int __init init(void) -{ - return register_virtio_driver(&virtio_balloon); -} - -static void __exit fini(void) -{ - unregister_virtio_driver(&virtio_balloon); -} -module_init(init); -module_exit(fini); - +module_virtio_driver(virtio_balloon_driver); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio balloon driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c new file mode 100644 index 00000000000..c600ccfd692 --- /dev/null +++ b/drivers/virtio/virtio_mmio.c @@ -0,0 +1,666 @@ +/* + * Virtio memory mapped device driver + * + * Copyright 2011, ARM Ltd. 
+ *
+ * This module allows virtio devices to be used over a virtual, memory mapped
+ * platform device.
+ *
+ * The guest device(s) may be instantiated in one of three equivalent ways:
+ *
+ * 1. Static platform device in board's code, eg.:
+ *
+ *	static struct platform_device v2m_virtio_device = {
+ *		.name = "virtio-mmio",
+ *		.id = -1,
+ *		.num_resources = 2,
+ *		.resource = (struct resource []) {
+ *			{
+ *				.start = 0x1001e000,
+ *				.end = 0x1001e0ff,
+ *				.flags = IORESOURCE_MEM,
+ *			}, {
+ *				.start = 42 + 32,
+ *				.end = 42 + 32,
+ *				.flags = IORESOURCE_IRQ,
+ *			},
+ *		}
+ *	};
+ *
+ * 2. Device Tree node, eg.:
+ *
+ *	virtio_block@1e000 {
+ *		compatible = "virtio,mmio";
+ *		reg = <0x1e000 0x100>;
+ *		interrupts = <42>;
+ *	}
+ *
+ * 3. Kernel module (or command line) parameter. Can be used more than once -
+ *    one device will be created for each one. Syntax:
+ *
+ *	[virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
+ *    where:
+ *	<size>     := size (can use standard suffixes like K, M or G)
+ *	<baseaddr> := physical base address
+ *	<irq>      := interrupt number (as passed to request_irq())
+ *	<id>       := (optional) platform device id
+ *    eg.:
+ *	virtio_mmio.device=0x100@0x100b0000:48 \
+ *	virtio_mmio.device=1K@0x1001e000:74
+ *
+ *
+ * Registers layout (all 32-bit wide):
+ *
+ * offset d. name             description
+ * ------ -- ---------------- -----------------
+ *
+ * 0x000  R  MagicValue       Magic value "virt"
+ * 0x004  R  Version          Device version (current max. 1)
+ * 0x008  R  DeviceID         Virtio device ID
+ * 0x00c  R  VendorID         Virtio vendor ID
+ *
+ * 0x010  R  HostFeatures     Features supported by the host
+ * 0x014  W  HostFeaturesSel  Set of host features to access via HostFeatures
+ *
+ * 0x020  W  GuestFeatures    Features activated by the guest
+ * 0x024  W  GuestFeaturesSel Set of activated features to set via GuestFeatures
+ * 0x028  W  GuestPageSize    Size of guest's memory page in bytes
+ *
+ * 0x030  W  QueueSel         Queue selector
+ * 0x034  R  QueueNumMax      Maximum size of the currently selected queue
+ * 0x038  W  QueueNum         Queue size for the currently selected queue
+ * 0x03c  W  QueueAlign       Used Ring alignment for the current queue
+ * 0x040  RW QueuePFN         PFN for the currently selected queue
+ *
+ * 0x050  W  QueueNotify      Queue notifier
+ * 0x060  R  InterruptStatus  Interrupt status register
+ * 0x064  W  InterruptACK     Interrupt acknowledge register
+ * 0x070  RW Status           Device status register
+ *
+ * 0x100+ RW                  Device-specific configuration space
+ *
+ * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#define pr_fmt(fmt) "virtio-mmio: " fmt
+
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_mmio.h>
+#include <linux/virtio_ring.h>
+
+
+
+/* The alignment to use between consumer and producer parts of vring.
+ * Currently hardcoded to the page size.
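+ *
+ * A worked example under that assumption: for a 256-entry queue,
+ * vring_size(256, PAGE_SIZE) is 4096 bytes of descriptors (16 * 256)
+ * plus the available ring, padded to the 4K boundary, plus the used
+ * ring -- vm_setup_vq() below allocates PAGE_ALIGN() of the result,
+ * i.e. three 4K pages.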
*/ +#define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE + + + +#define to_virtio_mmio_device(_plat_dev) \ + container_of(_plat_dev, struct virtio_mmio_device, vdev) + +struct virtio_mmio_device { + struct virtio_device vdev; + struct platform_device *pdev; + + void __iomem *base; + unsigned long version; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; +}; + +struct virtio_mmio_vq_info { + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the number of entries in the queue */ + unsigned int num; + + /* the virtual address of the ring queue */ + void *queue; + + /* the list node for the virtqueues list */ + struct list_head node; +}; + + + +/* Configuration interface */ + +static u32 vm_get_features(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + /* TODO: Features > 32 bits */ + writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL); + + return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES); +} + +static void vm_finalize_features(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + int i; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + for (i = 0; i < ARRAY_SIZE(vdev->features); i++) { + writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); + writel(vdev->features[i], + vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); + } +} + +static void vm_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static void vm_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + const u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static u8 vm_get_status(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff; +} + +static void vm_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + /* We should never be setting status to 0. */ + BUG_ON(status == 0); + + writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); +} + +static void vm_reset(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + /* 0 status means a reset. */ + writel(0, vm_dev->base + VIRTIO_MMIO_STATUS); +} + + + +/* Transport interface */ + +/* the notify function used when creating a virt queue */ +static bool vm_notify(struct virtqueue *vq) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); + + /* We write the queue's selector into the notification register to + * signal the other end */ + writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + return true; +} + +/* Notify all virtqueues on an interrupt. 
*/ +static irqreturn_t vm_interrupt(int irq, void *opaque) +{ + struct virtio_mmio_device *vm_dev = opaque; + struct virtio_mmio_vq_info *info; + struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver, + struct virtio_driver, driver); + unsigned long status; + unsigned long flags; + irqreturn_t ret = IRQ_NONE; + + /* Read and acknowledge interrupts */ + status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS); + writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK); + + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG) + && vdrv && vdrv->config_changed) { + vdrv->config_changed(&vm_dev->vdev); + ret = IRQ_HANDLED; + } + + if (likely(status & VIRTIO_MMIO_INT_VRING)) { + spin_lock_irqsave(&vm_dev->lock, flags); + list_for_each_entry(info, &vm_dev->virtqueues, node) + ret |= vring_interrupt(irq, info->vq); + spin_unlock_irqrestore(&vm_dev->lock, flags); + } + + return ret; +} + + + +static void vm_del_vq(struct virtqueue *vq) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); + struct virtio_mmio_vq_info *info = vq->priv; + unsigned long flags, size; + unsigned int index = vq->index; + + spin_lock_irqsave(&vm_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vm_dev->lock, flags); + + vring_del_virtqueue(vq); + + /* Select and deactivate the queue */ + writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); + writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + + size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); + free_pages_exact(info->queue, size); + kfree(info); +} + +static void vm_del_vqs(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vm_del_vq(vq); + + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); +} + + + +static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + struct virtio_mmio_vq_info *info; + struct virtqueue *vq; + unsigned long flags, size; + int err; + + if (!name) + return NULL; + + /* Select the queue we're interested in */ + writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); + + /* Queue shouldn't already be set up. */ + if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN)) { + err = -ENOENT; + goto error_available; + } + + /* Allocate and fill out our active queue description */ + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto error_kmalloc; + } + + /* Allocate pages for the queue - start with a queue as big as + * possible (limited by maximum size allowed by device), drop down + * to a minimal size, just big enough to fit descriptor table + * and two rings (which makes it "alignment_size * 2") + */ + info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); + + /* If the device reports a 0 entry queue, we won't be able to + * use it to perform I/O, and vring_new_virtqueue() can't create + * empty queues anyway, so don't bother to set up the device. + */ + if (info->num == 0) { + err = -ENOENT; + goto error_alloc_pages; + } + + while (1) { + size = PAGE_ALIGN(vring_size(info->num, + VIRTIO_MMIO_VRING_ALIGN)); + /* Did the last iter shrink the queue below minimum size? 
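+		 * (A note on the bound, reasoning from the allocation comment
+		 * above: with page-size alignment the descriptor table and
+		 * available ring occupy the first aligned chunk and the used
+		 * ring the second, so anything below
+		 * VIRTIO_MMIO_VRING_ALIGN * 2 cannot hold a whole ring.)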
*/ + if (size < VIRTIO_MMIO_VRING_ALIGN * 2) { + err = -ENOMEM; + goto error_alloc_pages; + } + + info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (info->queue) + break; + + info->num /= 2; + } + + /* Activate the queue */ + writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); + writel(VIRTIO_MMIO_VRING_ALIGN, + vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); + writel(virt_to_phys(info->queue) >> PAGE_SHIFT, + vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + + /* Create the vring */ + vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, + true, info->queue, vm_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto error_new_virtqueue; + } + + vq->priv = info; + info->vq = vq; + + spin_lock_irqsave(&vm_dev->lock, flags); + list_add(&info->node, &vm_dev->virtqueues); + spin_unlock_irqrestore(&vm_dev->lock, flags); + + return vq; + +error_new_virtqueue: + writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + free_pages_exact(info->queue, size); +error_alloc_pages: + kfree(info); +error_kmalloc: +error_available: + return ERR_PTR(err); +} + +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + unsigned int irq = platform_get_irq(vm_dev->pdev, 0); + int i, err; + + err = request_irq(irq, vm_interrupt, IRQF_SHARED, + dev_name(&vdev->dev), vm_dev); + if (err) + return err; + + for (i = 0; i < nvqs; ++i) { + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) { + vm_del_vqs(vdev); + return PTR_ERR(vqs[i]); + } + } + + return 0; +} + +static const char *vm_bus_name(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + return vm_dev->pdev->name; +} + +static const struct virtio_config_ops virtio_mmio_config_ops = { + .get = vm_get, + .set = vm_set, + .get_status = vm_get_status, + .set_status = vm_set_status, + .reset = vm_reset, + .find_vqs = vm_find_vqs, + .del_vqs = vm_del_vqs, + .get_features = vm_get_features, + .finalize_features = vm_finalize_features, + .bus_name = vm_bus_name, +}; + + + +/* Platform device */ + +static int virtio_mmio_probe(struct platform_device *pdev) +{ + struct virtio_mmio_device *vm_dev; + struct resource *mem; + unsigned long magic; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -EINVAL; + + if (!devm_request_mem_region(&pdev->dev, mem->start, + resource_size(mem), pdev->name)) + return -EBUSY; + + vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); + if (!vm_dev) + return -ENOMEM; + + vm_dev->vdev.dev.parent = &pdev->dev; + vm_dev->vdev.config = &virtio_mmio_config_ops; + vm_dev->pdev = pdev; + INIT_LIST_HEAD(&vm_dev->virtqueues); + spin_lock_init(&vm_dev->lock); + + vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); + if (vm_dev->base == NULL) + return -EFAULT; + + /* Check magic value */ + magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); + if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { + dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic); + return -ENODEV; + } + + /* Check device version */ + vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION); + if (vm_dev->version != 1) { + dev_err(&pdev->dev, "Version %ld not supported!\n", + vm_dev->version); + return -ENXIO; + } + + vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID); + vm_dev->vdev.id.vendor = readl(vm_dev->base + 
VIRTIO_MMIO_VENDOR_ID);
+
+	writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
+
+	platform_set_drvdata(pdev, vm_dev);
+
+	return register_virtio_device(&vm_dev->vdev);
+}
+
+static int virtio_mmio_remove(struct platform_device *pdev)
+{
+	struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
+
+	unregister_virtio_device(&vm_dev->vdev);
+
+	return 0;
+}
+
+
+
+/* Devices list parameter */
+
+#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
+
+static struct device vm_cmdline_parent = {
+	.init_name = "virtio-mmio-cmdline",
+};
+
+static int vm_cmdline_parent_registered;
+static int vm_cmdline_id;
+
+static int vm_cmdline_set(const char *device,
+		const struct kernel_param *kp)
+{
+	int err;
+	struct resource resources[2] = {};
+	char *str;
+	long long int base, size;
+	unsigned int irq;
+	int processed, consumed = 0;
+	struct platform_device *pdev;
+
+	/* Consume "size" part of the command line parameter */
+	size = memparse(device, &str);
+
+	/* Get "@<base>:<irq>[:<id>]" chunks */
+	processed = sscanf(str, "@%lli:%u%n:%d%n",
+			&base, &irq, &consumed,
+			&vm_cmdline_id, &consumed);
+
+	/*
+	 * sscanf() must process at least 2 chunks; also there
+	 * must be no extra characters after the last chunk, so
+	 * str[consumed] must be '\0'
+	 */
+	if (processed < 2 || str[consumed])
+		return -EINVAL;
+
+	resources[0].flags = IORESOURCE_MEM;
+	resources[0].start = base;
+	resources[0].end = base + size - 1;
+
+	resources[1].flags = IORESOURCE_IRQ;
+	resources[1].start = resources[1].end = irq;
+
+	if (!vm_cmdline_parent_registered) {
+		err = device_register(&vm_cmdline_parent);
+		if (err) {
+			pr_err("Failed to register parent device!\n");
+			return err;
+		}
+		vm_cmdline_parent_registered = 1;
+	}
+
+	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
+		       vm_cmdline_id,
+		       (unsigned long long)resources[0].start,
+		       (unsigned long long)resources[0].end,
+		       (int)resources[1].start);
+
+	pdev = platform_device_register_resndata(&vm_cmdline_parent,
+			"virtio-mmio", vm_cmdline_id++,
+			resources, ARRAY_SIZE(resources), NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+
+static int vm_cmdline_get_device(struct device *dev, void *data)
+{
+	char *buffer = data;
+	unsigned int len = strlen(buffer);
+	struct platform_device *pdev = to_platform_device(dev);
+
+	snprintf(buffer + len, PAGE_SIZE - len, "0x%llx@0x%llx:%llu:%d\n",
+			pdev->resource[0].end - pdev->resource[0].start + 1ULL,
+			(unsigned long long)pdev->resource[0].start,
+			(unsigned long long)pdev->resource[1].start,
+			pdev->id);
+	return 0;
+}
+
+static int vm_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	buffer[0] = '\0';
+	device_for_each_child(&vm_cmdline_parent, buffer,
+			vm_cmdline_get_device);
+	return strlen(buffer) + 1;
+}
+
+static struct kernel_param_ops vm_cmdline_param_ops = {
+	.set = vm_cmdline_set,
+	.get = vm_cmdline_get,
+};
+
+device_param_cb(device, &vm_cmdline_param_ops, NULL, S_IRUSR);
+
+static int vm_unregister_cmdline_device(struct device *dev,
+		void *data)
+{
+	platform_device_unregister(to_platform_device(dev));
+
+	return 0;
+}
+
+static void vm_unregister_cmdline_devices(void)
+{
+	if (vm_cmdline_parent_registered) {
+		device_for_each_child(&vm_cmdline_parent, NULL,
+				vm_unregister_cmdline_device);
+		device_unregister(&vm_cmdline_parent);
+		vm_cmdline_parent_registered = 0;
+	}
+}
+
+#else
+
+static void vm_unregister_cmdline_devices(void)
+{
+}
+
+#endif
+
+/* Platform driver */
+
+static struct of_device_id virtio_mmio_match[] = {
+	{ .compatible = "virtio,mmio", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+static struct platform_driver virtio_mmio_driver = {
+	.probe		= virtio_mmio_probe,
+	.remove		= virtio_mmio_remove,
+	.driver		= {
+		.name	= "virtio-mmio",
+		.owner	= THIS_MODULE,
+		.of_match_table	= virtio_mmio_match,
+	},
+};
+
+static int __init virtio_mmio_init(void)
+{
+	return platform_driver_register(&virtio_mmio_driver);
+}
+
+static void __exit virtio_mmio_exit(void)
+{
+	platform_driver_unregister(&virtio_mmio_driver);
+	vm_unregister_cmdline_devices();
+}
+
+module_init(virtio_mmio_init);
+module_exit(virtio_mmio_exit);
+
+MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices");
+MODULE_LICENSE("GPL");
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 28d9cf7cf72..101db3faf5d 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/pci.h>
+#include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
@@ -47,6 +48,7 @@ struct virtio_pci_device
 	int msix_enabled;
 	int intx_enabled;
 	struct msix_entry *msix_entries;
+	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
 	char (*msix_names)[256];
@@ -54,6 +56,10 @@ struct virtio_pci_device
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -74,9 +80,6 @@ struct virtio_pci_vq_info
 	/* the number of entries in the queue */
 	int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -88,18 +91,13 @@ struct virtio_pci_vq_info
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
-static struct pci_device_id virtio_pci_id_table[] = {
-	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ 0 },
+static DEFINE_PCI_DEVICE_TABLE(virtio_pci_id_table) = {
+	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
+	{ 0 }
 };
 
 MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
 
-/* A PCI device has it's own struct device and so does a virtio device so
- * we create a place for the virtio devices to show up in sysfs.  I think it
- * would make more sense for virtio to not insist on having it's own device. */
-static struct device *virtio_pci_root;
-
 /* Convert a generic virtio device to our structure */
 static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 {
@@ -173,22 +171,40 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
 	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 }
 
+/* wait for pending irq handlers */
+static void vp_synchronize_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled)
+		synchronize_irq(vp_dev->pci_dev->irq);
+
+	for (i = 0; i < vp_dev->msix_vectors; ++i)
+		synchronize_irq(vp_dev->msix_entries[i].vector);
+}
+
 static void vp_reset(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
 	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	/* Flush out the status write, and flush in device writes,
+	 * including MSI-X interrupts, if any.
*/ + ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); } /* the notify function used when creating a virt queue */ -static void vp_notify(struct virtqueue *vq) +static bool vp_notify(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vq->priv; /* we write the queue's selector into the notification register to * signal the other end */ - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + return true; } /* Handle a configuration change: Tell driver if it wants to know. */ @@ -261,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(vp_dev->msix_entries[i].vector, vp_dev); + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -270,14 +290,16 @@ static void vp_free_vectors(struct virtio_device *vdev) pci_disable_msix(vp_dev->pci_dev); vp_dev->msix_enabled = 0; - vp_dev->msix_vectors = 0; } + vp_dev->msix_vectors = 0; vp_dev->msix_used_vectors = 0; kfree(vp_dev->msix_names); vp_dev->msix_names = NULL; kfree(vp_dev->msix_entries); vp_dev->msix_entries = NULL; + kfree(vp_dev->msix_affinity_masks); + vp_dev->msix_affinity_masks = NULL; } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, @@ -288,6 +310,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, unsigned i, v; int err = -ENOMEM; + vp_dev->msix_vectors = nvectors; + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, GFP_KERNEL); if (!vp_dev->msix_entries) @@ -296,17 +320,23 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL); if (!vp_dev->msix_names) goto error; + vp_dev->msix_affinity_masks + = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, + GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto error; + for (i = 0; i < nvectors; ++i) + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto error; for (i = 0; i < nvectors; ++i) vp_dev->msix_entries[i].entry = i; - /* pci_enable_msix returns positive if we can't get this many. 
*/ - err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); - if (err > 0) - err = -ENOSPC; + err = pci_enable_msix_exact(vp_dev->pci_dev, + vp_dev->msix_entries, nvectors); if (err) goto error; - vp_dev->msix_vectors = nvectors; vp_dev->msix_enabled = 1; /* Set the vector used for configuration */ @@ -384,7 +414,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, if (!info) return ERR_PTR(-ENOMEM); - info->queue_index = index; info->num = num; info->msix_vector = msix_vec; @@ -400,8 +429,8 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ - vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, vp_notify, callback, name); + vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev, + true, info->queue, vp_notify, callback, name); if (!vq) { err = -ENOMEM; goto out_activate_queue; @@ -419,9 +448,13 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, } } - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); + if (callback) { + spin_lock_irqsave(&vp_dev->lock, flags); + list_add(&info->node, &vp_dev->virtqueues); + spin_unlock_irqrestore(&vp_dev->lock, flags); + } else { + INIT_LIST_HEAD(&info->node); + } return vq; @@ -445,7 +478,7 @@ static void vp_del_vq(struct virtqueue *vq) list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -473,7 +506,8 @@ static void vp_del_vqs(struct virtio_device *vdev) list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vq->priv; - if (vp_dev->per_vq_vectors) + if (vp_dev->per_vq_vectors && + info->msix_vector != VIRTIO_MSI_NO_VECTOR) free_irq(vp_dev->msix_entries[info->msix_vector].vector, vq); vp_del_vq(vq); @@ -519,7 +553,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { - if (!callbacks[i] || !vp_dev->msix_enabled) + if (!names[i]) { + vqs[i] = NULL; + continue; + } else if (!callbacks[i] || !vp_dev->msix_enabled) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vp_dev->per_vq_vectors) msix_vec = allocated_vectors++; @@ -579,7 +616,43 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, false, false); } -static struct virtio_config_ops virtio_pci_config_ops = { +static const char *vp_bus_name(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + return pci_name(vp_dev->pci_dev); +} + +/* Setup the affinity for a virtqueue: + * - force the affinity for per vq vector + * - OR over all affinities for shared MSI + * - ignore the affinity request if we're using INTX + */ +static int vp_set_vq_affinity(struct virtqueue *vq, int cpu) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = vq->priv; + struct cpumask *mask; + unsigned int irq; + + if (!vq->callback) + return -EINVAL; + + if (vp_dev->msix_enabled) { + mask = vp_dev->msix_affinity_masks[info->msix_vector]; + irq = vp_dev->msix_entries[info->msix_vector].vector; + if (cpu == -1) + irq_set_affinity_hint(irq, NULL); + else { + 
cpumask_set_cpu(cpu, mask); + irq_set_affinity_hint(irq, mask); + } + } + return 0; +} + +static const struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, .get_status = vp_get_status, @@ -589,25 +662,22 @@ static struct virtio_config_ops virtio_pci_config_ops = { .del_vqs = vp_del_vqs, .get_features = vp_get_features, .finalize_features = vp_finalize_features, + .bus_name = vp_bus_name, + .set_vq_affinity = vp_set_vq_affinity, }; static void virtio_pci_release_dev(struct device *_d) { - struct virtio_device *dev = container_of(_d, struct virtio_device, dev); - struct virtio_pci_device *vp_dev = to_vp_device(dev); - struct pci_dev *pci_dev = vp_dev->pci_dev; - - vp_del_vqs(dev); - pci_set_drvdata(pci_dev, NULL); - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_regions(pci_dev); - pci_disable_device(pci_dev); - kfree(vp_dev); + /* + * No need for a release method as we allocate/free + * all devices together with the pci devices. + * Provide an empty one to avoid getting a warning from core. + */ } /* the PCI probing function */ -static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, - const struct pci_device_id *id) +static int virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) { struct virtio_pci_device *vp_dev; int err; @@ -627,13 +697,16 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, if (vp_dev == NULL) return -ENOMEM; - vp_dev->vdev.dev.parent = virtio_pci_root; + vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->vdev.config = &virtio_pci_config_ops; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); spin_lock_init(&vp_dev->lock); + /* Disable MSI/MSIX to bring device to a known good state. */ + pci_msi_off(pci_dev); + /* enable the device */ err = pci_enable_device(pci_dev); if (err) @@ -644,15 +717,18 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, goto out_enable_device; vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (vp_dev->ioaddr == NULL) + if (vp_dev->ioaddr == NULL) { + err = -ENOMEM; goto out_req_regions; + } pci_set_drvdata(pci_dev, vp_dev); + pci_set_master(pci_dev); /* we use the subsystem vendor/device id as the virtio vendor/device * id. 
this allows us to use the same PCI vendor/device id for all * virtio devices and to identify the particular virtio driver by - * the subsytem ids */ + * the subsystem ids */ vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; vp_dev->vdev.id.device = pci_dev->subsystem_device; @@ -664,7 +740,6 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, return 0; out_set_drvdata: - pci_set_drvdata(pci_dev, NULL); pci_iounmap(pci_dev, vp_dev->ioaddr); out_req_regions: pci_release_regions(pci_dev); @@ -675,27 +750,70 @@ out: return err; } -static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) +static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); unregister_virtio_device(&vp_dev->vdev); + + vp_del_vqs(&vp_dev->vdev); + pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + kfree(vp_dev); } -#ifdef CONFIG_PM -static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int virtio_pci_freeze(struct device *dev) { - pci_save_state(pci_dev); - pci_set_power_state(pci_dev, PCI_D3hot); - return 0; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_driver *drv; + int ret; + + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + ret = 0; + vp_dev->saved_status = vp_get_status(&vp_dev->vdev); + if (drv && drv->freeze) + ret = drv->freeze(&vp_dev->vdev); + + if (!ret) + pci_disable_device(pci_dev); + return ret; } -static int virtio_pci_resume(struct pci_dev *pci_dev) +static int virtio_pci_restore(struct device *dev) { - pci_restore_state(pci_dev); - pci_set_power_state(pci_dev, PCI_D0); - return 0; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_driver *drv; + int ret; + + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + ret = pci_enable_device(pci_dev); + if (ret) + return ret; + + pci_set_master(pci_dev); + vp_finalize_features(&vp_dev->vdev); + + if (drv && drv->restore) + ret = drv->restore(&vp_dev->vdev); + + /* Finally, tell the device we're all set */ + if (!ret) + vp_set_status(&vp_dev->vdev, vp_dev->saved_status); + + return ret; } + +static const struct dev_pm_ops virtio_pci_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) +}; #endif static struct pci_driver virtio_pci_driver = { @@ -703,33 +821,9 @@ static struct pci_driver virtio_pci_driver = { .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = virtio_pci_remove, -#ifdef CONFIG_PM - .suspend = virtio_pci_suspend, - .resume = virtio_pci_resume, +#ifdef CONFIG_PM_SLEEP + .driver.pm = &virtio_pci_pm_ops, #endif }; -static int __init virtio_pci_init(void) -{ - int err; - - virtio_pci_root = root_device_register("virtio-pci"); - if (IS_ERR(virtio_pci_root)) - return PTR_ERR(virtio_pci_root); - - err = pci_register_driver(&virtio_pci_driver); - if (err) - root_device_unregister(virtio_pci_root); - - return err; -} - -module_init(virtio_pci_init); - -static void __exit virtio_pci_exit(void) -{ - pci_unregister_driver(&virtio_pci_driver); - root_device_unregister(virtio_pci_root); -} - -module_exit(virtio_pci_exit); +module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fbd2ecde93e..4d08f45a9c2 100644 --- a/drivers/virtio/virtio_ring.c +++ 
b/drivers/virtio/virtio_ring.c @@ -20,6 +20,10 @@ #include <linux/virtio_ring.h> #include <linux/virtio_config.h> #include <linux/device.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/hrtimer.h> +#include <linux/kmemleak.h> #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ @@ -36,10 +40,9 @@ panic("%s:in_use = %i\n", \ (_vq)->vq.name, (_vq)->in_use); \ (_vq)->in_use = __LINE__; \ - mb(); \ } while (0) #define END_USE(_vq) \ - do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) + do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) #else #define BAD_RING(_vq, fmt, args...) \ do { \ @@ -58,14 +61,18 @@ struct vring_virtqueue /* Actual memory layout for this queue */ struct vring vring; + /* Can we use weak barriers? */ + bool weak_barriers; + /* Other side has made a mess, don't try any more. */ bool broken; /* Host supports indirect buffers */ bool indirect; - /* Number of free buffers */ - unsigned int num_free; + /* Host publishes avail event idx */ + bool event; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -75,11 +82,15 @@ struct vring_virtqueue u16 last_used_idx; /* How to notify other side. FIXME: commonalize hcalls! */ - void (*notify)(struct virtqueue *vq); + bool (*notify)(struct virtqueue *vq); #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; #endif /* Tokens for callbacks. */ @@ -88,47 +99,83 @@ struct vring_virtqueue #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, + unsigned int *count) +{ + return sg_next(sg); +} + +static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, + unsigned int *count) +{ + if (--(*count) == 0) + return NULL; + return sg + 1; +} + /* Set up an indirect table of descriptors and add it to the queue. */ -static int vring_add_indirect(struct vring_virtqueue *vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in) +static inline int vring_add_indirect(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + struct scatterlist *(*next) + (struct scatterlist *, unsigned int *), + unsigned int total_sg, + unsigned int total_out, + unsigned int total_in, + unsigned int out_sgs, + unsigned int in_sgs, + gfp_t gfp) { struct vring_desc *desc; unsigned head; - int i; + struct scatterlist *sg; + int i, n; + + /* + * We require lowmem mappings for the descriptors because + * otherwise virt_to_phys will give us bogus addresses in the + * virtqueue. 
+ */ + gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); - desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC); + desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp); if (!desc) - return vq->vring.num; - - /* Transfer entries from the sg list into the indirect page */ - for (i = 0; i < out; i++) { - desc[i].flags = VRING_DESC_F_NEXT; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; - desc[i].next = i+1; - sg++; + return -ENOMEM; + + /* Transfer entries from the sg lists into the indirect page */ + i = 0; + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + desc[i].flags = VRING_DESC_F_NEXT; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + i++; + } } - for (; i < (out + in); i++) { - desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - desc[i].addr = sg_phys(sg); - desc[i].len = sg->length; - desc[i].next = i+1; - sg++; + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + desc[i].addr = sg_phys(sg); + desc[i].len = sg->length; + desc[i].next = i+1; + i++; + } } + BUG_ON(i != total_sg); /* Last one doesn't continue. */ desc[i-1].flags &= ~VRING_DESC_F_NEXT; desc[i-1].next = 0; /* We're about to use a buffer */ - vq->num_free--; + vq->vq.num_free--; /* Use a single buffer which doesn't continue */ head = vq->free_head; vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; vq->vring.desc[head].addr = virt_to_phys(desc); + /* kmemleak gives a false positive, as it's hidden by virt_to_phys */ + kmemleak_ignore(desc); vq->vring.desc[head].len = i * sizeof(struct vring_desc); /* Update free pointer */ @@ -137,59 +184,92 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } -static int vring_add_buf(struct virtqueue *_vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in, - void *data) +static inline int virtqueue_add(struct virtqueue *_vq, + struct scatterlist *sgs[], + struct scatterlist *(*next) + (struct scatterlist *, unsigned int *), + unsigned int total_out, + unsigned int total_in, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); - unsigned int i, avail, head, uninitialized_var(prev); + struct scatterlist *sg; + unsigned int i, n, avail, uninitialized_var(prev), total_sg; + int head; START_USE(vq); BUG_ON(data == NULL); + if (unlikely(vq->broken)) { + END_USE(vq); + return -EIO; + } + +#ifdef DEBUG + { + ktime_t now = ktime_get(); + + /* No kick or get, with .1 second between? Warn. */ + if (vq->last_add_time_valid) + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) + > 100); + vq->last_add_time = now; + vq->last_add_time_valid = true; + } +#endif + + total_sg = total_in + total_out; + /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. 
FIXME: tune this threshold */ - if (vq->indirect && (out + in) > 1 && vq->num_free) { - head = vring_add_indirect(vq, sg, out, in); - if (head != vq->vring.num) + if (vq->indirect && total_sg > 1 && vq->vq.num_free) { + head = vring_add_indirect(vq, sgs, next, total_sg, total_out, + total_in, + out_sgs, in_sgs, gfp); + if (likely(head >= 0)) goto add_head; } - BUG_ON(out + in > vq->vring.num); - BUG_ON(out + in == 0); + BUG_ON(total_sg > vq->vring.num); + BUG_ON(total_sg == 0); - if (vq->num_free < out + in) { + if (vq->vq.num_free < total_sg) { pr_debug("Can't add buf len %i - avail = %i\n", - out + in, vq->num_free); + total_sg, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ - if (out) + if (out_sgs) vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } /* We're about to use some buffers from the free list. */ - vq->num_free -= out + in; - - head = vq->free_head; - for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { - vq->vring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vring.desc[i].addr = sg_phys(sg); - vq->vring.desc[i].len = sg->length; - prev = i; - sg++; + vq->vq.num_free -= total_sg; + + head = i = vq->free_head; + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { + vq->vring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].len = sg->length; + prev = i; + i = vq->vring.desc[i].next; + } } - for (; in; i = vq->vring.desc[i].next, in--) { - vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - vq->vring.desc[i].addr = sg_phys(sg); - vq->vring.desc[i].len = sg->length; - prev = i; - sg++; + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { + vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; + vq->vring.desc[i].addr = sg_phys(sg); + vq->vring.desc[i].len = sg->length; + prev = i; + i = vq->vring.desc[i].next; + } } /* Last one doesn't continue. */ vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; @@ -202,39 +282,198 @@ add_head: vq->data[head] = data; /* Put entry in available array (but don't update avail->idx until they - * do sync). FIXME: avoid modulus here? */ - avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; + * do sync). */ + avail = (vq->vring.avail->idx & (vq->vring.num-1)); vq->vring.avail->ring[avail] = head; + /* Descriptors and available array need to be set before we expose the + * new available array entries. */ + virtio_wmb(vq->weak_barriers); + vq->vring.avail->idx++; + vq->num_added++; + + /* This is very unlikely, but theoretically possible. Kick + * just in case. */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(_vq); + pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); - /* If we're indirect, we can fit many (assuming not OOM). */ - if (vq->indirect) - return vq->num_free ? vq->vring.num : 0; - return vq->num_free; + return 0; +} + +/** + * virtqueue_add_sgs - expose buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_num: the number of scatterlists readable by other side + * @in_num: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). 
+ * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_sgs(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) +{ + unsigned int i, total_out, total_in; + + /* Count them first. */ + for (i = total_out = total_in = 0; i < out_sgs; i++) { + struct scatterlist *sg; + for (sg = sgs[i]; sg; sg = sg_next(sg)) + total_out++; + } + for (; i < out_sgs + in_sgs; i++) { + struct scatterlist *sg; + for (sg = sgs[i]; sg; sg = sg_next(sg)) + total_in++; + } + return virtqueue_add(_vq, sgs, sg_next_chained, + total_out, total_in, out_sgs, in_sgs, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_sgs); + +/** + * virtqueue_add_outbuf - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); } +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); + +/** + * virtqueue_add_inbuf - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); -static void vring_kick(struct virtqueue *_vq) +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + * if (virtqueue_kick_prepare(vq)) + * virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 new, old; + bool needs_kick; + START_USE(vq); - /* Descriptors and available array need to be set before we expose the - * new available array entries. */ - wmb(); + /* We need to expose available array entries before checking avail + * event. */ + virtio_mb(vq->weak_barriers); - vq->vring.avail->idx += vq->num_added; + old = vq->vring.avail->idx - vq->num_added; + new = vq->vring.avail->idx; vq->num_added = 0; - /* Need to update avail index before checking if we should notify */ - mb(); - - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) - /* Prod other side to tell it about changes. 
*/ - vq->notify(&vq->vq); +#ifdef DEBUG + if (vq->last_add_time_valid) { + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), + vq->last_add_time)) > 100); + } + vq->last_add_time_valid = false; +#endif + if (vq->event) { + needs_kick = vring_need_event(vring_avail_event(&vq->vring), + new, old); + } else { + needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + } END_USE(vq); + return needs_kick; +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. + * + * Returns false if host notify failed or queue is broken, otherwise true. + */ +bool virtqueue_notify(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (unlikely(vq->broken)) + return false; + + /* Prod other side to tell it about changes. */ + if (!vq->notify(_vq)) { + vq->broken = true; + return false; + } + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_* calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns false if kick failed, otherwise true. + */ +bool virtqueue_kick(struct virtqueue *vq) +{ + if (virtqueue_kick_prepare(vq)) + return virtqueue_notify(vq); + return true; } +EXPORT_SYMBOL_GPL(virtqueue_kick); static void detach_buf(struct vring_virtqueue *vq, unsigned int head) { @@ -252,13 +491,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { i = vq->vring.desc[i].next; - vq->num_free++; + vq->vq.num_free++; } vq->vring.desc[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ - vq->num_free++; + vq->vq.num_free++; } static inline bool more_used(const struct vring_virtqueue *vq) @@ -266,11 +505,28 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->last_used_idx != vq->vring.used->idx; } -static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the driver wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_*(). + */ +void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; unsigned int i; + u16 last_used; START_USE(vq); @@ -286,10 +542,11 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) } /* Only get used array entries after they have been exposed by host. 
*/ - rmb(); + virtio_rmb(vq->weak_barriers); - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; + last_used = (vq->last_used_idx & (vq->vring.num - 1)); + i = vq->vring.used->ring[last_used].id; + *len = vq->vring.used->ring[last_used].len; if (unlikely(i >= vq->vring.num)) { BAD_RING(vq, "id %u out of range\n", i); @@ -304,28 +561,138 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) ret = vq->data[i]; detach_buf(vq, i); vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. */ + if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + vring_used_event(&vq->vring) = vq->last_used_idx; + virtio_mb(vq->weak_barriers); + } + +#ifdef DEBUG + vq->last_add_time_valid = false; +#endif + END_USE(vq); return ret; } +EXPORT_SYMBOL_GPL(virtqueue_get_buf); -static void vring_disable_cb(struct virtqueue *_vq) +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. + */ +void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; } +EXPORT_SYMBOL_GPL(virtqueue_disable_cb); -static bool vring_enable_cb(struct virtqueue *_vq) +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used_idx; START_USE(vq); /* We optimistically turn back on interrupts, then check if there was * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - mb(); - if (unlikely(more_used(vq))) { + vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; + END_USE(vq); + return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + virtio_mb(vq->weak_barriers); + return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. 
+ * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb(struct virtqueue *_vq) +{ + unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + return !virtqueue_poll(_vq, last_used_idx); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb); + +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(vq->weak_barriers); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { END_USE(vq); return false; } @@ -333,6 +700,41 @@ static bool vring_enable_cb(struct virtqueue *_vq) END_USE(vq); return true; } +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_*(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */ +void *virtqueue_detach_unused_buf(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->vring.num; i++) { + if (!vq->data[i]) + continue; + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + vq->vring.avail->idx--; + END_USE(vq); + return buf; + } + /* That should have freed everything. 
*/ + BUG_ON(vq->vq.num_free != vq->vring.num); + + END_USE(vq); + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); irqreturn_t vring_interrupt(int irq, void *_vq) { @@ -354,19 +756,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } EXPORT_SYMBOL_GPL(vring_interrupt); -static struct virtqueue_ops vring_vq_ops = { - .add_buf = vring_add_buf, - .get_buf = vring_get_buf, - .kick = vring_kick, - .disable_cb = vring_disable_cb, - .enable_cb = vring_enable_cb, -}; - -struct virtqueue *vring_new_virtqueue(unsigned int num, +struct virtqueue *vring_new_virtqueue(unsigned int index, + unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, - void (*notify)(struct virtqueue *), + bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) { @@ -386,28 +782,34 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vring_init(&vq->vring, num, pages, vring_align); vq->vq.callback = callback; vq->vq.vdev = vdev; - vq->vq.vq_ops = &vring_vq_ops; vq->vq.name = name; + vq->vq.num_free = num; + vq->vq.index = index; vq->notify = notify; + vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; + vq->last_add_time_valid = false; #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ if (!callback) vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; /* Put everything in free lists. */ - vq->num_free = num; vq->free_head = 0; - for (i = 0; i < num-1; i++) + for (i = 0; i < num-1; i++) { vq->vring.desc[i].next = i+1; + vq->data[i] = NULL; + } + vq->data[i] = NULL; return &vq->vq; } @@ -429,6 +831,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_EVENT_IDX: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); @@ -437,4 +841,43 @@ void vring_transport_features(struct virtio_device *vdev) } EXPORT_SYMBOL_GPL(vring_transport_features); +/** + * virtqueue_get_vring_size - return the size of the virtqueue's vring + * @vq: the struct virtqueue containing the vring of interest. + * + * Returns the size of the vring. This is mainly used for boasting to + * userspace. Unlike other operations, this need not be serialized. + */ +unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) +{ + + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->vring.num; +} +EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); + +bool virtqueue_is_broken(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + return vq->broken; +} +EXPORT_SYMBOL_GPL(virtqueue_is_broken); + +/* + * This should prevent the device from being used, allowing drivers to + * recover. You may need to grab appropriate locks to flush. + */ +void virtio_break_device(struct virtio_device *dev) +{ + struct virtqueue *_vq; + + list_for_each_entry(_vq, &dev->vqs, list) { + struct vring_virtqueue *vq = to_vvq(_vq); + vq->broken = true; + } +} +EXPORT_SYMBOL_GPL(virtio_break_device); + MODULE_LICENSE("GPL"); |
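The vp_set_vq_affinity() hunk above wires per-queue MSI-X vectors to CPUs via irq_set_affinity_hint(). A minimal driver-side sketch, assuming the virtqueue_set_affinity() wrapper in <linux/virtio_config.h> that forwards to this op (virtio-net spreads its queue pairs the same way); example_spread_affinity() is a hypothetical helper:

	#include <linux/cpumask.h>
	#include <linux/virtio.h>
	#include <linux/virtio_config.h>

	static void example_spread_affinity(struct virtqueue *vqs[], int nvqs)
	{
		int cpu = cpumask_first(cpu_online_mask);
		int i;

		for (i = 0; i < nvqs; i++) {
			/* cpu == -1 clears the hint; a valid id sets it */
			virtqueue_set_affinity(vqs[i], cpu);
			cpu = cpumask_next(cpu, cpu_online_mask);
			if (cpu >= nr_cpu_ids)
				cpu = cpumask_first(cpu_online_mask);
		}
	}

Note the op returns -EINVAL for callback-less queues and returns 0 without doing anything under INTx, matching the "ignore the affinity request" comment.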
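The CONFIG_PM_SLEEP rework above splits suspend into a transport half (save the status byte, disable the PCI device) and a driver half (drv->freeze/drv->restore). A sketch of the driver half for a single-queue device; all example_* names are illustrative:

	#include <linux/virtio.h>
	#include <linux/virtio_config.h>

	static int example_freeze(struct virtio_device *vdev)
	{
		/* The core has already saved the status byte; quiesce the
		 * device and tear down the rings, and once we return 0 it
		 * powers the PCI device off. */
		vdev->config->reset(vdev);
		vdev->config->del_vqs(vdev);
		return 0;
	}

	static int example_restore(struct virtio_device *vdev)
	{
		struct virtqueue *vq;
		vq_callback_t *cb = NULL;
		const char *name = "example";

		/* virtio_pci_restore() has already re-enabled the device
		 * and re-run vp_finalize_features(); rebuild the ring (a
		 * real driver would stash vq in its private state). */
		return vdev->config->find_vqs(vdev, 1, &vq, &cb, &name);
	}

After example_restore() returns 0, the core writes back saved_status, so the device only sees DRIVER_OK again once the rings exist.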
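virtqueue_add_sgs() above takes an array of scatterlist heads, device-readable lists first. A minimal sketch of queueing one request with a readable header and a writable status byte (the example_req/example_resp layout is hypothetical; the calling convention is the documented one):

	#include <linux/scatterlist.h>
	#include <linux/types.h>
	#include <linux/virtio.h>

	struct example_req  { u32 type; u64 sector; };	/* device reads  */
	struct example_resp { u8 status; };		/* device writes */

	static int example_queue(struct virtqueue *vq,
				 struct example_req *req,
				 struct example_resp *resp)
	{
		struct scatterlist hdr, status, *sgs[2];

		sg_init_one(&hdr, req, sizeof(*req));
		sg_init_one(&status, resp, sizeof(*resp));
		sgs[0] = &hdr;		/* out_sgs = 1: readable side */
		sgs[1] = &status;	/* in_sgs = 1: writable side  */

		/* 0 on success, -ENOSPC when full, -EIO once broken */
		return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
	}

The single-direction wrappers virtqueue_add_outbuf()/virtqueue_add_inbuf() added above are the same call with sg_next_arr() counting, for callers whose scatterlists are plain arrays rather than chained (hence "need not be terminated").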
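The kick path is now split so that only virtqueue_kick_prepare() needs serialization, while the trap to the host in virtqueue_notify() can run unlocked. A sketch of the idiom, with an assumed per-queue spinlock:

	static void example_kick(struct virtqueue *vq, spinlock_t *lock)
	{
		unsigned long flags;
		bool kick;

		spin_lock_irqsave(lock, flags);
		/* must not race with add/get on the same queue */
		kick = virtqueue_kick_prepare(vq);
		spin_unlock_irqrestore(lock, flags);

		if (kick)
			virtqueue_notify(vq);	/* may exit to the host */
	}

virtqueue_kick() itself is now just these two calls back to back, and returns false once notify fails and marks the queue broken.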
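Whether a kick is needed at all depends on VIRTIO_RING_F_EVENT_IDX: virtqueue_kick_prepare() calls vring_need_event(vring_avail_event(&vq->vring), new, old), which tests whether the index the host asked to be woken at falls in the half-open window (old, new] using 16-bit modular arithmetic. A standalone demonstration of that arithmetic (plain userspace C, mirroring the vring_need_event() macro from virtio_ring.h):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool need_event(uint16_t event_idx, uint16_t new, uint16_t old)
	{
		/* true iff event_idx is in (old, new], even across wrap */
		return (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old);
	}

	int main(void)
	{
		printf("%d\n", need_event(5, 7, 3));		/* 1: 5 in (3,7] */
		printf("%d\n", need_event(9, 7, 3));		/* 0: 9 outside  */
		printf("%d\n", need_event(0, 2, 0xfffe));	/* 1: wraparound */
		return 0;
	}

The same wrap-safe style shows up elsewhere in the hunks above: avail/used ring slots are now computed with idx & (num - 1) instead of a modulus, which is valid because the ring size is a power of two.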
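virtqueue_enable_cb_prepare()/virtqueue_poll() above exist to close the race between "no more work" and "interrupts back on". A sketch of the loop-exit idiom (the shape virtio-net's NAPI poll uses; example_requeue() is a hypothetical stand-in for rescheduling the poll loop):

	static void example_requeue(struct virtqueue *vq);

	static void example_poll_exit(struct virtqueue *vq)
	{
		unsigned idx;

		idx = virtqueue_enable_cb_prepare(vq);	/* re-arm */
		if (virtqueue_poll(vq, idx)) {
			/* a buffer slipped in before the re-arm took
			 * effect: suppress again and keep polling */
			virtqueue_disable_cb(vq);
			example_requeue(vq);
		}
	}

virtqueue_enable_cb() is the combined form; virtqueue_enable_cb_delayed() is the variant that also pushes the event index roughly 3/4 of the outstanding backlog ahead so the host can batch interrupts.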
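Finally, virtqueue_detach_unused_buf() gives drivers a way to reclaim their tokens at teardown. A sketch, assuming the tokens were kmalloc'ed buffers (the pattern balloon-style drivers use):

	#include <linux/slab.h>
	#include <linux/virtio.h>

	static void example_drain(struct virtqueue *vq)
	{
		void *buf;

		/* only valid on a quiesced queue, i.e. after the device
		 * has been reset so the host can't touch the ring */
		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
			kfree(buf);
	}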