Diffstat (limited to 'drivers/virtio')
-rw-r--r--  drivers/virtio/Kconfig          |  42
-rw-r--r--  drivers/virtio/Makefile         |   4
-rw-r--r--  drivers/virtio/config.c         |   1
-rw-r--r--  drivers/virtio/virtio.c         |  84
-rw-r--r--  drivers/virtio/virtio_balloon.c | 398
-rw-r--r--  drivers/virtio/virtio_mmio.c    | 666
-rw-r--r--  drivers/virtio/virtio_pci.c     | 252
-rw-r--r--  drivers/virtio/virtio_ring.c    | 600
8 files changed, 1710 insertions(+), 337 deletions(-)
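
A recurring change in the patch below is the switch from virtqueue_add_buf() plus a struct completion to virtqueue_add_outbuf() plus a wait queue (see tell_host() in virtio_balloon.c). The following is a minimal sketch of that pattern for context, using only calls that appear in the patch; the helper name and its parameters are illustrative and are not part of the patch itself.

/*
 * Sketch only (not from the patch): post an array of balloon PFNs to the
 * host and wait for the ack, in the style of tell_host().  Assumes the
 * virtqueue's callback wakes 'acked', as balloon_ack() does below.
 */
static void post_pfns_and_wait(struct virtqueue *vq, u32 *pfns,
			       unsigned int num_pfns,
			       wait_queue_head_t *acked, void *token)
{
	struct scatterlist sg;
	unsigned int len;

	sg_init_one(&sg, pfns, sizeof(pfns[0]) * num_pfns);

	/* We should always be able to add one buffer to an empty queue. */
	virtqueue_add_outbuf(vq, &sg, 1, token, GFP_KERNEL);
	virtqueue_kick(vq);

	/* When the host has read the buffer, the vq callback wakes us. */
	wait_event(*acked, virtqueue_get_buf(vq, &len));
}
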
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 3dd6294d10b..c6683f2e396 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,17 +1,16 @@ -# Virtio always gets selected by whoever wants it.  config VIRTIO  	tristate +	---help--- +	  This option is selected by any driver which implements the virtio +	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST, +	  CONFIG_RPMSG or CONFIG_S390_GUEST. -# Similarly the virtio ring implementation. -config VIRTIO_RING -	tristate -	depends on VIRTIO +menu "Virtio drivers"  config VIRTIO_PCI -	tristate "PCI driver for virtio devices (EXPERIMENTAL)" -	depends on PCI && EXPERIMENTAL +	tristate "PCI driver for virtio devices" +	depends on PCI  	select VIRTIO -	select VIRTIO_RING  	---help---  	  This drivers provides support for virtio based paravirtual device  	  drivers over PCI.  This requires that your VMM has appropriate PCI @@ -24,12 +23,33 @@ config VIRTIO_PCI  	  If unsure, say M.  config VIRTIO_BALLOON -	tristate "Virtio balloon driver (EXPERIMENTAL)" -	select VIRTIO -	select VIRTIO_RING +	tristate "Virtio balloon driver" +	depends on VIRTIO  	---help---  	 This driver supports increasing and decreasing the amount  	 of memory within a KVM guest.  	 If unsure, say M. + config VIRTIO_MMIO +	tristate "Platform bus driver for memory mapped virtio devices" +	depends on HAS_IOMEM + 	select VIRTIO + 	---help--- + 	 This drivers provides support for memory mapped virtio +	 platform device driver. + + 	 If unsure, say N. + +config VIRTIO_MMIO_CMDLINE_DEVICES +	bool "Memory mapped virtio devices parameter parsing" +	depends on VIRTIO_MMIO +	---help--- +	 Allow virtio-mmio devices instantiation via the kernel command line +	 or module parameters. Be aware that using incorrect parameters (base +	 address in particular) can crash your system - you have been warned. +	 See Documentation/kernel-parameters.txt for details. + +	 If unsure, say 'N'. + +endmenu diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 6738c446c19..9076635697b 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_VIRTIO) += virtio.o -obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o  obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c index 983d482fba4..f70bcd2ff98 100644 --- a/drivers/virtio/config.c +++ b/drivers/virtio/config.c @@ -9,5 +9,4 @@  #include <linux/virtio.h>  #include <linux/virtio_config.h>  #include <linux/bug.h> -#include <asm/system.h> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 3a43ebf83a4..fed0ce198ae 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -1,40 +1,49 @@  #include <linux/virtio.h>  #include <linux/spinlock.h>  #include <linux/virtio_config.h> +#include <linux/module.h> +#include <linux/idr.h>  /* Unique numbering for virtio devices. 
*/ -static unsigned int dev_index; +static DEFINE_IDA(virtio_index_ida);  static ssize_t device_show(struct device *_d,  			   struct device_attribute *attr, char *buf)  { -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); -	return sprintf(buf, "%hu", dev->id.device); +	struct virtio_device *dev = dev_to_virtio(_d); +	return sprintf(buf, "0x%04x\n", dev->id.device);  } +static DEVICE_ATTR_RO(device); +  static ssize_t vendor_show(struct device *_d,  			   struct device_attribute *attr, char *buf)  { -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); -	return sprintf(buf, "%hu", dev->id.vendor); +	struct virtio_device *dev = dev_to_virtio(_d); +	return sprintf(buf, "0x%04x\n", dev->id.vendor);  } +static DEVICE_ATTR_RO(vendor); +  static ssize_t status_show(struct device *_d,  			   struct device_attribute *attr, char *buf)  { -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); -	return sprintf(buf, "0x%08x", dev->config->get_status(dev)); +	struct virtio_device *dev = dev_to_virtio(_d); +	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));  } +static DEVICE_ATTR_RO(status); +  static ssize_t modalias_show(struct device *_d,  			     struct device_attribute *attr, char *buf)  { -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - +	struct virtio_device *dev = dev_to_virtio(_d);  	return sprintf(buf, "virtio:d%08Xv%08X\n",  		       dev->id.device, dev->id.vendor);  } +static DEVICE_ATTR_RO(modalias); +  static ssize_t features_show(struct device *_d,  			     struct device_attribute *attr, char *buf)  { -	struct virtio_device *dev = container_of(_d, struct virtio_device, dev); +	struct virtio_device *dev = dev_to_virtio(_d);  	unsigned int i;  	ssize_t len = 0; @@ -46,14 +55,17 @@ static ssize_t features_show(struct device *_d,  	len += sprintf(buf+len, "\n");  	return len;  } -static struct device_attribute virtio_dev_attrs[] = { -	__ATTR_RO(device), -	__ATTR_RO(vendor), -	__ATTR_RO(status), -	__ATTR_RO(modalias), -	__ATTR_RO(features), -	__ATTR_NULL +static DEVICE_ATTR_RO(features); + +static struct attribute *virtio_dev_attrs[] = { +	&dev_attr_device.attr, +	&dev_attr_vendor.attr, +	&dev_attr_status.attr, +	&dev_attr_modalias.attr, +	&dev_attr_features.attr, +	NULL,  }; +ATTRIBUTE_GROUPS(virtio_dev);  static inline int virtio_id_match(const struct virtio_device *dev,  				  const struct virtio_device_id *id) @@ -69,10 +81,10 @@ static inline int virtio_id_match(const struct virtio_device *dev,  static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)  {  	unsigned int i; -	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); +	struct virtio_device *dev = dev_to_virtio(_dv);  	const struct virtio_device_id *ids; -	ids = container_of(_dr, struct virtio_driver, driver)->id_table; +	ids = drv_to_virtio(_dr)->id_table;  	for (i = 0; ids[i].device; i++)  		if (virtio_id_match(dev, &ids[i]))  			return 1; @@ -81,7 +93,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)  static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)  { -	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); +	struct virtio_device *dev = dev_to_virtio(_dv);  	return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",  			      dev->id.device, dev->id.vendor); @@ -96,8 +108,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,  					 unsigned int fbit)  {  	unsigned int i; -	struct virtio_driver *drv = 
container_of(vdev->dev.driver, -						 struct virtio_driver, driver); +	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);  	for (i = 0; i < drv->feature_table_size; i++)  		if (drv->feature_table[i] == fbit) @@ -109,9 +120,8 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);  static int virtio_dev_probe(struct device *_d)  {  	int err, i; -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); -	struct virtio_driver *drv = container_of(dev->dev.driver, -						 struct virtio_driver, driver); +	struct virtio_device *dev = dev_to_virtio(_d); +	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);  	u32 device_features;  	/* We have a driver! */ @@ -139,22 +149,24 @@ static int virtio_dev_probe(struct device *_d)  	err = drv->probe(dev);  	if (err)  		add_status(dev, VIRTIO_CONFIG_S_FAILED); -	else +	else {  		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); +		if (drv->scan) +			drv->scan(dev); +	}  	return err;  }  static int virtio_dev_remove(struct device *_d)  { -	struct virtio_device *dev = container_of(_d,struct virtio_device,dev); -	struct virtio_driver *drv = container_of(dev->dev.driver, -						 struct virtio_driver, driver); +	struct virtio_device *dev = dev_to_virtio(_d); +	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);  	drv->remove(dev);  	/* Driver should have reset device. */ -	BUG_ON(dev->config->get_status(dev)); +	WARN_ON_ONCE(dev->config->get_status(dev));  	/* Acknowledge the device's existence again. */  	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); @@ -164,7 +176,7 @@ static int virtio_dev_remove(struct device *_d)  static struct bus_type virtio_bus = {  	.name  = "virtio",  	.match = virtio_dev_match, -	.dev_attrs = virtio_dev_attrs, +	.dev_groups = virtio_dev_groups,  	.uevent = virtio_uevent,  	.probe = virtio_dev_probe,  	.remove = virtio_dev_remove, @@ -192,7 +204,11 @@ int register_virtio_device(struct virtio_device *dev)  	dev->dev.bus = &virtio_bus;  	/* Assign a unique device index and hence name. */ -	dev->index = dev_index++; +	err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); +	if (err < 0) +		goto out; + +	dev->index = err;  	dev_set_name(&dev->dev, "virtio%u", dev->index);  	/* We always start by resetting the device, in case a previous @@ -207,6 +223,7 @@ int register_virtio_device(struct virtio_device *dev)  	/* device_register() causes the bus infrastructure to look for a  	 * matching driver. */  	err = device_register(&dev->dev); +out:  	if (err)  		add_status(dev, VIRTIO_CONFIG_S_FAILED);  	return err; @@ -215,7 +232,10 @@ EXPORT_SYMBOL_GPL(register_virtio_device);  void unregister_virtio_device(struct virtio_device *dev)  { +	int index = dev->index; /* save for after device release */ +  	device_unregister(&dev->dev); +	ida_simple_remove(&virtio_index_ida, index);  }  EXPORT_SYMBOL_GPL(unregister_virtio_device); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45ba47..25ebe8eecdb 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1,4 +1,5 @@ -/* Virtio balloon implementation, inspired by Dor Loar and Marcelo +/* + * Virtio balloon implementation, inspired by Dor Laor and Marcelo   * Tosatti's implementations.   
*   *  Copyright 2008 Rusty Russell IBM Corporation @@ -17,7 +18,7 @@   *  along with this program; if not, write to the Free Software   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */ -//#define DEBUG +  #include <linux/virtio.h>  #include <linux/virtio_balloon.h>  #include <linux/swap.h> @@ -25,6 +26,16 @@  #include <linux/freezer.h>  #include <linux/delay.h>  #include <linux/slab.h> +#include <linux/module.h> +#include <linux/balloon_compaction.h> + +/* + * Balloon device works in 4K page units.  So each page is pointed to by + * multiple balloon pages.  All memory counters in this driver are in balloon + * page units. + */ +#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) +#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256  struct virtio_balloon  { @@ -38,18 +49,24 @@ struct virtio_balloon  	struct task_struct *thread;  	/* Waiting for host to ack the pages we released. */ -	struct completion acked; - -	/* Do we have to tell Host *before* we reuse pages? */ -	bool tell_host_first; +	wait_queue_head_t acked; -	/* The pages we've told the Host we're not using. */ +	/* Number of balloon pages we've told the Host we're not using. */  	unsigned int num_pages; -	struct list_head pages; +	/* +	 * The pages we've told the Host we're not using are enqueued +	 * at vb_dev_info->pages list. +	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE +	 * to num_pages above. +	 */ +	struct balloon_dev_info *vb_dev_info; + +	/* Synchronize access/update to this struct virtio_balloon elements */ +	struct mutex balloon_lock;  	/* The array of pfns we tell the Host about. */  	unsigned int num_pfns; -	u32 pfns[256]; +	u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];  	/* Memory statistics */  	int need_stats_update; @@ -67,97 +84,117 @@ static u32 page_to_balloon_pfn(struct page *page)  	BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);  	/* Convert pfn from Linux page size to balloon page size. */ -	return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT); +	return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static struct page *balloon_pfn_to_page(u32 pfn) +{ +	BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE); +	return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE);  }  static void balloon_ack(struct virtqueue *vq)  { -	struct virtio_balloon *vb; -	unsigned int len; +	struct virtio_balloon *vb = vq->vdev->priv; -	vb = virtqueue_get_buf(vq, &len); -	if (vb) -		complete(&vb->acked); +	wake_up(&vb->acked);  }  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)  {  	struct scatterlist sg; +	unsigned int len;  	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); -	init_completion(&vb->acked); -  	/* We should always be able to add one buffer to an empty queue. */ -	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) -		BUG(); +	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);  	virtqueue_kick(vq);  	/* When host has read buffer, this completes via balloon_ack */ -	wait_for_completion(&vb->acked); +	wait_event(vb->acked, virtqueue_get_buf(vq, &len)); +} + +static void set_page_pfns(u32 pfns[], struct page *page) +{ +	unsigned int i; + +	/* Set balloon pfns pointing at this page. +	 * Note that the first pfn points at start of the page. */ +	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) +		pfns[i] = page_to_balloon_pfn(page) + i;  }  static void fill_balloon(struct virtio_balloon *vb, size_t num)  { +	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; +  	/* We can only do one array worth at a time. 
*/  	num = min(num, ARRAY_SIZE(vb->pfns)); -	for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { -		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | -					__GFP_NOMEMALLOC | __GFP_NOWARN); +	mutex_lock(&vb->balloon_lock); +	for (vb->num_pfns = 0; vb->num_pfns < num; +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { +		struct page *page = balloon_page_enqueue(vb_dev_info); +  		if (!page) { -			if (printk_ratelimit()) -				dev_printk(KERN_INFO, &vb->vdev->dev, -					   "Out of puff! Can't get %zu pages\n", -					   num); +			dev_info_ratelimited(&vb->vdev->dev, +					     "Out of puff! Can't get %u pages\n", +					     VIRTIO_BALLOON_PAGES_PER_PAGE);  			/* Sleep for at least 1/5 of a second before retry. */  			msleep(200);  			break;  		} -		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); -		totalram_pages--; -		vb->num_pages++; -		list_add(&page->lru, &vb->pages); +		set_page_pfns(vb->pfns + vb->num_pfns, page); +		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; +		adjust_managed_page_count(page, -1);  	} -	/* Didn't get any?  Oh well. */ -	if (vb->num_pfns == 0) -		return; - -	tell_host(vb, vb->inflate_vq); +	/* Did we get any? */ +	if (vb->num_pfns != 0) +		tell_host(vb, vb->inflate_vq); +	mutex_unlock(&vb->balloon_lock);  }  static void release_pages_by_pfn(const u32 pfns[], unsigned int num)  {  	unsigned int i; -	for (i = 0; i < num; i++) { -		__free_page(pfn_to_page(pfns[i])); -		totalram_pages++; +	/* Find pfns pointing at start of each page, get pages and free them. */ +	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { +		struct page *page = balloon_pfn_to_page(pfns[i]); +		balloon_page_free(page); +		adjust_managed_page_count(page, 1);  	}  }  static void leak_balloon(struct virtio_balloon *vb, size_t num)  {  	struct page *page; +	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;  	/* We can only do one array worth at a time. 
*/  	num = min(num, ARRAY_SIZE(vb->pfns)); -	for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { -		page = list_first_entry(&vb->pages, struct page, lru); -		list_del(&page->lru); -		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); -		vb->num_pages--; +	mutex_lock(&vb->balloon_lock); +	for (vb->num_pfns = 0; vb->num_pfns < num; +	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { +		page = balloon_page_dequeue(vb_dev_info); +		if (!page) +			break; +		set_page_pfns(vb->pfns + vb->num_pfns, page); +		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;  	} -	if (vb->tell_host_first) { -		tell_host(vb, vb->deflate_vq); -		release_pages_by_pfn(vb->pfns, vb->num_pfns); -	} else { -		release_pages_by_pfn(vb->pfns, vb->num_pfns); +	/* +	 * Note that if +	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); +	 * is true, we *have* to do it in this order +	 */ +	if (vb->num_pfns != 0)  		tell_host(vb, vb->deflate_vq); -	} +	mutex_unlock(&vb->balloon_lock); +	release_pages_by_pfn(vb->pfns, vb->num_pfns);  }  static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -201,12 +238,8 @@ static void update_balloon_stats(struct virtio_balloon *vb)   */  static void stats_request(struct virtqueue *vq)  { -	struct virtio_balloon *vb; -	unsigned int len; +	struct virtio_balloon *vb = vq->vdev->priv; -	vb = virtqueue_get_buf(vq, &len); -	if (!vb) -		return;  	vb->need_stats_update = 1;  	wake_up(&vb->config_change);  } @@ -215,14 +248,16 @@ static void stats_handle_request(struct virtio_balloon *vb)  {  	struct virtqueue *vq;  	struct scatterlist sg; +	unsigned int len;  	vb->need_stats_update = 0;  	update_balloon_stats(vb);  	vq = vb->stats_vq; +	if (!virtqueue_get_buf(vq, &len)) +		return;  	sg_init_one(&sg, vb->stats, sizeof(vb->stats)); -	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) -		BUG(); +	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);  	virtqueue_kick(vq);  } @@ -235,20 +270,21 @@ static void virtballoon_changed(struct virtio_device *vdev)  static inline s64 towards_target(struct virtio_balloon *vb)  { -	u32 v; -	vb->vdev->config->get(vb->vdev, -			      offsetof(struct virtio_balloon_config, num_pages), -			      &v, sizeof(v)); -	return (s64)v - vb->num_pages; +	__le32 v; +	s64 target; + +	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v); + +	target = le32_to_cpu(v); +	return target - vb->num_pages;  }  static void update_balloon_size(struct virtio_balloon *vb)  {  	__le32 actual = cpu_to_le32(vb->num_pages); -	vb->vdev->config->set(vb->vdev, -			      offsetof(struct virtio_balloon_config, actual), -			      &actual, sizeof(actual)); +	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, +		      &actual);  }  static int balloon(void *_vballoon) @@ -272,36 +308,31 @@ static int balloon(void *_vballoon)  		else if (diff < 0)  			leak_balloon(vb, -diff);  		update_balloon_size(vb); + +		/* +		 * For large balloon changes, we could spend a lot of time +		 * and always have work to do.  Be nice if preempt disabled. 
+		 */ +		cond_resched();  	}  	return 0;  } -static int virtballoon_probe(struct virtio_device *vdev) +static int init_vqs(struct virtio_balloon *vb)  { -	struct virtio_balloon *vb;  	struct virtqueue *vqs[3];  	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };  	const char *names[] = { "inflate", "deflate", "stats" };  	int err, nvqs; -	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); -	if (!vb) { -		err = -ENOMEM; -		goto out; -	} - -	INIT_LIST_HEAD(&vb->pages); -	vb->num_pages = 0; -	init_waitqueue_head(&vb->config_change); -	vb->vdev = vdev; -	vb->need_stats_update = 0; - -	/* We expect two virtqueues: inflate and deflate, -	 * and optionally stat. */ +	/* +	 * We expect two virtqueues: inflate and deflate, and +	 * optionally stat. +	 */  	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; -	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); +	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);  	if (err) -		goto out_free_vb; +		return err;  	vb->inflate_vq = vqs[0];  	vb->deflate_vq = vqs[1]; @@ -311,13 +342,134 @@ static int virtballoon_probe(struct virtio_device *vdev)  		/*  		 * Prime this virtqueue with one buffer so the hypervisor can -		 * use it to signal us later. +		 * use it to signal us later (it can't be broken yet!).  		 */  		sg_init_one(&sg, vb->stats, sizeof vb->stats); -		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0) +		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) +		    < 0)  			BUG();  		virtqueue_kick(vb->stats_vq);  	} +	return 0; +} + +static const struct address_space_operations virtio_balloon_aops; +#ifdef CONFIG_BALLOON_COMPACTION +/* + * virtballoon_migratepage - perform the balloon page migration on behalf of + *			     a compation thread.     (called under page lock) + * @mapping: the page->mapping which will be assigned to the new migrated page. + * @newpage: page that will replace the isolated page after migration finishes. + * @page   : the isolated (old) page that is about to be migrated to newpage. + * @mode   : compaction mode -- not used for balloon page migration. + * + * After a ballooned page gets isolated by compaction procedures, this is the + * function that performs the page migration on behalf of a compaction thread + * The page migration for virtio balloon is done in a simple swap fashion which + * follows these two macro steps: + *  1) insert newpage into vb->pages list and update the host about it; + *  2) update the host about the old page removed from vb->pages list; + * + * This function preforms the balloon page migration task. + * Called through balloon_mapping->a_ops->migratepage + */ +static int virtballoon_migratepage(struct address_space *mapping, +		struct page *newpage, struct page *page, enum migrate_mode mode) +{ +	struct balloon_dev_info *vb_dev_info = balloon_page_device(page); +	struct virtio_balloon *vb; +	unsigned long flags; + +	BUG_ON(!vb_dev_info); + +	vb = vb_dev_info->balloon_device; + +	/* +	 * In order to avoid lock contention while migrating pages concurrently +	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock +	 * this turn, as it is easier to retry the page migration later. +	 * This also prevents fill_balloon() getting stuck into a mutex +	 * recursion in the case it ends up triggering memory compaction +	 * while it is attempting to inflate the ballon. 
+	 */ +	if (!mutex_trylock(&vb->balloon_lock)) +		return -EAGAIN; + +	/* balloon's page migration 1st step  -- inflate "newpage" */ +	spin_lock_irqsave(&vb_dev_info->pages_lock, flags); +	balloon_page_insert(newpage, mapping, &vb_dev_info->pages); +	vb_dev_info->isolated_pages--; +	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); +	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; +	set_page_pfns(vb->pfns, newpage); +	tell_host(vb, vb->inflate_vq); + +	/* +	 * balloon's page migration 2nd step -- deflate "page" +	 * +	 * It's safe to delete page->lru here because this page is at +	 * an isolated migration list, and this step is expected to happen here +	 */ +	balloon_page_delete(page); +	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; +	set_page_pfns(vb->pfns, page); +	tell_host(vb, vb->deflate_vq); + +	mutex_unlock(&vb->balloon_lock); + +	return MIGRATEPAGE_BALLOON_SUCCESS; +} + +/* define the balloon_mapping->a_ops callback to allow balloon page migration */ +static const struct address_space_operations virtio_balloon_aops = { +			.migratepage = virtballoon_migratepage, +}; +#endif /* CONFIG_BALLOON_COMPACTION */ + +static int virtballoon_probe(struct virtio_device *vdev) +{ +	struct virtio_balloon *vb; +	struct address_space *vb_mapping; +	struct balloon_dev_info *vb_devinfo; +	int err; + +	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); +	if (!vb) { +		err = -ENOMEM; +		goto out; +	} + +	vb->num_pages = 0; +	mutex_init(&vb->balloon_lock); +	init_waitqueue_head(&vb->config_change); +	init_waitqueue_head(&vb->acked); +	vb->vdev = vdev; +	vb->need_stats_update = 0; + +	vb_devinfo = balloon_devinfo_alloc(vb); +	if (IS_ERR(vb_devinfo)) { +		err = PTR_ERR(vb_devinfo); +		goto out_free_vb; +	} + +	vb_mapping = balloon_mapping_alloc(vb_devinfo, +					   (balloon_compaction_check()) ? +					   &virtio_balloon_aops : NULL); +	if (IS_ERR(vb_mapping)) { +		/* +		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP +		 * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off. +		 */ +		err = PTR_ERR(vb_mapping); +		if (err != -EOPNOTSUPP) +			goto out_free_vb_devinfo; +	} + +	vb->vb_dev_info = vb_devinfo; + +	err = init_vqs(vb); +	if (err) +		goto out_free_vb_mapping;  	vb->thread = kthread_run(balloon, vb, "vballoon");  	if (IS_ERR(vb->thread)) { @@ -325,36 +477,73 @@ static int virtballoon_probe(struct virtio_device *vdev)  		goto out_del_vqs;  	} -	vb->tell_host_first -		= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); -  	return 0;  out_del_vqs:  	vdev->config->del_vqs(vdev); +out_free_vb_mapping: +	balloon_mapping_free(vb_mapping); +out_free_vb_devinfo: +	balloon_devinfo_free(vb_devinfo);  out_free_vb:  	kfree(vb);  out:  	return err;  } -static void __devexit virtballoon_remove(struct virtio_device *vdev) +static void remove_common(struct virtio_balloon *vb)  { -	struct virtio_balloon *vb = vdev->priv; - -	kthread_stop(vb->thread); -  	/* There might be pages left in the balloon: free them. */  	while (vb->num_pages)  		leak_balloon(vb, vb->num_pages); +	update_balloon_size(vb);  	/* Now we reset the device so we can clean up the queues. 
*/ -	vdev->config->reset(vdev); +	vb->vdev->config->reset(vb->vdev); -	vdev->config->del_vqs(vdev); +	vb->vdev->config->del_vqs(vb->vdev); +} + +static void virtballoon_remove(struct virtio_device *vdev) +{ +	struct virtio_balloon *vb = vdev->priv; + +	kthread_stop(vb->thread); +	remove_common(vb); +	balloon_mapping_free(vb->vb_dev_info->mapping); +	balloon_devinfo_free(vb->vb_dev_info);  	kfree(vb);  } +#ifdef CONFIG_PM_SLEEP +static int virtballoon_freeze(struct virtio_device *vdev) +{ +	struct virtio_balloon *vb = vdev->priv; + +	/* +	 * The kthread is already frozen by the PM core before this +	 * function is called. +	 */ + +	remove_common(vb); +	return 0; +} + +static int virtballoon_restore(struct virtio_device *vdev) +{ +	struct virtio_balloon *vb = vdev->priv; +	int ret; + +	ret = init_vqs(vdev->priv); +	if (ret) +		return ret; + +	fill_balloon(vb, towards_target(vb)); +	update_balloon_size(vb); +	return 0; +} +#endif +  static unsigned int features[] = {  	VIRTIO_BALLOON_F_MUST_TELL_HOST,  	VIRTIO_BALLOON_F_STATS_VQ, @@ -367,22 +556,15 @@ static struct virtio_driver virtio_balloon_driver = {  	.driver.owner =	THIS_MODULE,  	.id_table =	id_table,  	.probe =	virtballoon_probe, -	.remove =	__devexit_p(virtballoon_remove), +	.remove =	virtballoon_remove,  	.config_changed = virtballoon_changed, +#ifdef CONFIG_PM_SLEEP +	.freeze	=	virtballoon_freeze, +	.restore =	virtballoon_restore, +#endif  }; -static int __init init(void) -{ -	return register_virtio_driver(&virtio_balloon_driver); -} - -static void __exit fini(void) -{ -	unregister_virtio_driver(&virtio_balloon_driver); -} -module_init(init); -module_exit(fini); - +module_virtio_driver(virtio_balloon_driver);  MODULE_DEVICE_TABLE(virtio, id_table);  MODULE_DESCRIPTION("Virtio balloon driver");  MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c new file mode 100644 index 00000000000..c600ccfd692 --- /dev/null +++ b/drivers/virtio/virtio_mmio.c @@ -0,0 +1,666 @@ +/* + * Virtio memory mapped device driver + * + * Copyright 2011, ARM Ltd. + * + * This module allows virtio devices to be used over a virtual, memory mapped + * platform device. + * + * The guest device(s) may be instantiated in one of three equivalent ways: + * + * 1. Static platform device in board's code, eg.: + * + *	static struct platform_device v2m_virtio_device = { + *		.name = "virtio-mmio", + *		.id = -1, + *		.num_resources = 2, + *		.resource = (struct resource []) { + *			{ + *				.start = 0x1001e000, + *				.end = 0x1001e0ff, + *				.flags = IORESOURCE_MEM, + *			}, { + *				.start = 42 + 32, + *				.end = 42 + 32, + *				.flags = IORESOURCE_IRQ, + *			}, + *		} + *	}; + * + * 2. Device Tree node, eg.: + * + *		virtio_block@1e000 { + *			compatible = "virtio,mmio"; + *			reg = <0x1e000 0x100>; + *			interrupts = <42>; + *		} + * + * 3. Kernel module (or command line) parameter. Can be used more than once - + *    one device will be created for each one. Syntax: + * + *		[virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>] + *    where: + *		<size>     := size (can use standard suffixes like K, M or G) + *		<baseaddr> := physical base address + *		<irq>      := interrupt number (as passed to request_irq()) + *		<id>       := (optional) platform device id + *    eg.: + *		virtio_mmio.device=0x100@0x100b0000:48 \ + *				virtio_mmio.device=1K@0x1001e000:74 + * + * + * + * Registers layout (all 32-bit wide): + * + * offset d. 
name             description + * ------ -- ---------------- ----------------- + * + * 0x000  R  MagicValue       Magic value "virt" + * 0x004  R  Version          Device version (current max. 1) + * 0x008  R  DeviceID         Virtio device ID + * 0x00c  R  VendorID         Virtio vendor ID + * + * 0x010  R  HostFeatures     Features supported by the host + * 0x014  W  HostFeaturesSel  Set of host features to access via HostFeatures + * + * 0x020  W  GuestFeatures    Features activated by the guest + * 0x024  W  GuestFeaturesSel Set of activated features to set via GuestFeatures + * 0x028  W  GuestPageSize    Size of guest's memory page in bytes + * + * 0x030  W  QueueSel         Queue selector + * 0x034  R  QueueNumMax      Maximum size of the currently selected queue + * 0x038  W  QueueNum         Queue size for the currently selected queue + * 0x03c  W  QueueAlign       Used Ring alignment for the current queue + * 0x040  RW QueuePFN         PFN for the currently selected queue + * + * 0x050  W  QueueNotify      Queue notifier + * 0x060  R  InterruptStatus  Interrupt status register + * 0x064  W  InterruptACK     Interrupt acknowledge register + * 0x070  RW Status           Device status register + * + * 0x100+ RW                  Device-specific configuration space + * + * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#define pr_fmt(fmt) "virtio-mmio: " fmt + +#include <linux/highmem.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_mmio.h> +#include <linux/virtio_ring.h> + + + +/* The alignment to use between consumer and producer parts of vring. + * Currently hardcoded to the page size. */ +#define VIRTIO_MMIO_VRING_ALIGN		PAGE_SIZE + + + +#define to_virtio_mmio_device(_plat_dev) \ +	container_of(_plat_dev, struct virtio_mmio_device, vdev) + +struct virtio_mmio_device { +	struct virtio_device vdev; +	struct platform_device *pdev; + +	void __iomem *base; +	unsigned long version; + +	/* a list of queues so we can dispatch IRQs */ +	spinlock_t lock; +	struct list_head virtqueues; +}; + +struct virtio_mmio_vq_info { +	/* the actual virtqueue */ +	struct virtqueue *vq; + +	/* the number of entries in the queue */ +	unsigned int num; + +	/* the virtual address of the ring queue */ +	void *queue; + +	/* the list node for the virtqueues list */ +	struct list_head node; +}; + + + +/* Configuration interface */ + +static u32 vm_get_features(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + +	/* TODO: Features > 32 bits */ +	writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL); + +	return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES); +} + +static void vm_finalize_features(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	int i; + +	/* Give virtio_ring a chance to accept features. 
*/ +	vring_transport_features(vdev); + +	for (i = 0; i < ARRAY_SIZE(vdev->features); i++) { +		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); +		writel(vdev->features[i], +				vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); +	} +} + +static void vm_get(struct virtio_device *vdev, unsigned offset, +		   void *buf, unsigned len) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	u8 *ptr = buf; +	int i; + +	for (i = 0; i < len; i++) +		ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static void vm_set(struct virtio_device *vdev, unsigned offset, +		   const void *buf, unsigned len) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	const u8 *ptr = buf; +	int i; + +	for (i = 0; i < len; i++) +		writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); +} + +static u8 vm_get_status(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + +	return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff; +} + +static void vm_set_status(struct virtio_device *vdev, u8 status) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + +	/* We should never be setting status to 0. */ +	BUG_ON(status == 0); + +	writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); +} + +static void vm_reset(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + +	/* 0 status means a reset. */ +	writel(0, vm_dev->base + VIRTIO_MMIO_STATUS); +} + + + +/* Transport interface */ + +/* the notify function used when creating a virt queue */ +static bool vm_notify(struct virtqueue *vq) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); + +	/* We write the queue's selector into the notification register to +	 * signal the other end */ +	writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); +	return true; +} + +/* Notify all virtqueues on an interrupt. 
*/ +static irqreturn_t vm_interrupt(int irq, void *opaque) +{ +	struct virtio_mmio_device *vm_dev = opaque; +	struct virtio_mmio_vq_info *info; +	struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver, +			struct virtio_driver, driver); +	unsigned long status; +	unsigned long flags; +	irqreturn_t ret = IRQ_NONE; + +	/* Read and acknowledge interrupts */ +	status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS); +	writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK); + +	if (unlikely(status & VIRTIO_MMIO_INT_CONFIG) +			&& vdrv && vdrv->config_changed) { +		vdrv->config_changed(&vm_dev->vdev); +		ret = IRQ_HANDLED; +	} + +	if (likely(status & VIRTIO_MMIO_INT_VRING)) { +		spin_lock_irqsave(&vm_dev->lock, flags); +		list_for_each_entry(info, &vm_dev->virtqueues, node) +			ret |= vring_interrupt(irq, info->vq); +		spin_unlock_irqrestore(&vm_dev->lock, flags); +	} + +	return ret; +} + + + +static void vm_del_vq(struct virtqueue *vq) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); +	struct virtio_mmio_vq_info *info = vq->priv; +	unsigned long flags, size; +	unsigned int index = vq->index; + +	spin_lock_irqsave(&vm_dev->lock, flags); +	list_del(&info->node); +	spin_unlock_irqrestore(&vm_dev->lock, flags); + +	vring_del_virtqueue(vq); + +	/* Select and deactivate the queue */ +	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); +	writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + +	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); +	free_pages_exact(info->queue, size); +	kfree(info); +} + +static void vm_del_vqs(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	struct virtqueue *vq, *n; + +	list_for_each_entry_safe(vq, n, &vdev->vqs, list) +		vm_del_vq(vq); + +	free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); +} + + + +static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, +				  void (*callback)(struct virtqueue *vq), +				  const char *name) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	struct virtio_mmio_vq_info *info; +	struct virtqueue *vq; +	unsigned long flags, size; +	int err; + +	if (!name) +		return NULL; + +	/* Select the queue we're interested in */ +	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); + +	/* Queue shouldn't already be set up. */ +	if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN)) { +		err = -ENOENT; +		goto error_available; +	} + +	/* Allocate and fill out our active queue description */ +	info = kmalloc(sizeof(*info), GFP_KERNEL); +	if (!info) { +		err = -ENOMEM; +		goto error_kmalloc; +	} + +	/* Allocate pages for the queue - start with a queue as big as +	 * possible (limited by maximum size allowed by device), drop down +	 * to a minimal size, just big enough to fit descriptor table +	 * and two rings (which makes it "alignment_size * 2") +	 */ +	info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); + +	/* If the device reports a 0 entry queue, we won't be able to +	 * use it to perform I/O, and vring_new_virtqueue() can't create +	 * empty queues anyway, so don't bother to set up the device. +	 */ +	if (info->num == 0) { +		err = -ENOENT; +		goto error_alloc_pages; +	} + +	while (1) { +		size = PAGE_ALIGN(vring_size(info->num, +				VIRTIO_MMIO_VRING_ALIGN)); +		/* Did the last iter shrink the queue below minimum size? 
*/ +		if (size < VIRTIO_MMIO_VRING_ALIGN * 2) { +			err = -ENOMEM; +			goto error_alloc_pages; +		} + +		info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); +		if (info->queue) +			break; + +		info->num /= 2; +	} + +	/* Activate the queue */ +	writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); +	writel(VIRTIO_MMIO_VRING_ALIGN, +			vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); +	writel(virt_to_phys(info->queue) >> PAGE_SHIFT, +			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + +	/* Create the vring */ +	vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, +				 true, info->queue, vm_notify, callback, name); +	if (!vq) { +		err = -ENOMEM; +		goto error_new_virtqueue; +	} + +	vq->priv = info; +	info->vq = vq; + +	spin_lock_irqsave(&vm_dev->lock, flags); +	list_add(&info->node, &vm_dev->virtqueues); +	spin_unlock_irqrestore(&vm_dev->lock, flags); + +	return vq; + +error_new_virtqueue: +	writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); +	free_pages_exact(info->queue, size); +error_alloc_pages: +	kfree(info); +error_kmalloc: +error_available: +	return ERR_PTR(err); +} + +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, +		       struct virtqueue *vqs[], +		       vq_callback_t *callbacks[], +		       const char *names[]) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); +	unsigned int irq = platform_get_irq(vm_dev->pdev, 0); +	int i, err; + +	err = request_irq(irq, vm_interrupt, IRQF_SHARED, +			dev_name(&vdev->dev), vm_dev); +	if (err) +		return err; + +	for (i = 0; i < nvqs; ++i) { +		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]); +		if (IS_ERR(vqs[i])) { +			vm_del_vqs(vdev); +			return PTR_ERR(vqs[i]); +		} +	} + +	return 0; +} + +static const char *vm_bus_name(struct virtio_device *vdev) +{ +	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + +	return vm_dev->pdev->name; +} + +static const struct virtio_config_ops virtio_mmio_config_ops = { +	.get		= vm_get, +	.set		= vm_set, +	.get_status	= vm_get_status, +	.set_status	= vm_set_status, +	.reset		= vm_reset, +	.find_vqs	= vm_find_vqs, +	.del_vqs	= vm_del_vqs, +	.get_features	= vm_get_features, +	.finalize_features = vm_finalize_features, +	.bus_name	= vm_bus_name, +}; + + + +/* Platform device */ + +static int virtio_mmio_probe(struct platform_device *pdev) +{ +	struct virtio_mmio_device *vm_dev; +	struct resource *mem; +	unsigned long magic; + +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); +	if (!mem) +		return -EINVAL; + +	if (!devm_request_mem_region(&pdev->dev, mem->start, +			resource_size(mem), pdev->name)) +		return -EBUSY; + +	vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); +	if (!vm_dev) +		return  -ENOMEM; + +	vm_dev->vdev.dev.parent = &pdev->dev; +	vm_dev->vdev.config = &virtio_mmio_config_ops; +	vm_dev->pdev = pdev; +	INIT_LIST_HEAD(&vm_dev->virtqueues); +	spin_lock_init(&vm_dev->lock); + +	vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); +	if (vm_dev->base == NULL) +		return -EFAULT; + +	/* Check magic value */ +	magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); +	if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { +		dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic); +		return -ENODEV; +	} + +	/* Check device version */ +	vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION); +	if (vm_dev->version != 1) { +		dev_err(&pdev->dev, "Version %ld not supported!\n", +				vm_dev->version); +		return -ENXIO; +	} + +	vm_dev->vdev.id.device = readl(vm_dev->base + 
VIRTIO_MMIO_DEVICE_ID); +	vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); + +	writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + +	platform_set_drvdata(pdev, vm_dev); + +	return register_virtio_device(&vm_dev->vdev); +} + +static int virtio_mmio_remove(struct platform_device *pdev) +{ +	struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); + +	unregister_virtio_device(&vm_dev->vdev); + +	return 0; +} + + + +/* Devices list parameter */ + +#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES) + +static struct device vm_cmdline_parent = { +	.init_name = "virtio-mmio-cmdline", +}; + +static int vm_cmdline_parent_registered; +static int vm_cmdline_id; + +static int vm_cmdline_set(const char *device, +		const struct kernel_param *kp) +{ +	int err; +	struct resource resources[2] = {}; +	char *str; +	long long int base, size; +	unsigned int irq; +	int processed, consumed = 0; +	struct platform_device *pdev; + +	/* Consume "size" part of the command line parameter */ +	size = memparse(device, &str); + +	/* Get "@<base>:<irq>[:<id>]" chunks */ +	processed = sscanf(str, "@%lli:%u%n:%d%n", +			&base, &irq, &consumed, +			&vm_cmdline_id, &consumed); + +	/* +	 * sscanf() must processes at least 2 chunks; also there +	 * must be no extra characters after the last chunk, so +	 * str[consumed] must be '\0' +	 */ +	if (processed < 2 || str[consumed]) +		return -EINVAL; + +	resources[0].flags = IORESOURCE_MEM; +	resources[0].start = base; +	resources[0].end = base + size - 1; + +	resources[1].flags = IORESOURCE_IRQ; +	resources[1].start = resources[1].end = irq; + +	if (!vm_cmdline_parent_registered) { +		err = device_register(&vm_cmdline_parent); +		if (err) { +			pr_err("Failed to register parent device!\n"); +			return err; +		} +		vm_cmdline_parent_registered = 1; +	} + +	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n", +		       vm_cmdline_id, +		       (unsigned long long)resources[0].start, +		       (unsigned long long)resources[0].end, +		       (int)resources[1].start); + +	pdev = platform_device_register_resndata(&vm_cmdline_parent, +			"virtio-mmio", vm_cmdline_id++, +			resources, ARRAY_SIZE(resources), NULL, 0); +	if (IS_ERR(pdev)) +		return PTR_ERR(pdev); + +	return 0; +} + +static int vm_cmdline_get_device(struct device *dev, void *data) +{ +	char *buffer = data; +	unsigned int len = strlen(buffer); +	struct platform_device *pdev = to_platform_device(dev); + +	snprintf(buffer + len, PAGE_SIZE - len, "0x%llx@0x%llx:%llu:%d\n", +			pdev->resource[0].end - pdev->resource[0].start + 1ULL, +			(unsigned long long)pdev->resource[0].start, +			(unsigned long long)pdev->resource[1].start, +			pdev->id); +	return 0; +} + +static int vm_cmdline_get(char *buffer, const struct kernel_param *kp) +{ +	buffer[0] = '\0'; +	device_for_each_child(&vm_cmdline_parent, buffer, +			vm_cmdline_get_device); +	return strlen(buffer) + 1; +} + +static struct kernel_param_ops vm_cmdline_param_ops = { +	.set = vm_cmdline_set, +	.get = vm_cmdline_get, +}; + +device_param_cb(device, &vm_cmdline_param_ops, NULL, S_IRUSR); + +static int vm_unregister_cmdline_device(struct device *dev, +		void *data) +{ +	platform_device_unregister(to_platform_device(dev)); + +	return 0; +} + +static void vm_unregister_cmdline_devices(void) +{ +	if (vm_cmdline_parent_registered) { +		device_for_each_child(&vm_cmdline_parent, NULL, +				vm_unregister_cmdline_device); +		device_unregister(&vm_cmdline_parent); +		vm_cmdline_parent_registered = 0; +	} +} + +#else + +static void 
vm_unregister_cmdline_devices(void) +{ +} + +#endif + +/* Platform driver */ + +static struct of_device_id virtio_mmio_match[] = { +	{ .compatible = "virtio,mmio", }, +	{}, +}; +MODULE_DEVICE_TABLE(of, virtio_mmio_match); + +static struct platform_driver virtio_mmio_driver = { +	.probe		= virtio_mmio_probe, +	.remove		= virtio_mmio_remove, +	.driver		= { +		.name	= "virtio-mmio", +		.owner	= THIS_MODULE, +		.of_match_table	= virtio_mmio_match, +	}, +}; + +static int __init virtio_mmio_init(void) +{ +	return platform_driver_register(&virtio_mmio_driver); +} + +static void __exit virtio_mmio_exit(void) +{ +	platform_driver_unregister(&virtio_mmio_driver); +	vm_unregister_cmdline_devices(); +} + +module_init(virtio_mmio_init); +module_exit(virtio_mmio_exit); + +MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>"); +MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices"); +MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index ef8d9d558fc..101db3faf5d 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -48,6 +48,7 @@ struct virtio_pci_device  	int msix_enabled;  	int intx_enabled;  	struct msix_entry *msix_entries; +	cpumask_var_t *msix_affinity_masks;  	/* Name strings for interrupts. This size should be enough,  	 * and I'm too lazy to allocate each name separately. */  	char (*msix_names)[256]; @@ -55,6 +56,10 @@ struct virtio_pci_device  	unsigned msix_vectors;  	/* Vectors allocated, excluding per-vq vectors if any */  	unsigned msix_used_vectors; + +	/* Status saved during hibernate/restore */ +	u8 saved_status; +  	/* Whether we have vector per vq */  	bool per_vq_vectors;  }; @@ -75,9 +80,6 @@ struct virtio_pci_vq_info  	/* the number of entries in the queue */  	int num; -	/* the index of the queue */ -	int queue_index; -  	/* the virtual address of the ring queue */  	void *queue; @@ -89,18 +91,13 @@ struct virtio_pci_vq_info  };  /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ -static struct pci_device_id virtio_pci_id_table[] = { -	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, -	{ 0 }, +static DEFINE_PCI_DEVICE_TABLE(virtio_pci_id_table) = { +	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) }, +	{ 0 }  };  MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); -/* A PCI device has it's own struct device and so does a virtio device so - * we create a place for the virtio devices to show up in sysfs.  I think it - * would make more sense for virtio to not insist on having it's own device. */ -static struct device *virtio_pci_root; -  /* Convert a generic virtio device to our structure */  static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)  { @@ -174,22 +171,40 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)  	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);  } +/* wait for pending irq handlers */ +static void vp_synchronize_vectors(struct virtio_device *vdev) +{ +	struct virtio_pci_device *vp_dev = to_vp_device(vdev); +	int i; + +	if (vp_dev->intx_enabled) +		synchronize_irq(vp_dev->pci_dev->irq); + +	for (i = 0; i < vp_dev->msix_vectors; ++i) +		synchronize_irq(vp_dev->msix_entries[i].vector); +} +  static void vp_reset(struct virtio_device *vdev)  {  	struct virtio_pci_device *vp_dev = to_vp_device(vdev);  	/* 0 status means a reset. */  	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); +	/* Flush out the status write, and flush in device writes, +	 * including MSi-X interrupts, if any. 
*/ +	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); +	/* Flush pending VQ/configuration callbacks. */ +	vp_synchronize_vectors(vdev);  }  /* the notify function used when creating a virt queue */ -static void vp_notify(struct virtqueue *vq) +static bool vp_notify(struct virtqueue *vq)  {  	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); -	struct virtio_pci_vq_info *info = vq->priv;  	/* we write the queue's selector into the notification register to  	 * signal the other end */ -	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); +	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); +	return true;  }  /* Handle a configuration change: Tell driver if it wants to know. */ @@ -262,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev)  	for (i = 0; i < vp_dev->msix_used_vectors; ++i)  		free_irq(vp_dev->msix_entries[i].vector, vp_dev); +	for (i = 0; i < vp_dev->msix_vectors; i++) +		if (vp_dev->msix_affinity_masks[i]) +			free_cpumask_var(vp_dev->msix_affinity_masks[i]); +  	if (vp_dev->msix_enabled) {  		/* Disable the vector used for configuration */  		iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -271,14 +290,16 @@ static void vp_free_vectors(struct virtio_device *vdev)  		pci_disable_msix(vp_dev->pci_dev);  		vp_dev->msix_enabled = 0; -		vp_dev->msix_vectors = 0;  	} +	vp_dev->msix_vectors = 0;  	vp_dev->msix_used_vectors = 0;  	kfree(vp_dev->msix_names);  	vp_dev->msix_names = NULL;  	kfree(vp_dev->msix_entries);  	vp_dev->msix_entries = NULL; +	kfree(vp_dev->msix_affinity_masks); +	vp_dev->msix_affinity_masks = NULL;  }  static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, @@ -289,6 +310,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,  	unsigned i, v;  	int err = -ENOMEM; +	vp_dev->msix_vectors = nvectors; +  	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,  				       GFP_KERNEL);  	if (!vp_dev->msix_entries) @@ -297,17 +320,23 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,  				     GFP_KERNEL);  	if (!vp_dev->msix_names)  		goto error; +	vp_dev->msix_affinity_masks +		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, +			  GFP_KERNEL); +	if (!vp_dev->msix_affinity_masks) +		goto error; +	for (i = 0; i < nvectors; ++i) +		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], +					GFP_KERNEL)) +			goto error;  	for (i = 0; i < nvectors; ++i)  		vp_dev->msix_entries[i].entry = i; -	/* pci_enable_msix returns positive if we can't get this many. 
*/ -	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); -	if (err > 0) -		err = -ENOSPC; +	err = pci_enable_msix_exact(vp_dev->pci_dev, +				    vp_dev->msix_entries, nvectors);  	if (err)  		goto error; -	vp_dev->msix_vectors = nvectors;  	vp_dev->msix_enabled = 1;  	/* Set the vector used for configuration */ @@ -385,7 +414,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,  	if (!info)  		return ERR_PTR(-ENOMEM); -	info->queue_index = index;  	info->num = num;  	info->msix_vector = msix_vec; @@ -401,8 +429,8 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,  		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);  	/* create the vring */ -	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, -				 vdev, info->queue, vp_notify, callback, name); +	vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev, +				 true, info->queue, vp_notify, callback, name);  	if (!vq) {  		err = -ENOMEM;  		goto out_activate_queue; @@ -420,9 +448,13 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,  		}  	} -	spin_lock_irqsave(&vp_dev->lock, flags); -	list_add(&info->node, &vp_dev->virtqueues); -	spin_unlock_irqrestore(&vp_dev->lock, flags); +	if (callback) { +		spin_lock_irqsave(&vp_dev->lock, flags); +		list_add(&info->node, &vp_dev->virtqueues); +		spin_unlock_irqrestore(&vp_dev->lock, flags); +	} else { +		INIT_LIST_HEAD(&info->node); +	}  	return vq; @@ -446,7 +478,7 @@ static void vp_del_vq(struct virtqueue *vq)  	list_del(&info->node);  	spin_unlock_irqrestore(&vp_dev->lock, flags); -	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); +	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);  	if (vp_dev->msix_enabled) {  		iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -521,7 +553,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,  	vp_dev->per_vq_vectors = per_vq_vectors;  	allocated_vectors = vp_dev->msix_used_vectors;  	for (i = 0; i < nvqs; ++i) { -		if (!callbacks[i] || !vp_dev->msix_enabled) +		if (!names[i]) { +			vqs[i] = NULL; +			continue; +		} else if (!callbacks[i] || !vp_dev->msix_enabled)  			msix_vec = VIRTIO_MSI_NO_VECTOR;  		else if (vp_dev->per_vq_vectors)  			msix_vec = allocated_vectors++; @@ -581,7 +616,43 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,  				  false, false);  } -static struct virtio_config_ops virtio_pci_config_ops = { +static const char *vp_bus_name(struct virtio_device *vdev) +{ +	struct virtio_pci_device *vp_dev = to_vp_device(vdev); + +	return pci_name(vp_dev->pci_dev); +} + +/* Setup the affinity for a virtqueue: + * - force the affinity for per vq vector + * - OR over all affinities for shared MSI + * - ignore the affinity request if we're using INTX + */ +static int vp_set_vq_affinity(struct virtqueue *vq, int cpu) +{ +	struct virtio_device *vdev = vq->vdev; +	struct virtio_pci_device *vp_dev = to_vp_device(vdev); +	struct virtio_pci_vq_info *info = vq->priv; +	struct cpumask *mask; +	unsigned int irq; + +	if (!vq->callback) +		return -EINVAL; + +	if (vp_dev->msix_enabled) { +		mask = vp_dev->msix_affinity_masks[info->msix_vector]; +		irq = vp_dev->msix_entries[info->msix_vector].vector; +		if (cpu == -1) +			irq_set_affinity_hint(irq, NULL); +		else { +			cpumask_set_cpu(cpu, mask); +			irq_set_affinity_hint(irq, mask); +		} +	} +	return 0; +} + +static const struct virtio_config_ops virtio_pci_config_ops = {  	.get		= vp_get,  	.set		= vp_set,  	.get_status	= 
vp_get_status, @@ -591,25 +662,22 @@ static struct virtio_config_ops virtio_pci_config_ops = {  	.del_vqs	= vp_del_vqs,  	.get_features	= vp_get_features,  	.finalize_features = vp_finalize_features, +	.bus_name	= vp_bus_name, +	.set_vq_affinity = vp_set_vq_affinity,  };  static void virtio_pci_release_dev(struct device *_d)  { -	struct virtio_device *dev = container_of(_d, struct virtio_device, dev); -	struct virtio_pci_device *vp_dev = to_vp_device(dev); -	struct pci_dev *pci_dev = vp_dev->pci_dev; - -	vp_del_vqs(dev); -	pci_set_drvdata(pci_dev, NULL); -	pci_iounmap(pci_dev, vp_dev->ioaddr); -	pci_release_regions(pci_dev); -	pci_disable_device(pci_dev); -	kfree(vp_dev); +	/* +	 * No need for a release method as we allocate/free +	 * all devices together with the pci devices. +	 * Provide an empty one to avoid getting a warning from core. +	 */  }  /* the PCI probing function */ -static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, -				      const struct pci_device_id *id) +static int virtio_pci_probe(struct pci_dev *pci_dev, +			    const struct pci_device_id *id)  {  	struct virtio_pci_device *vp_dev;  	int err; @@ -629,7 +697,7 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,  	if (vp_dev == NULL)  		return -ENOMEM; -	vp_dev->vdev.dev.parent = virtio_pci_root; +	vp_dev->vdev.dev.parent = &pci_dev->dev;  	vp_dev->vdev.dev.release = virtio_pci_release_dev;  	vp_dev->vdev.config = &virtio_pci_config_ops;  	vp_dev->pci_dev = pci_dev; @@ -649,8 +717,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,  		goto out_enable_device;  	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); -	if (vp_dev->ioaddr == NULL) +	if (vp_dev->ioaddr == NULL) { +		err = -ENOMEM;  		goto out_req_regions; +	}  	pci_set_drvdata(pci_dev, vp_dev);  	pci_set_master(pci_dev); @@ -670,7 +740,6 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,  	return 0;  out_set_drvdata: -	pci_set_drvdata(pci_dev, NULL);  	pci_iounmap(pci_dev, vp_dev->ioaddr);  out_req_regions:  	pci_release_regions(pci_dev); @@ -681,61 +750,80 @@ out:  	return err;  } -static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) +static void virtio_pci_remove(struct pci_dev *pci_dev)  {  	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);  	unregister_virtio_device(&vp_dev->vdev); + +	vp_del_vqs(&vp_dev->vdev); +	pci_iounmap(pci_dev, vp_dev->ioaddr); +	pci_release_regions(pci_dev); +	pci_disable_device(pci_dev); +	kfree(vp_dev);  } -#ifdef CONFIG_PM -static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int virtio_pci_freeze(struct device *dev)  { -	pci_save_state(pci_dev); -	pci_set_power_state(pci_dev, PCI_D3hot); -	return 0; +	struct pci_dev *pci_dev = to_pci_dev(dev); +	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); +	struct virtio_driver *drv; +	int ret; + +	drv = container_of(vp_dev->vdev.dev.driver, +			   struct virtio_driver, driver); + +	ret = 0; +	vp_dev->saved_status = vp_get_status(&vp_dev->vdev); +	if (drv && drv->freeze) +		ret = drv->freeze(&vp_dev->vdev); + +	if (!ret) +		pci_disable_device(pci_dev); +	return ret;  } -static int virtio_pci_resume(struct pci_dev *pci_dev) +static int virtio_pci_restore(struct device *dev)  { -	pci_restore_state(pci_dev); -	pci_set_power_state(pci_dev, PCI_D0); -	return 0; +	struct pci_dev *pci_dev = to_pci_dev(dev); +	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); +	struct virtio_driver *drv; +	int ret; + +	drv = container_of(vp_dev->vdev.dev.driver, +			   
struct virtio_driver, driver); + +	ret = pci_enable_device(pci_dev); +	if (ret) +		return ret; + +	pci_set_master(pci_dev); +	vp_finalize_features(&vp_dev->vdev); + +	if (drv && drv->restore) +		ret = drv->restore(&vp_dev->vdev); + +	/* Finally, tell the device we're all set */ +	if (!ret) +		vp_set_status(&vp_dev->vdev, vp_dev->saved_status); + +	return ret;  } + +static const struct dev_pm_ops virtio_pci_pm_ops = { +	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) +};  #endif  static struct pci_driver virtio_pci_driver = {  	.name		= "virtio-pci",  	.id_table	= virtio_pci_id_table,  	.probe		= virtio_pci_probe, -	.remove		= __devexit_p(virtio_pci_remove), -#ifdef CONFIG_PM -	.suspend	= virtio_pci_suspend, -	.resume		= virtio_pci_resume, +	.remove		= virtio_pci_remove, +#ifdef CONFIG_PM_SLEEP +	.driver.pm	= &virtio_pci_pm_ops,  #endif  }; -static int __init virtio_pci_init(void) -{ -	int err; - -	virtio_pci_root = root_device_register("virtio-pci"); -	if (IS_ERR(virtio_pci_root)) -		return PTR_ERR(virtio_pci_root); - -	err = pci_register_driver(&virtio_pci_driver); -	if (err) -		root_device_unregister(virtio_pci_root); - -	return err; -} - -module_init(virtio_pci_init); - -static void __exit virtio_pci_exit(void) -{ -	pci_unregister_driver(&virtio_pci_driver); -	root_device_unregister(virtio_pci_root); -} - -module_exit(virtio_pci_exit); +module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1475ed6b575..4d08f45a9c2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -21,24 +21,9 @@  #include <linux/virtio_config.h>  #include <linux/device.h>  #include <linux/slab.h> - -/* virtio guest is communicating with a virtual "device" that actually runs on - * a host processor.  Memory barriers are used to control SMP effects. */ -#ifdef CONFIG_SMP -/* Where possible, use SMP barriers which are more lightweight than mandatory - * barriers, because mandatory barriers control MMIO effects on accesses - * through relaxed memory I/O windows (which virtio does not use). */ -#define virtio_mb() smp_mb() -#define virtio_rmb() smp_rmb() -#define virtio_wmb() smp_wmb() -#else -/* We must force memory ordering even if guest is UP since host could be - * running on another CPU, but SMP barriers are defined to barrier() in that - * configuration. So fall back to mandatory barriers instead. */ -#define virtio_mb() mb() -#define virtio_rmb() rmb() -#define virtio_wmb() wmb() -#endif +#include <linux/module.h> +#include <linux/hrtimer.h> +#include <linux/kmemleak.h>  #ifdef DEBUG  /* For development, we want to crash whenever the ring is screwed. */ @@ -76,14 +61,18 @@ struct vring_virtqueue  	/* Actual memory layout for this queue */  	struct vring vring; +	/* Can we use weak barriers? */ +	bool weak_barriers; +  	/* Other side has made a mess, don't try any more. */  	bool broken;  	/* Host supports indirect buffers */  	bool indirect; -	/* Number of free buffers */ -	unsigned int num_free; +	/* Host publishes avail event idx */ +	bool event; +  	/* Head of free buffer list. */  	unsigned int free_head;  	/* Number we've added since last sync. */ @@ -93,11 +82,15 @@ struct vring_virtqueue  	u16 last_used_idx;  	/* How to notify other side. FIXME: commonalize hcalls! */ -	void (*notify)(struct virtqueue *vq); +	bool (*notify)(struct virtqueue *vq);  #ifdef DEBUG  	/* They're supposed to lock for us. */  	unsigned int in_use; + +	/* Figure out if their kicks are too delayed. 
*/ +	bool last_add_time_valid; +	ktime_t last_add_time;  #endif  	/* Tokens for callbacks. */ @@ -106,48 +99,83 @@ struct vring_virtqueue  #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, +						  unsigned int *count) +{ +	return sg_next(sg); +} + +static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, +					      unsigned int *count) +{ +	if (--(*count) == 0) +		return NULL; +	return sg + 1; +} +  /* Set up an indirect table of descriptors and add it to the queue. */ -static int vring_add_indirect(struct vring_virtqueue *vq, -			      struct scatterlist sg[], -			      unsigned int out, -			      unsigned int in, -			      gfp_t gfp) +static inline int vring_add_indirect(struct vring_virtqueue *vq, +				     struct scatterlist *sgs[], +				     struct scatterlist *(*next) +				       (struct scatterlist *, unsigned int *), +				     unsigned int total_sg, +				     unsigned int total_out, +				     unsigned int total_in, +				     unsigned int out_sgs, +				     unsigned int in_sgs, +				     gfp_t gfp)  {  	struct vring_desc *desc;  	unsigned head; -	int i; +	struct scatterlist *sg; +	int i, n; -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); +	/* +	 * We require lowmem mappings for the descriptors because +	 * otherwise virt_to_phys will give us bogus addresses in the +	 * virtqueue. +	 */ +	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); + +	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);  	if (!desc)  		return -ENOMEM; -	/* Transfer entries from the sg list into the indirect page */ -	for (i = 0; i < out; i++) { -		desc[i].flags = VRING_DESC_F_NEXT; -		desc[i].addr = sg_phys(sg); -		desc[i].len = sg->length; -		desc[i].next = i+1; -		sg++; +	/* Transfer entries from the sg lists into the indirect page */ +	i = 0; +	for (n = 0; n < out_sgs; n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { +			desc[i].flags = VRING_DESC_F_NEXT; +			desc[i].addr = sg_phys(sg); +			desc[i].len = sg->length; +			desc[i].next = i+1; +			i++; +		}  	} -	for (; i < (out + in); i++) { -		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; -		desc[i].addr = sg_phys(sg); -		desc[i].len = sg->length; -		desc[i].next = i+1; -		sg++; +	for (; n < (out_sgs + in_sgs); n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { +			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; +			desc[i].addr = sg_phys(sg); +			desc[i].len = sg->length; +			desc[i].next = i+1; +			i++; +		}  	} +	BUG_ON(i != total_sg);  	/* Last one doesn't continue. 
*/  	desc[i-1].flags &= ~VRING_DESC_F_NEXT;  	desc[i-1].next = 0;  	/* We're about to use a buffer */ -	vq->num_free--; +	vq->vq.num_free--;  	/* Use a single buffer which doesn't continue */  	head = vq->free_head;  	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;  	vq->vring.desc[head].addr = virt_to_phys(desc); +	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */ +	kmemleak_ignore(desc);  	vq->vring.desc[head].len = i * sizeof(struct vring_desc);  	/* Update free pointer */ @@ -156,61 +184,92 @@ static int vring_add_indirect(struct vring_virtqueue *vq,  	return head;  } -int virtqueue_add_buf_gfp(struct virtqueue *_vq, -			  struct scatterlist sg[], -			  unsigned int out, -			  unsigned int in, -			  void *data, -			  gfp_t gfp) +static inline int virtqueue_add(struct virtqueue *_vq, +				struct scatterlist *sgs[], +				struct scatterlist *(*next) +				  (struct scatterlist *, unsigned int *), +				unsigned int total_out, +				unsigned int total_in, +				unsigned int out_sgs, +				unsigned int in_sgs, +				void *data, +				gfp_t gfp)  {  	struct vring_virtqueue *vq = to_vvq(_vq); -	unsigned int i, avail, uninitialized_var(prev); +	struct scatterlist *sg; +	unsigned int i, n, avail, uninitialized_var(prev), total_sg;  	int head;  	START_USE(vq);  	BUG_ON(data == NULL); +	if (unlikely(vq->broken)) { +		END_USE(vq); +		return -EIO; +	} + +#ifdef DEBUG +	{ +		ktime_t now = ktime_get(); + +		/* No kick or get, with .1 second between?  Warn. */ +		if (vq->last_add_time_valid) +			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) +					    > 100); +		vq->last_add_time = now; +		vq->last_add_time_valid = true; +	} +#endif + +	total_sg = total_in + total_out; +  	/* If the host supports indirect descriptor tables, and we have multiple  	 * buffers, then go indirect. FIXME: tune this threshold */ -	if (vq->indirect && (out + in) > 1 && vq->num_free) { -		head = vring_add_indirect(vq, sg, out, in, gfp); +	if (vq->indirect && total_sg > 1 && vq->vq.num_free) { +		head = vring_add_indirect(vq, sgs, next, total_sg, total_out, +					  total_in, +					  out_sgs, in_sgs, gfp);  		if (likely(head >= 0))  			goto add_head;  	} -	BUG_ON(out + in > vq->vring.num); -	BUG_ON(out + in == 0); +	BUG_ON(total_sg > vq->vring.num); +	BUG_ON(total_sg == 0); -	if (vq->num_free < out + in) { +	if (vq->vq.num_free < total_sg) {  		pr_debug("Can't add buf len %i - avail = %i\n", -			 out + in, vq->num_free); +			 total_sg, vq->vq.num_free);  		/* FIXME: for historical reasons, we force a notify here if  		 * there are outgoing parts to the buffer.  Presumably the  		 * host should service the ring ASAP. */ -		if (out) +		if (out_sgs)  			vq->notify(&vq->vq);  		END_USE(vq);  		return -ENOSPC;  	}  	/* We're about to use some buffers from the free list. 
*/ -	vq->num_free -= out + in; - -	head = vq->free_head; -	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { -		vq->vring.desc[i].flags = VRING_DESC_F_NEXT; -		vq->vring.desc[i].addr = sg_phys(sg); -		vq->vring.desc[i].len = sg->length; -		prev = i; -		sg++; +	vq->vq.num_free -= total_sg; + +	head = i = vq->free_head; +	for (n = 0; n < out_sgs; n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { +			vq->vring.desc[i].flags = VRING_DESC_F_NEXT; +			vq->vring.desc[i].addr = sg_phys(sg); +			vq->vring.desc[i].len = sg->length; +			prev = i; +			i = vq->vring.desc[i].next; +		}  	} -	for (; in; i = vq->vring.desc[i].next, in--) { -		vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; -		vq->vring.desc[i].addr = sg_phys(sg); -		vq->vring.desc[i].len = sg->length; -		prev = i; -		sg++; +	for (; n < (out_sgs + in_sgs); n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { +			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; +			vq->vring.desc[i].addr = sg_phys(sg); +			vq->vring.desc[i].len = sg->length; +			prev = i; +			i = vq->vring.desc[i].next; +		}  	}  	/* Last one doesn't continue. */  	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; @@ -223,39 +282,196 @@ add_head:  	vq->data[head] = data;  	/* Put entry in available array (but don't update avail->idx until they -	 * do sync).  FIXME: avoid modulus here? */ -	avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; +	 * do sync). */ +	avail = (vq->vring.avail->idx & (vq->vring.num-1));  	vq->vring.avail->ring[avail] = head; +	/* Descriptors and available array need to be set before we expose the +	 * new available array entries. */ +	virtio_wmb(vq->weak_barriers); +	vq->vring.avail->idx++; +	vq->num_added++; + +	/* This is very unlikely, but theoretically possible.  Kick +	 * just in case. */ +	if (unlikely(vq->num_added == (1 << 16) - 1)) +		virtqueue_kick(_vq); +  	pr_debug("Added buffer head %i to %p\n", head, vq);  	END_USE(vq); -	/* If we're indirect, we can fit many (assuming not OOM). */ -	if (vq->indirect) -		return vq->num_free ? vq->vring.num : 0; -	return vq->num_free; +	return 0;  } -EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); -void virtqueue_kick(struct virtqueue *_vq) +/** + * virtqueue_add_sgs - expose buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_num: the number of scatterlists readable by other side + * @in_num: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_sgs(struct virtqueue *_vq, +		      struct scatterlist *sgs[], +		      unsigned int out_sgs, +		      unsigned int in_sgs, +		      void *data, +		      gfp_t gfp) +{ +	unsigned int i, total_out, total_in; + +	/* Count them first. 
*/ +	for (i = total_out = total_in = 0; i < out_sgs; i++) { +		struct scatterlist *sg; +		for (sg = sgs[i]; sg; sg = sg_next(sg)) +			total_out++; +	} +	for (; i < out_sgs + in_sgs; i++) { +		struct scatterlist *sg; +		for (sg = sgs[i]; sg; sg = sg_next(sg)) +			total_in++; +	} +	return virtqueue_add(_vq, sgs, sg_next_chained, +			     total_out, total_in, out_sgs, in_sgs, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_sgs); + +/** + * virtqueue_add_outbuf - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf(struct virtqueue *vq, +			 struct scatterlist sg[], unsigned int num, +			 void *data, +			 gfp_t gfp) +{ +	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); + +/** + * virtqueue_add_inbuf - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of scatterlists (need not be terminated!) + * @num: the number of scatterlists writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf(struct virtqueue *vq, +			struct scatterlist sg[], unsigned int num, +			void *data, +			gfp_t gfp) +{ +	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); + +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + *	if (virtqueue_kick_prepare(vq)) + *		virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 new, old; +	bool needs_kick; +  	START_USE(vq); -	/* Descriptors and available array need to be set before we expose the -	 * new available array entries. */ -	virtio_wmb(); +	/* We need to expose available array entries before checking avail +	 * event. */ +	virtio_mb(vq->weak_barriers); -	vq->vring.avail->idx += vq->num_added; +	old = vq->vring.avail->idx - vq->num_added; +	new = vq->vring.avail->idx;  	vq->num_added = 0; -	/* Need to update avail index before checking if we should notify */ -	virtio_mb(); - -	if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) -		/* Prod other side to tell it about changes. 
*/ -		vq->notify(&vq->vq); +#ifdef DEBUG +	if (vq->last_add_time_valid) { +		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), +					      vq->last_add_time)) > 100); +	} +	vq->last_add_time_valid = false; +#endif +	if (vq->event) { +		needs_kick = vring_need_event(vring_avail_event(&vq->vring), +					      new, old); +	} else { +		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); +	}  	END_USE(vq); +	return needs_kick; +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. + * + * Returns false if host notify failed or queue is broken, otherwise true. + */ +bool virtqueue_notify(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	if (unlikely(vq->broken)) +		return false; + +	/* Prod other side to tell it about changes. */ +	if (!vq->notify(_vq)) { +		vq->broken = true; +		return false; +	} +	return true; +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_* calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns false if kick failed, otherwise true. + */ +bool virtqueue_kick(struct virtqueue *vq) +{ +	if (virtqueue_kick_prepare(vq)) +		return virtqueue_notify(vq); +	return true;  }  EXPORT_SYMBOL_GPL(virtqueue_kick); @@ -275,13 +491,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {  		i = vq->vring.desc[i].next; -		vq->num_free++; +		vq->vq.num_free++;  	}  	vq->vring.desc[i].next = vq->free_head;  	vq->free_head = head;  	/* Plus final descriptor */ -	vq->num_free++; +	vq->vq.num_free++;  }  static inline bool more_used(const struct vring_virtqueue *vq) @@ -289,11 +505,28 @@ static inline bool more_used(const struct vring_virtqueue *vq)  	return vq->last_used_idx != vq->vring.used->idx;  } +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the driver wrote data into the buffer, @len will be set to the + * amount written.  This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_*(). + */  void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  {  	struct vring_virtqueue *vq = to_vvq(_vq);  	void *ret;  	unsigned int i; +	u16 last_used;  	START_USE(vq); @@ -309,10 +542,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  	}  	/* Only get used array entries after they have been exposed by host. 
*/ -	virtio_rmb(); +	virtio_rmb(vq->weak_barriers); -	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; -	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; +	last_used = (vq->last_used_idx & (vq->vring.num - 1)); +	i = vq->vring.used->ring[last_used].id; +	*len = vq->vring.used->ring[last_used].len;  	if (unlikely(i >= vq->vring.num)) {  		BAD_RING(vq, "id %u out of range\n", i); @@ -327,11 +561,32 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  	ret = vq->data[i];  	detach_buf(vq, i);  	vq->last_used_idx++; +	/* If we expect an interrupt for the next entry, tell host +	 * by writing event index and flush out the write before +	 * the read in the next get_buf call. */ +	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { +		vring_used_event(&vq->vring) = vq->last_used_idx; +		virtio_mb(vq->weak_barriers); +	} + +#ifdef DEBUG +	vq->last_add_time_valid = false; +#endif +  	END_USE(vq);  	return ret;  }  EXPORT_SYMBOL_GPL(virtqueue_get_buf); +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. + */  void virtqueue_disable_cb(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); @@ -340,17 +595,104 @@ void virtqueue_disable_cb(struct virtqueue *_vq)  }  EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 last_used_idx; + +	START_USE(vq); + +	/* We optimistically turn back on interrupts, then check if there was +	 * more to do. */ +	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to +	 * either clear the flags bit or point the event index at the next +	 * entry. Always do both to keep code simple. */ +	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; +	vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; +	END_USE(vq); +	return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	virtio_mb(vq->weak_barriers); +	return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. 
+ * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */  bool virtqueue_enable_cb(struct virtqueue *_vq)  { +	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); +	return !virtqueue_poll(_vq, last_used_idx); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb); + +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{  	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 bufs;  	START_USE(vq);  	/* We optimistically turn back on interrupts, then check if there was  	 * more to do. */ +	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to +	 * either clear the flags bit or point the event index at the next +	 * entry. Always do both to keep code simple. */  	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; -	virtio_mb(); -	if (unlikely(more_used(vq))) { +	/* TODO: tune this threshold */ +	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; +	vring_used_event(&vq->vring) = vq->last_used_idx + bufs; +	virtio_mb(vq->weak_barriers); +	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {  		END_USE(vq);  		return false;  	} @@ -358,8 +700,16 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)  	END_USE(vq);  	return true;  } -EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_*(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */  void *virtqueue_detach_unused_buf(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); @@ -374,11 +724,12 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)  		/* detach_buf clears data, so grab it now. */  		buf = vq->data[i];  		detach_buf(vq, i); +		vq->vring.avail->idx--;  		END_USE(vq);  		return buf;  	}  	/* That should have freed everything. 
*/ -	BUG_ON(vq->num_free != vq->vring.num); +	BUG_ON(vq->vq.num_free != vq->vring.num);  	END_USE(vq);  	return NULL; @@ -405,11 +756,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq)  }  EXPORT_SYMBOL_GPL(vring_interrupt); -struct virtqueue *vring_new_virtqueue(unsigned int num, +struct virtqueue *vring_new_virtqueue(unsigned int index, +				      unsigned int num,  				      unsigned int vring_align,  				      struct virtio_device *vdev, +				      bool weak_barriers,  				      void *pages, -				      void (*notify)(struct virtqueue *), +				      bool (*notify)(struct virtqueue *),  				      void (*callback)(struct virtqueue *),  				      const char *name)  { @@ -430,23 +783,27 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,  	vq->vq.callback = callback;  	vq->vq.vdev = vdev;  	vq->vq.name = name; +	vq->vq.num_free = num; +	vq->vq.index = index;  	vq->notify = notify; +	vq->weak_barriers = weak_barriers;  	vq->broken = false;  	vq->last_used_idx = 0;  	vq->num_added = 0;  	list_add_tail(&vq->vq.list, &vdev->vqs);  #ifdef DEBUG  	vq->in_use = false; +	vq->last_add_time_valid = false;  #endif  	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); +	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);  	/* No callback?  Tell other side not to bother us. */  	if (!callback)  		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;  	/* Put everything in free lists. */ -	vq->num_free = num;  	vq->free_head = 0;  	for (i = 0; i < num-1; i++) {  		vq->vring.desc[i].next = i+1; @@ -474,6 +831,8 @@ void vring_transport_features(struct virtio_device *vdev)  		switch (i) {  		case VIRTIO_RING_F_INDIRECT_DESC:  			break; +		case VIRTIO_RING_F_EVENT_IDX: +			break;  		default:  			/* We don't understand this bit. */  			clear_bit(i, vdev->features); @@ -482,4 +841,43 @@ void vring_transport_features(struct virtio_device *vdev)  }  EXPORT_SYMBOL_GPL(vring_transport_features); +/** + * virtqueue_get_vring_size - return the size of the virtqueue's vring + * @vq: the struct virtqueue containing the vring of interest. + * + * Returns the size of the vring.  This is mainly used for boasting to + * userspace.  Unlike other operations, this need not be serialized. + */ +unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) +{ + +	struct vring_virtqueue *vq = to_vvq(_vq); + +	return vq->vring.num; +} +EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); + +bool virtqueue_is_broken(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	return vq->broken; +} +EXPORT_SYMBOL_GPL(virtqueue_is_broken); + +/* + * This should prevent the device from being used, allowing drivers to + * recover.  You may need to grab appropriate locks to flush. + */ +void virtio_break_device(struct virtio_device *dev) +{ +	struct virtqueue *_vq; + +	list_for_each_entry(_vq, &dev->vqs, list) { +		struct vring_virtqueue *vq = to_vvq(_vq); +		vq->broken = true; +	} +} +EXPORT_SYMBOL_GPL(virtio_break_device); +  MODULE_LICENSE("GPL");  | 
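
Usage sketch (not part of the patch above): the hunks in virtio_ring.c introduce virtqueue_add_sgs() and the virtqueue_kick_prepare()/virtqueue_notify() split documented in the new kernel-doc comments. The helper below shows how a hypothetical driver might use them; "my_queue_request", "my_vq", "req", "resp" and the lock are illustrative only and not taken from this diff.

#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>

static int my_queue_request(struct virtqueue *my_vq, spinlock_t *lock,
			    void *req, size_t req_len,
			    void *resp, size_t resp_len)
{
	struct scatterlist out, in;
	struct scatterlist *sgs[2] = { &out, &in };
	unsigned long flags;
	bool kick;
	int err;

	sg_init_one(&out, req, req_len);	/* device-readable buffer */
	sg_init_one(&in, resp, resp_len);	/* device-writable buffer */

	spin_lock_irqsave(lock, flags);
	/* One readable sg list, one writable; req doubles as the token
	 * later returned by virtqueue_get_buf(). */
	err = virtqueue_add_sgs(my_vq, sgs, 1, 1, req, GFP_ATOMIC);
	if (err) {
		spin_unlock_irqrestore(lock, flags);
		return err;
	}
	/* Serialized half of the kick, done under the vq lock. */
	kick = virtqueue_kick_prepare(my_vq);
	spin_unlock_irqrestore(lock, flags);

	/* The notify half needs no serialization, so it can run unlocked. */
	if (kick)
		virtqueue_notify(my_vq);
	return 0;
}

The split kick mirrors the rationale in the virtqueue_kick_prepare() comment: only the prepare step must be serialized with other virtqueue operations, so the potentially slow exit to the host via virtqueue_notify() can happen after the lock is dropped.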
