diff options
Diffstat (limited to 'drivers/virtio/virtio_ring.c')
| -rw-r--r-- | drivers/virtio/virtio_ring.c | 600 | 
1 files changed, 499 insertions, 101 deletions
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1475ed6b575..4d08f45a9c2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -21,24 +21,9 @@  #include <linux/virtio_config.h>  #include <linux/device.h>  #include <linux/slab.h> - -/* virtio guest is communicating with a virtual "device" that actually runs on - * a host processor.  Memory barriers are used to control SMP effects. */ -#ifdef CONFIG_SMP -/* Where possible, use SMP barriers which are more lightweight than mandatory - * barriers, because mandatory barriers control MMIO effects on accesses - * through relaxed memory I/O windows (which virtio does not use). */ -#define virtio_mb() smp_mb() -#define virtio_rmb() smp_rmb() -#define virtio_wmb() smp_wmb() -#else -/* We must force memory ordering even if guest is UP since host could be - * running on another CPU, but SMP barriers are defined to barrier() in that - * configuration. So fall back to mandatory barriers instead. */ -#define virtio_mb() mb() -#define virtio_rmb() rmb() -#define virtio_wmb() wmb() -#endif +#include <linux/module.h> +#include <linux/hrtimer.h> +#include <linux/kmemleak.h>  #ifdef DEBUG  /* For development, we want to crash whenever the ring is screwed. */ @@ -76,14 +61,18 @@ struct vring_virtqueue  	/* Actual memory layout for this queue */  	struct vring vring; +	/* Can we use weak barriers? */ +	bool weak_barriers; +  	/* Other side has made a mess, don't try any more. */  	bool broken;  	/* Host supports indirect buffers */  	bool indirect; -	/* Number of free buffers */ -	unsigned int num_free; +	/* Host publishes avail event idx */ +	bool event; +  	/* Head of free buffer list. */  	unsigned int free_head;  	/* Number we've added since last sync. */ @@ -93,11 +82,15 @@ struct vring_virtqueue  	u16 last_used_idx;  	/* How to notify other side. FIXME: commonalize hcalls! 
*/ -	void (*notify)(struct virtqueue *vq); +	bool (*notify)(struct virtqueue *vq);  #ifdef DEBUG  	/* They're supposed to lock for us. */  	unsigned int in_use; + +	/* Figure out if their kicks are too delayed. */ +	bool last_add_time_valid; +	ktime_t last_add_time;  #endif  	/* Tokens for callbacks. */ @@ -106,48 +99,83 @@ struct vring_virtqueue  #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +static inline struct scatterlist *sg_next_chained(struct scatterlist *sg, +						  unsigned int *count) +{ +	return sg_next(sg); +} + +static inline struct scatterlist *sg_next_arr(struct scatterlist *sg, +					      unsigned int *count) +{ +	if (--(*count) == 0) +		return NULL; +	return sg + 1; +} +  /* Set up an indirect table of descriptors and add it to the queue. */ -static int vring_add_indirect(struct vring_virtqueue *vq, -			      struct scatterlist sg[], -			      unsigned int out, -			      unsigned int in, -			      gfp_t gfp) +static inline int vring_add_indirect(struct vring_virtqueue *vq, +				     struct scatterlist *sgs[], +				     struct scatterlist *(*next) +				       (struct scatterlist *, unsigned int *), +				     unsigned int total_sg, +				     unsigned int total_out, +				     unsigned int total_in, +				     unsigned int out_sgs, +				     unsigned int in_sgs, +				     gfp_t gfp)  {  	struct vring_desc *desc;  	unsigned head; -	int i; +	struct scatterlist *sg; +	int i, n; -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); +	/* +	 * We require lowmem mappings for the descriptors because +	 * otherwise virt_to_phys will give us bogus addresses in the +	 * virtqueue. 
+	 */ +	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); + +	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);  	if (!desc)  		return -ENOMEM; -	/* Transfer entries from the sg list into the indirect page */ -	for (i = 0; i < out; i++) { -		desc[i].flags = VRING_DESC_F_NEXT; -		desc[i].addr = sg_phys(sg); -		desc[i].len = sg->length; -		desc[i].next = i+1; -		sg++; +	/* Transfer entries from the sg lists into the indirect page */ +	i = 0; +	for (n = 0; n < out_sgs; n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { +			desc[i].flags = VRING_DESC_F_NEXT; +			desc[i].addr = sg_phys(sg); +			desc[i].len = sg->length; +			desc[i].next = i+1; +			i++; +		}  	} -	for (; i < (out + in); i++) { -		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; -		desc[i].addr = sg_phys(sg); -		desc[i].len = sg->length; -		desc[i].next = i+1; -		sg++; +	for (; n < (out_sgs + in_sgs); n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { +			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; +			desc[i].addr = sg_phys(sg); +			desc[i].len = sg->length; +			desc[i].next = i+1; +			i++; +		}  	} +	BUG_ON(i != total_sg);  	/* Last one doesn't continue. 
*/  	desc[i-1].flags &= ~VRING_DESC_F_NEXT;  	desc[i-1].next = 0;  	/* We're about to use a buffer */ -	vq->num_free--; +	vq->vq.num_free--;  	/* Use a single buffer which doesn't continue */  	head = vq->free_head;  	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;  	vq->vring.desc[head].addr = virt_to_phys(desc); +	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */ +	kmemleak_ignore(desc);  	vq->vring.desc[head].len = i * sizeof(struct vring_desc);  	/* Update free pointer */ @@ -156,61 +184,92 @@ static int vring_add_indirect(struct vring_virtqueue *vq,  	return head;  } -int virtqueue_add_buf_gfp(struct virtqueue *_vq, -			  struct scatterlist sg[], -			  unsigned int out, -			  unsigned int in, -			  void *data, -			  gfp_t gfp) +static inline int virtqueue_add(struct virtqueue *_vq, +				struct scatterlist *sgs[], +				struct scatterlist *(*next) +				  (struct scatterlist *, unsigned int *), +				unsigned int total_out, +				unsigned int total_in, +				unsigned int out_sgs, +				unsigned int in_sgs, +				void *data, +				gfp_t gfp)  {  	struct vring_virtqueue *vq = to_vvq(_vq); -	unsigned int i, avail, uninitialized_var(prev); +	struct scatterlist *sg; +	unsigned int i, n, avail, uninitialized_var(prev), total_sg;  	int head;  	START_USE(vq);  	BUG_ON(data == NULL); +	if (unlikely(vq->broken)) { +		END_USE(vq); +		return -EIO; +	} + +#ifdef DEBUG +	{ +		ktime_t now = ktime_get(); + +		/* No kick or get, with .1 second between?  Warn. */ +		if (vq->last_add_time_valid) +			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) +					    > 100); +		vq->last_add_time = now; +		vq->last_add_time_valid = true; +	} +#endif + +	total_sg = total_in + total_out; +  	/* If the host supports indirect descriptor tables, and we have multiple  	 * buffers, then go indirect. 
FIXME: tune this threshold */ -	if (vq->indirect && (out + in) > 1 && vq->num_free) { -		head = vring_add_indirect(vq, sg, out, in, gfp); +	if (vq->indirect && total_sg > 1 && vq->vq.num_free) { +		head = vring_add_indirect(vq, sgs, next, total_sg, total_out, +					  total_in, +					  out_sgs, in_sgs, gfp);  		if (likely(head >= 0))  			goto add_head;  	} -	BUG_ON(out + in > vq->vring.num); -	BUG_ON(out + in == 0); +	BUG_ON(total_sg > vq->vring.num); +	BUG_ON(total_sg == 0); -	if (vq->num_free < out + in) { +	if (vq->vq.num_free < total_sg) {  		pr_debug("Can't add buf len %i - avail = %i\n", -			 out + in, vq->num_free); +			 total_sg, vq->vq.num_free);  		/* FIXME: for historical reasons, we force a notify here if  		 * there are outgoing parts to the buffer.  Presumably the  		 * host should service the ring ASAP. */ -		if (out) +		if (out_sgs)  			vq->notify(&vq->vq);  		END_USE(vq);  		return -ENOSPC;  	}  	/* We're about to use some buffers from the free list. */ -	vq->num_free -= out + in; - -	head = vq->free_head; -	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { -		vq->vring.desc[i].flags = VRING_DESC_F_NEXT; -		vq->vring.desc[i].addr = sg_phys(sg); -		vq->vring.desc[i].len = sg->length; -		prev = i; -		sg++; +	vq->vq.num_free -= total_sg; + +	head = i = vq->free_head; +	for (n = 0; n < out_sgs; n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) { +			vq->vring.desc[i].flags = VRING_DESC_F_NEXT; +			vq->vring.desc[i].addr = sg_phys(sg); +			vq->vring.desc[i].len = sg->length; +			prev = i; +			i = vq->vring.desc[i].next; +		}  	} -	for (; in; i = vq->vring.desc[i].next, in--) { -		vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; -		vq->vring.desc[i].addr = sg_phys(sg); -		vq->vring.desc[i].len = sg->length; -		prev = i; -		sg++; +	for (; n < (out_sgs + in_sgs); n++) { +		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) { +			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; +			vq->vring.desc[i].addr = 
sg_phys(sg); +			vq->vring.desc[i].len = sg->length; +			prev = i; +			i = vq->vring.desc[i].next; +		}  	}  	/* Last one doesn't continue. */  	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; @@ -223,39 +282,196 @@ add_head:  	vq->data[head] = data;  	/* Put entry in available array (but don't update avail->idx until they -	 * do sync).  FIXME: avoid modulus here? */ -	avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; +	 * do sync). */ +	avail = (vq->vring.avail->idx & (vq->vring.num-1));  	vq->vring.avail->ring[avail] = head; +	/* Descriptors and available array need to be set before we expose the +	 * new available array entries. */ +	virtio_wmb(vq->weak_barriers); +	vq->vring.avail->idx++; +	vq->num_added++; + +	/* This is very unlikely, but theoretically possible.  Kick +	 * just in case. */ +	if (unlikely(vq->num_added == (1 << 16) - 1)) +		virtqueue_kick(_vq); +  	pr_debug("Added buffer head %i to %p\n", head, vq);  	END_USE(vq); -	/* If we're indirect, we can fit many (assuming not OOM). */ -	if (vq->indirect) -		return vq->num_free ? vq->vring.num : 0; -	return vq->num_free; +	return 0;  } -EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); -void virtqueue_kick(struct virtqueue *_vq) +/** + * virtqueue_add_sgs - expose buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_sgs: the number of scatterlists readable by other side + * @in_sgs: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
+ */ +int virtqueue_add_sgs(struct virtqueue *_vq, +		      struct scatterlist *sgs[], +		      unsigned int out_sgs, +		      unsigned int in_sgs, +		      void *data, +		      gfp_t gfp) +{ +	unsigned int i, total_out, total_in; + +	/* Count them first. */ +	for (i = total_out = total_in = 0; i < out_sgs; i++) { +		struct scatterlist *sg; +		for (sg = sgs[i]; sg; sg = sg_next(sg)) +			total_out++; +	} +	for (; i < out_sgs + in_sgs; i++) { +		struct scatterlist *sg; +		for (sg = sgs[i]; sg; sg = sg_next(sg)) +			total_in++; +	} +	return virtqueue_add(_vq, sgs, sg_next_chained, +			     total_out, total_in, out_sgs, in_sgs, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_sgs); + +/** + * virtqueue_add_outbuf - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: array of scatterlists (need not be terminated!) + * @num: the number of entries in @sg readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf(struct virtqueue *vq, +			 struct scatterlist sg[], unsigned int num, +			 void *data, +			 gfp_t gfp) +{ +	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); + +/** + * virtqueue_add_inbuf - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: array of scatterlists (need not be terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
+ */ +int virtqueue_add_inbuf(struct virtqueue *vq, +			struct scatterlist sg[], unsigned int num, +			void *data, +			gfp_t gfp) +{ +	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); + +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + *	if (virtqueue_kick_prepare(vq)) + *		virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 new, old; +	bool needs_kick; +  	START_USE(vq); -	/* Descriptors and available array need to be set before we expose the -	 * new available array entries. */ -	virtio_wmb(); +	/* We need to expose available array entries before checking avail +	 * event. */ +	virtio_mb(vq->weak_barriers); -	vq->vring.avail->idx += vq->num_added; +	old = vq->vring.avail->idx - vq->num_added; +	new = vq->vring.avail->idx;  	vq->num_added = 0; -	/* Need to update avail index before checking if we should notify */ -	virtio_mb(); - -	if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) -		/* Prod other side to tell it about changes. */ -		vq->notify(&vq->vq); +#ifdef DEBUG +	if (vq->last_add_time_valid) { +		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), +					      vq->last_add_time)) > 100); +	} +	vq->last_add_time_valid = false; +#endif +	if (vq->event) { +		needs_kick = vring_need_event(vring_avail_event(&vq->vring), +					      new, old); +	} else { +		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); +	}  	END_USE(vq); +	return needs_kick; +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. 
+ * + * Returns false if host notify failed or queue is broken, otherwise true. + */ +bool virtqueue_notify(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	if (unlikely(vq->broken)) +		return false; + +	/* Prod other side to tell it about changes. */ +	if (!vq->notify(_vq)) { +		vq->broken = true; +		return false; +	} +	return true; +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_* calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns false if kick failed, otherwise true. + */ +bool virtqueue_kick(struct virtqueue *vq) +{ +	if (virtqueue_kick_prepare(vq)) +		return virtqueue_notify(vq); +	return true;  }  EXPORT_SYMBOL_GPL(virtqueue_kick); @@ -275,13 +491,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {  		i = vq->vring.desc[i].next; -		vq->num_free++; +		vq->vq.num_free++;  	}  	vq->vring.desc[i].next = vq->free_head;  	vq->free_head = head;  	/* Plus final descriptor */ -	vq->num_free++; +	vq->vq.num_free++;  }  static inline bool more_used(const struct vring_virtqueue *vq) @@ -289,11 +505,28 @@ static inline bool more_used(const struct vring_virtqueue *vq)  	return vq->last_used_idx != vq->vring.used->idx;  } +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the device wrote data into the buffer, @len will be set to the + * amount written.  This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). 
+ * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_*(). + */  void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  {  	struct vring_virtqueue *vq = to_vvq(_vq);  	void *ret;  	unsigned int i; +	u16 last_used;  	START_USE(vq); @@ -309,10 +542,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  	}  	/* Only get used array entries after they have been exposed by host. */ -	virtio_rmb(); +	virtio_rmb(vq->weak_barriers); -	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; -	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; +	last_used = (vq->last_used_idx & (vq->vring.num - 1)); +	i = vq->vring.used->ring[last_used].id; +	*len = vq->vring.used->ring[last_used].len;  	if (unlikely(i >= vq->vring.num)) {  		BAD_RING(vq, "id %u out of range\n", i); @@ -327,11 +561,32 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)  	ret = vq->data[i];  	detach_buf(vq, i);  	vq->last_used_idx++; +	/* If we expect an interrupt for the next entry, tell host +	 * by writing event index and flush out the write before +	 * the read in the next get_buf call. */ +	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { +		vring_used_event(&vq->vring) = vq->last_used_idx; +		virtio_mb(vq->weak_barriers); +	} + +#ifdef DEBUG +	vq->last_add_time_valid = false; +#endif +  	END_USE(vq);  	return ret;  }  EXPORT_SYMBOL_GPL(virtqueue_get_buf); +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. 
+ */  void virtqueue_disable_cb(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); @@ -340,17 +595,104 @@ void virtqueue_disable_cb(struct virtqueue *_vq)  }  EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 last_used_idx; + +	START_USE(vq); + +	/* We optimistically turn back on interrupts, then check if there was +	 * more to do. */ +	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to +	 * either clear the flags bit or point the event index at the next +	 * entry. Always do both to keep code simple. */ +	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; +	vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; +	END_USE(vq); +	return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	virtio_mb(vq->weak_barriers); +	return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. 
+ * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */  bool virtqueue_enable_cb(struct virtqueue *_vq)  { +	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); +	return !virtqueue_poll(_vq, last_used_idx); +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb); + +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{  	struct vring_virtqueue *vq = to_vvq(_vq); +	u16 bufs;  	START_USE(vq);  	/* We optimistically turn back on interrupts, then check if there was  	 * more to do. */ +	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to +	 * either clear the flags bit or point the event index at the next +	 * entry. Always do both to keep code simple. 
*/  	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; -	virtio_mb(); -	if (unlikely(more_used(vq))) { +	/* TODO: tune this threshold */ +	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; +	vring_used_event(&vq->vring) = vq->last_used_idx + bufs; +	virtio_mb(vq->weak_barriers); +	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {  		END_USE(vq);  		return false;  	} @@ -358,8 +700,16 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)  	END_USE(vq);  	return true;  } -EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_*(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */  void *virtqueue_detach_unused_buf(struct virtqueue *_vq)  {  	struct vring_virtqueue *vq = to_vvq(_vq); @@ -374,11 +724,12 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)  		/* detach_buf clears data, so grab it now. */  		buf = vq->data[i];  		detach_buf(vq, i); +		vq->vring.avail->idx--;  		END_USE(vq);  		return buf;  	}  	/* That should have freed everything. 
*/ -	BUG_ON(vq->num_free != vq->vring.num); +	BUG_ON(vq->vq.num_free != vq->vring.num);  	END_USE(vq);  	return NULL; @@ -405,11 +756,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq)  }  EXPORT_SYMBOL_GPL(vring_interrupt); -struct virtqueue *vring_new_virtqueue(unsigned int num, +struct virtqueue *vring_new_virtqueue(unsigned int index, +				      unsigned int num,  				      unsigned int vring_align,  				      struct virtio_device *vdev, +				      bool weak_barriers,  				      void *pages, -				      void (*notify)(struct virtqueue *), +				      bool (*notify)(struct virtqueue *),  				      void (*callback)(struct virtqueue *),  				      const char *name)  { @@ -430,23 +783,27 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,  	vq->vq.callback = callback;  	vq->vq.vdev = vdev;  	vq->vq.name = name; +	vq->vq.num_free = num; +	vq->vq.index = index;  	vq->notify = notify; +	vq->weak_barriers = weak_barriers;  	vq->broken = false;  	vq->last_used_idx = 0;  	vq->num_added = 0;  	list_add_tail(&vq->vq.list, &vdev->vqs);  #ifdef DEBUG  	vq->in_use = false; +	vq->last_add_time_valid = false;  #endif  	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); +	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);  	/* No callback?  Tell other side not to bother us. */  	if (!callback)  		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;  	/* Put everything in free lists. */ -	vq->num_free = num;  	vq->free_head = 0;  	for (i = 0; i < num-1; i++) {  		vq->vring.desc[i].next = i+1; @@ -474,6 +831,8 @@ void vring_transport_features(struct virtio_device *vdev)  		switch (i) {  		case VIRTIO_RING_F_INDIRECT_DESC:  			break; +		case VIRTIO_RING_F_EVENT_IDX: +			break;  		default:  			/* We don't understand this bit. 
*/  			clear_bit(i, vdev->features); @@ -482,4 +841,43 @@ void vring_transport_features(struct virtio_device *vdev)  }  EXPORT_SYMBOL_GPL(vring_transport_features); +/** + * virtqueue_get_vring_size - return the size of the virtqueue's vring + * @vq: the struct virtqueue containing the vring of interest. + * + * Returns the size of the vring.  This is mainly used for boasting to + * userspace.  Unlike other operations, this need not be serialized. + */ +unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) +{ + +	struct vring_virtqueue *vq = to_vvq(_vq); + +	return vq->vring.num; +} +EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); + +bool virtqueue_is_broken(struct virtqueue *_vq) +{ +	struct vring_virtqueue *vq = to_vvq(_vq); + +	return vq->broken; +} +EXPORT_SYMBOL_GPL(virtqueue_is_broken); + +/* + * This should prevent the device from being used, allowing drivers to + * recover.  You may need to grab appropriate locks to flush. + */ +void virtio_break_device(struct virtio_device *dev) +{ +	struct virtqueue *_vq; + +	list_for_each_entry(_vq, &dev->vqs, list) { +		struct vring_virtqueue *vq = to_vvq(_vq); +		vq->broken = true; +	} +} +EXPORT_SYMBOL_GPL(virtio_break_device); +  MODULE_LICENSE("GPL");  | 
