aboutsummaryrefslogtreecommitdiff
path: root/drivers/vhost/vhost.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--drivers/vhost/vhost.c465
1 files changed, 208 insertions, 257 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c14c42b95ab..c90f4374442 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -13,22 +13,18 @@
#include <linux/eventfd.h>
#include <linux/vhost.h>
-#include <linux/virtio_net.h>
+#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
-#include <linux/rcupdate.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/cgroup.h>
-
-#include <linux/net.h>
-#include <linux/if_packet.h>
-#include <linux/if_arp.h>
+#include <linux/module.h>
#include "vhost.h"
@@ -37,8 +33,6 @@ enum {
VHOST_MEMORY_F_LOG = 0x1,
};
-static unsigned vhost_zcopy_mask __read_mostly;
-
#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
@@ -64,7 +58,7 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
return 0;
}
-static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
+void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
INIT_LIST_HEAD(&work->node);
work->fn = fn;
@@ -72,6 +66,7 @@ static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
work->flushing = 0;
work->queue_seq = work->done_seq = 0;
}
+EXPORT_SYMBOL_GPL(vhost_work_init);
/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
@@ -81,27 +76,45 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
init_poll_funcptr(&poll->table, vhost_poll_func);
poll->mask = mask;
poll->dev = dev;
+ poll->wqh = NULL;
vhost_work_init(&poll->work, fn);
}
+EXPORT_SYMBOL_GPL(vhost_poll_init);
/* Start polling a file. We add ourselves to file's wait queue. The caller must
* keep a reference to a file until after vhost_poll_stop is called. */
-void vhost_poll_start(struct vhost_poll *poll, struct file *file)
+int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
unsigned long mask;
+ int ret = 0;
+
+ if (poll->wqh)
+ return 0;
mask = file->f_op->poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
+ if (mask & POLLERR) {
+ if (poll->wqh)
+ remove_wait_queue(poll->wqh, &poll->wait);
+ ret = -EINVAL;
+ }
+
+ return ret;
}
+EXPORT_SYMBOL_GPL(vhost_poll_start);
/* Stop polling a file. After this function returns, it becomes safe to drop the
* file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
- remove_wait_queue(poll->wqh, &poll->wait);
+ if (poll->wqh) {
+ remove_wait_queue(poll->wqh, &poll->wait);
+ poll->wqh = NULL;
+ }
}
+EXPORT_SYMBOL_GPL(vhost_poll_stop);
static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
unsigned seq)
@@ -114,7 +127,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
return left <= 0;
}
-static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned seq;
int flushing;
@@ -129,6 +142,7 @@ static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
spin_unlock_irq(&dev->work_lock);
BUG_ON(flushing < 0);
}
+EXPORT_SYMBOL_GPL(vhost_work_flush);
/* Flush any work that has been scheduled. When calling this, don't hold any
* locks that are also used by the callback. */
@@ -136,9 +150,9 @@ void vhost_poll_flush(struct vhost_poll *poll)
{
vhost_work_flush(poll->dev, &poll->work);
}
+EXPORT_SYMBOL_GPL(vhost_poll_flush);
-static inline void vhost_work_queue(struct vhost_dev *dev,
- struct vhost_work *work)
+void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned long flags;
@@ -146,15 +160,19 @@ static inline void vhost_work_queue(struct vhost_dev *dev,
if (list_empty(&work->node)) {
list_add_tail(&work->node, &dev->work_list);
work->queue_seq++;
+ spin_unlock_irqrestore(&dev->work_lock, flags);
wake_up_process(dev->worker);
+ } else {
+ spin_unlock_irqrestore(&dev->work_lock, flags);
}
- spin_unlock_irqrestore(&dev->work_lock, flags);
}
+EXPORT_SYMBOL_GPL(vhost_work_queue);
void vhost_poll_queue(struct vhost_poll *poll)
{
vhost_work_queue(poll->dev, &poll->work);
}
+EXPORT_SYMBOL_GPL(vhost_poll_queue);
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
@@ -171,9 +189,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
- vq->vhost_hlen = 0;
- vq->sock_hlen = 0;
vq->private_data = NULL;
+ vq->acked_features = 0;
vq->log_base = NULL;
vq->error_ctx = NULL;
vq->error = NULL;
@@ -181,9 +198,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
- vq->upend_idx = 0;
- vq->done_idx = 0;
- vq->ubufs = NULL;
+ vq->memory = NULL;
}
static int vhost_worker(void *data)
@@ -191,7 +206,9 @@ static int vhost_worker(void *data)
struct vhost_dev *dev = data;
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
+ mm_segment_t oldfs = get_fs();
+ set_fs(USER_DS);
use_mm(dev->mm);
for (;;) {
@@ -222,11 +239,14 @@ static int vhost_worker(void *data)
if (work) {
__set_current_state(TASK_RUNNING);
work->fn(work);
+ if (need_resched())
+ schedule();
} else
schedule();
}
unuse_mm(dev->mm);
+ set_fs(oldfs);
return 0;
}
@@ -238,43 +258,28 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
vq->log = NULL;
kfree(vq->heads);
vq->heads = NULL;
- kfree(vq->ubuf_info);
- vq->ubuf_info = NULL;
-}
-
-void vhost_enable_zcopy(int vq)
-{
- vhost_zcopy_mask |= 0x1 << vq;
}
/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
+ struct vhost_virtqueue *vq;
int i;
- bool zcopy;
for (i = 0; i < dev->nvqs; ++i) {
- dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
- UIO_MAXIOV, GFP_KERNEL);
- dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV,
- GFP_KERNEL);
- dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
- UIO_MAXIOV, GFP_KERNEL);
- zcopy = vhost_zcopy_mask & (0x1 << i);
- if (zcopy)
- dev->vqs[i].ubuf_info =
- kmalloc(sizeof *dev->vqs[i].ubuf_info *
- UIO_MAXIOV, GFP_KERNEL);
- if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
- !dev->vqs[i].heads ||
- (zcopy && !dev->vqs[i].ubuf_info))
+ vq = dev->vqs[i];
+ vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV,
+ GFP_KERNEL);
+ vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL);
+ vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
+ if (!vq->indirect || !vq->log || !vq->heads)
goto err_nomem;
}
return 0;
err_nomem:
for (; i >= 0; --i)
- vhost_vq_free_iovecs(&dev->vqs[i]);
+ vhost_vq_free_iovecs(dev->vqs[i]);
return -ENOMEM;
}
@@ -283,12 +288,13 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
int i;
for (i = 0; i < dev->nvqs; ++i)
- vhost_vq_free_iovecs(&dev->vqs[i]);
+ vhost_vq_free_iovecs(dev->vqs[i]);
}
-long vhost_dev_init(struct vhost_dev *dev,
- struct vhost_virtqueue *vqs, int nvqs)
+void vhost_dev_init(struct vhost_dev *dev,
+ struct vhost_virtqueue **vqs, int nvqs)
{
+ struct vhost_virtqueue *vq;
int i;
dev->vqs = vqs;
@@ -303,20 +309,19 @@ long vhost_dev_init(struct vhost_dev *dev,
dev->worker = NULL;
for (i = 0; i < dev->nvqs; ++i) {
- dev->vqs[i].log = NULL;
- dev->vqs[i].indirect = NULL;
- dev->vqs[i].heads = NULL;
- dev->vqs[i].ubuf_info = NULL;
- dev->vqs[i].dev = dev;
- mutex_init(&dev->vqs[i].mutex);
- vhost_vq_reset(dev, dev->vqs + i);
- if (dev->vqs[i].handle_kick)
- vhost_poll_init(&dev->vqs[i].poll,
- dev->vqs[i].handle_kick, POLLIN, dev);
+ vq = dev->vqs[i];
+ vq->log = NULL;
+ vq->indirect = NULL;
+ vq->heads = NULL;
+ vq->dev = dev;
+ mutex_init(&vq->mutex);
+ vhost_vq_reset(dev, vq);
+ if (vq->handle_kick)
+ vhost_poll_init(&vq->poll, vq->handle_kick,
+ POLLIN, dev);
}
-
- return 0;
}
+EXPORT_SYMBOL_GPL(vhost_dev_init);
/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
@@ -324,6 +329,7 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
/* Are you the owner? If not, I don't think you mean to do that */
return dev->mm == current->mm ? 0 : -EPERM;
}
+EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
struct vhost_attach_cgroups_struct {
struct vhost_work work;
@@ -351,13 +357,20 @@ static int vhost_attach_cgroups(struct vhost_dev *dev)
}
/* Caller should have device mutex */
-static long vhost_dev_set_owner(struct vhost_dev *dev)
+bool vhost_dev_has_owner(struct vhost_dev *dev)
+{
+ return dev->mm;
+}
+EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
+
+/* Caller should have device mutex */
+long vhost_dev_set_owner(struct vhost_dev *dev)
{
struct task_struct *worker;
int err;
/* Is there an owner already? */
- if (dev->mm) {
+ if (vhost_dev_has_owner(dev)) {
err = -EBUSY;
goto err_mm;
}
@@ -392,76 +405,62 @@ err_worker:
err_mm:
return err;
}
+EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
-/* Caller should have device mutex */
-long vhost_dev_reset_owner(struct vhost_dev *dev)
+struct vhost_memory *vhost_dev_reset_owner_prepare(void)
{
- struct vhost_memory *memory;
+ return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
- /* Restore memory to default empty mapping. */
- memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
- if (!memory)
- return -ENOMEM;
+/* Caller should have device mutex */
+void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
+{
+ int i;
- vhost_dev_cleanup(dev);
+ vhost_dev_cleanup(dev, true);
+ /* Restore memory to default empty mapping. */
memory->nregions = 0;
- RCU_INIT_POINTER(dev->memory, memory);
- return 0;
+ dev->memory = memory;
+ /* We don't need VQ locks below since vhost_dev_cleanup makes sure
+ * VQs aren't running.
+ */
+ for (i = 0; i < dev->nvqs; ++i)
+ dev->vqs[i]->memory = memory;
}
+EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);
-/* In case of DMA done not in order in lower device driver for some reason.
- * upend_idx is used to track end of used idx, done_idx is used to track head
- * of used idx. Once lower device DMA done contiguously, we will signal KVM
- * guest used idx.
- */
-int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
+void vhost_dev_stop(struct vhost_dev *dev)
{
int i;
- int j = 0;
-
- for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
- if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) {
- vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
- vhost_add_used_and_signal(vq->dev, vq,
- vq->heads[i].id, 0);
- ++j;
- } else
- break;
+
+ for (i = 0; i < dev->nvqs; ++i) {
+ if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
+ vhost_poll_stop(&dev->vqs[i]->poll);
+ vhost_poll_flush(&dev->vqs[i]->poll);
+ }
}
- if (j)
- vq->done_idx = i;
- return j;
}
+EXPORT_SYMBOL_GPL(vhost_dev_stop);
-/* Caller should have device mutex */
-void vhost_dev_cleanup(struct vhost_dev *dev)
+/* Caller should have device mutex if and only if locked is set */
+void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
{
int i;
for (i = 0; i < dev->nvqs; ++i) {
- if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
- vhost_poll_stop(&dev->vqs[i].poll);
- vhost_poll_flush(&dev->vqs[i].poll);
- }
- /* Wait for all lower device DMAs done. */
- if (dev->vqs[i].ubufs)
- vhost_ubuf_put_and_wait(dev->vqs[i].ubufs);
-
- /* Signal guest as appropriate. */
- vhost_zerocopy_signal_used(&dev->vqs[i]);
-
- if (dev->vqs[i].error_ctx)
- eventfd_ctx_put(dev->vqs[i].error_ctx);
- if (dev->vqs[i].error)
- fput(dev->vqs[i].error);
- if (dev->vqs[i].kick)
- fput(dev->vqs[i].kick);
- if (dev->vqs[i].call_ctx)
- eventfd_ctx_put(dev->vqs[i].call_ctx);
- if (dev->vqs[i].call)
- fput(dev->vqs[i].call);
- vhost_vq_reset(dev, dev->vqs + i);
+ if (dev->vqs[i]->error_ctx)
+ eventfd_ctx_put(dev->vqs[i]->error_ctx);
+ if (dev->vqs[i]->error)
+ fput(dev->vqs[i]->error);
+ if (dev->vqs[i]->kick)
+ fput(dev->vqs[i]->kick);
+ if (dev->vqs[i]->call_ctx)
+ eventfd_ctx_put(dev->vqs[i]->call_ctx);
+ if (dev->vqs[i]->call)
+ fput(dev->vqs[i]->call);
+ vhost_vq_reset(dev, dev->vqs[i]);
}
vhost_dev_free_iovecs(dev);
if (dev->log_ctx)
@@ -471,9 +470,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
fput(dev->log_file);
dev->log_file = NULL;
/* No one will access memory at this point */
- kfree(rcu_dereference_protected(dev->memory,
- lockdep_is_held(&dev->mutex)));
- RCU_INIT_POINTER(dev->memory, NULL);
+ kfree(dev->memory);
+ dev->memory = NULL;
WARN_ON(!list_empty(&dev->work_list));
if (dev->worker) {
kthread_stop(dev->worker);
@@ -483,6 +481,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
mmput(dev->mm);
dev->mm = NULL;
}
+EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
@@ -531,26 +530,28 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
for (i = 0; i < d->nvqs; ++i) {
int ok;
- mutex_lock(&d->vqs[i].mutex);
+ bool log;
+
+ mutex_lock(&d->vqs[i]->mutex);
+ log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
/* If ring is inactive, will check when it's enabled. */
- if (d->vqs[i].private_data)
- ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
- log_all);
+ if (d->vqs[i]->private_data)
+ ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log);
else
ok = 1;
- mutex_unlock(&d->vqs[i].mutex);
+ mutex_unlock(&d->vqs[i]->mutex);
if (!ok)
return 0;
}
return 1;
}
-static int vq_access_ok(struct vhost_dev *d, unsigned int num,
+static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
struct vring_desc __user *desc,
struct vring_avail __user *avail,
struct vring_used __user *used)
{
- size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+ size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
access_ok(VERIFY_READ, avail,
sizeof *avail + num * sizeof *avail->ring + s) &&
@@ -562,25 +563,19 @@ static int vq_access_ok(struct vhost_dev *d, unsigned int num,
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
- struct vhost_memory *mp;
-
- mp = rcu_dereference_protected(dev->memory,
- lockdep_is_held(&dev->mutex));
- return memory_access_ok(dev, mp, 1);
+ return memory_access_ok(dev, dev->memory, 1);
}
+EXPORT_SYMBOL_GPL(vhost_log_access_ok);
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+static int vq_log_access_ok(struct vhost_virtqueue *vq,
void __user *log_base)
{
- struct vhost_memory *mp;
- size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+ size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
- mp = rcu_dereference_protected(vq->dev->memory,
- lockdep_is_held(&vq->mutex));
- return vq_memory_access_ok(log_base, mp,
- vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
+ return vq_memory_access_ok(log_base, vq->memory,
+ vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
vq->num * sizeof *vq->used->ring + s));
@@ -590,14 +585,16 @@ static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
- return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
- vq_log_access_ok(vq->dev, vq, vq->log_base);
+ return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) &&
+ vq_log_access_ok(vq, vq->log_base);
}
+EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
{
struct vhost_memory mem, *newmem, *oldmem;
unsigned long size = offsetof(struct vhost_memory, regions);
+ int i;
if (copy_from_user(&mem, m, size))
return -EFAULT;
@@ -616,23 +613,27 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
return -EFAULT;
}
- if (!memory_access_ok(d, newmem,
- vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+ if (!memory_access_ok(d, newmem, 0)) {
kfree(newmem);
return -EFAULT;
}
- oldmem = rcu_dereference_protected(d->memory,
- lockdep_is_held(&d->mutex));
- rcu_assign_pointer(d->memory, newmem);
- synchronize_rcu();
+ oldmem = d->memory;
+ d->memory = newmem;
+
+ /* All memory accesses are done under some VQ mutex. */
+ for (i = 0; i < d->nvqs; ++i) {
+ mutex_lock(&d->vqs[i]->mutex);
+ d->vqs[i]->memory = newmem;
+ mutex_unlock(&d->vqs[i]->mutex);
+ }
kfree(oldmem);
return 0;
}
-static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
+long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
{
- struct file *eventfp, *filep = NULL,
- *pollstart = NULL, *pollstop = NULL;
+ struct file *eventfp, *filep = NULL;
+ bool pollstart = false, pollstop = false;
struct eventfd_ctx *ctx = NULL;
u32 __user *idxp = argp;
struct vhost_virtqueue *vq;
@@ -648,7 +649,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
if (idx >= d->nvqs)
return -ENOBUFS;
- vq = d->vqs + idx;
+ vq = d->vqs[idx];
mutex_lock(&vq->mutex);
@@ -723,7 +724,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
* If it is not, we don't as size might not have been setup.
* We will verify when backend is configured. */
if (vq->private_data) {
- if (!vq_access_ok(d, vq->num,
+ if (!vq_access_ok(vq, vq->num,
(void __user *)(unsigned long)a.desc_user_addr,
(void __user *)(unsigned long)a.avail_user_addr,
(void __user *)(unsigned long)a.used_user_addr)) {
@@ -758,8 +759,8 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
break;
}
if (eventfp != vq->kick) {
- pollstop = filep = vq->kick;
- pollstart = vq->kick = eventfp;
+ pollstop = (filep = vq->kick) != NULL;
+ pollstart = (vq->kick = eventfp) != NULL;
} else
filep = eventfp;
break;
@@ -814,7 +815,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
fput(filep);
if (pollstart && vq->handle_kick)
- vhost_poll_start(&vq->poll, vq->kick);
+ r = vhost_poll_start(&vq->poll, vq->kick);
mutex_unlock(&vq->mutex);
@@ -822,11 +823,11 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
vhost_poll_flush(&vq->poll);
return r;
}
+EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
/* Caller must have device mutex */
-long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
+long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
- void __user *argp = (void __user *)arg;
struct file *eventfp, *filep = NULL;
struct eventfd_ctx *ctx = NULL;
u64 p;
@@ -860,10 +861,10 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
for (i = 0; i < d->nvqs; ++i) {
struct vhost_virtqueue *vq;
void __user *base = (void __user *)(unsigned long)p;
- vq = d->vqs + i;
+ vq = d->vqs[i];
mutex_lock(&vq->mutex);
/* If ring is inactive, will check when it's enabled. */
- if (vq->private_data && !vq_log_access_ok(d, vq, base))
+ if (vq->private_data && !vq_log_access_ok(vq, base))
r = -EFAULT;
else
vq->log_base = base;
@@ -887,9 +888,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
} else
filep = eventfp;
for (i = 0; i < d->nvqs; ++i) {
- mutex_lock(&d->vqs[i].mutex);
- d->vqs[i].log_ctx = d->log_ctx;
- mutex_unlock(&d->vqs[i].mutex);
+ mutex_lock(&d->vqs[i]->mutex);
+ d->vqs[i]->log_ctx = d->log_ctx;
+ mutex_unlock(&d->vqs[i]->mutex);
}
if (ctx)
eventfd_ctx_put(ctx);
@@ -897,12 +898,13 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
fput(filep);
break;
default:
- r = vhost_set_vring(d, ioctl, argp);
+ r = -ENOIOCTLCMD;
break;
}
done:
return r;
}
+EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
__u64 addr, __u32 len)
@@ -937,9 +939,9 @@ static int set_bit_to_user(int nr, void __user *addr)
if (r < 0)
return r;
BUG_ON(r != 1);
- base = kmap_atomic(page, KM_USER0);
+ base = kmap_atomic(page);
set_bit(bit, base);
- kunmap_atomic(base, KM_USER0);
+ kunmap_atomic(base);
set_page_dirty_lock(page);
put_page(page);
return 0;
@@ -994,6 +996,7 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
BUG();
return 0;
}
+EXPORT_SYMBOL_GPL(vhost_log_write);
static int vhost_update_used_flags(struct vhost_virtqueue *vq)
{
@@ -1045,8 +1048,9 @@ int vhost_init_used(struct vhost_virtqueue *vq)
vq->signalled_used_valid = false;
return get_user(vq->last_used_idx, &vq->used->idx);
}
+EXPORT_SYMBOL_GPL(vhost_init_used);
-static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
struct iovec iov[], int iov_size)
{
const struct vhost_memory_region *reg;
@@ -1055,9 +1059,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
u64 s = 0;
int ret = 0;
- rcu_read_lock();
-
- mem = rcu_dereference(dev->memory);
+ mem = vq->memory;
while ((u64)len > s) {
u64 size;
if (unlikely(ret >= iov_size)) {
@@ -1071,7 +1073,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
}
_iov = iov + ret;
size = reg->memory_size - addr + reg->guest_phys_addr;
- _iov->iov_len = min((u64)len, size);
+ _iov->iov_len = min((u64)len - s, size);
_iov->iov_base = (void __user *)(unsigned long)
(reg->userspace_addr + addr - reg->guest_phys_addr);
s += size;
@@ -1079,7 +1081,6 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
++ret;
}
- rcu_read_unlock();
return ret;
}
@@ -1104,7 +1105,7 @@ static unsigned next_desc(struct vring_desc *desc)
return next;
}
-static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+static int get_indirect(struct vhost_virtqueue *vq,
struct iovec iov[], unsigned int iov_size,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num,
@@ -1123,7 +1124,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
return -EINVAL;
}
- ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+ ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect,
UIO_MAXIOV);
if (unlikely(ret < 0)) {
vq_err(vq, "Translation failure %d in indirect.\n", ret);
@@ -1163,7 +1164,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
return -EINVAL;
}
- ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
iov_size - iov_count);
if (unlikely(ret < 0)) {
vq_err(vq, "Translation failure %d indirect idx %d\n",
@@ -1200,7 +1201,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
* This function returns the descriptor number found, or vq->num (which is
* never a valid descriptor number) if none was found. A negative code is
* returned on error. */
-int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
struct iovec iov[], unsigned int iov_size,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num)
@@ -1274,7 +1275,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
return -EFAULT;
}
if (desc.flags & VRING_DESC_F_INDIRECT) {
- ret = get_indirect(dev, vq, iov, iov_size,
+ ret = get_indirect(vq, iov, iov_size,
out_num, in_num,
log, log_num, &desc);
if (unlikely(ret < 0)) {
@@ -1285,7 +1286,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
continue;
}
- ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+ ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count,
iov_size - iov_count);
if (unlikely(ret < 0)) {
vq_err(vq, "Translation failure %d descriptor idx %d\n",
@@ -1321,60 +1322,24 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
vq->last_avail_idx -= n;
}
+EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
/* After we've used one of their buffers, we tell them about it. We'll then
* want to notify the guest, using eventfd. */
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
{
- struct vring_used_elem __user *used;
+ struct vring_used_elem heads = { head, len };
- /* The virtqueue contains a ring of used buffers. Get a pointer to the
- * next entry in that used ring. */
- used = &vq->used->ring[vq->last_used_idx % vq->num];
- if (__put_user(head, &used->id)) {
- vq_err(vq, "Failed to write used id");
- return -EFAULT;
- }
- if (__put_user(len, &used->len)) {
- vq_err(vq, "Failed to write used len");
- return -EFAULT;
- }
- /* Make sure buffer is written before we update index. */
- smp_wmb();
- if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) {
- vq_err(vq, "Failed to increment used idx");
- return -EFAULT;
- }
- if (unlikely(vq->log_used)) {
- /* Make sure data is seen before log. */
- smp_wmb();
- /* Log used ring entry write. */
- log_write(vq->log_base,
- vq->log_addr +
- ((void __user *)used - (void __user *)vq->used),
- sizeof *used);
- /* Log used index update. */
- log_write(vq->log_base,
- vq->log_addr + offsetof(struct vring_used, idx),
- sizeof vq->used->idx);
- if (vq->log_ctx)
- eventfd_signal(vq->log_ctx, 1);
- }
- vq->last_used_idx++;
- /* If the driver never bothers to signal in a very long while,
- * used index might wrap around. If that happens, invalidate
- * signalled_used index we stored. TODO: make sure driver
- * signals at least once in 2^16 and remove this. */
- if (unlikely(vq->last_used_idx == vq->signalled_used))
- vq->signalled_used_valid = false;
- return 0;
+ return vhost_add_used_n(vq, &heads, 1);
}
+EXPORT_SYMBOL_GPL(vhost_add_used);
static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
@@ -1386,7 +1351,16 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
start = vq->last_used_idx % vq->num;
used = vq->used->ring + start;
- if (__copy_to_user(used, heads, count * sizeof *used)) {
+ if (count == 1) {
+ if (__put_user(heads[0].id, &used->id)) {
+ vq_err(vq, "Failed to write used id");
+ return -EFAULT;
+ }
+ if (__put_user(heads[0].len, &used->len)) {
+ vq_err(vq, "Failed to write used len");
+ return -EFAULT;
+ }
+ } else if (__copy_to_user(used, heads, count * sizeof *used)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}
@@ -1444,6 +1418,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
}
return r;
}
+EXPORT_SYMBOL_GPL(vhost_add_used_n);
static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
@@ -1454,11 +1429,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
* interrupts. */
smp_mb();
- if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
unlikely(vq->avail_idx == vq->last_avail_idx))
return true;
- if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
__u16 flags;
if (__get_user(flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
@@ -1488,6 +1463,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (vq->call_ctx && vhost_notify(dev, vq))
eventfd_signal(vq->call_ctx, 1);
}
+EXPORT_SYMBOL_GPL(vhost_signal);
/* And here's the combo meal deal. Supersize me! */
void vhost_add_used_and_signal(struct vhost_dev *dev,
@@ -1497,6 +1473,7 @@ void vhost_add_used_and_signal(struct vhost_dev *dev,
vhost_add_used(vq, head, len);
vhost_signal(dev, vq);
}
+EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
/* multi-buffer version of vhost_add_used_and_signal */
void vhost_add_used_and_signal_n(struct vhost_dev *dev,
@@ -1506,6 +1483,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
vhost_add_used_n(vq, heads, count);
vhost_signal(dev, vq);
}
+EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
@@ -1516,7 +1494,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
return false;
vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
- if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
r = vhost_update_used_flags(vq);
if (r) {
vq_err(vq, "Failed to enable notification at %p: %d\n",
@@ -1543,6 +1521,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
return avail_idx != vq->avail_idx;
}
+EXPORT_SYMBOL_GPL(vhost_enable_notify);
/* We don't need to be notified again. */
void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
@@ -1552,56 +1531,28 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
return;
vq->used_flags |= VRING_USED_F_NO_NOTIFY;
- if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
r = vhost_update_used_flags(vq);
if (r)
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
}
}
+EXPORT_SYMBOL_GPL(vhost_disable_notify);
-static void vhost_zerocopy_done_signal(struct kref *kref)
-{
- struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
- kref);
- wake_up(&ubufs->wait);
-}
-
-struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
- bool zcopy)
+static int __init vhost_init(void)
{
- struct vhost_ubuf_ref *ubufs;
- /* No zero copy backend? Nothing to count. */
- if (!zcopy)
- return NULL;
- ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
- if (!ubufs)
- return ERR_PTR(-ENOMEM);
- kref_init(&ubufs->kref);
- init_waitqueue_head(&ubufs->wait);
- ubufs->vq = vq;
- return ubufs;
-}
-
-void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
-{
- kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+ return 0;
}
-void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+static void __exit vhost_exit(void)
{
- kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
- wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
- kfree(ubufs);
}
-void vhost_zerocopy_callback(void *arg)
-{
- struct ubuf_info *ubuf = arg;
- struct vhost_ubuf_ref *ubufs = ubuf->arg;
- struct vhost_virtqueue *vq = ubufs->vq;
+module_init(vhost_init);
+module_exit(vhost_exit);
- /* set len = 1 to mark this desc buffers done DMA */
- vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
- kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
-}
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Michael S. Tsirkin");
+MODULE_DESCRIPTION("Host kernel accelerator for virtio");