Diffstat (limited to 'drivers/vfio/pci/vfio_pci_intrs.c')
| -rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 190 |
1 file changed, 144 insertions, 46 deletions
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 211a4920b88..9dd49c9839a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -22,6 +22,7 @@
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/slab.h>
 
 #include "vfio_pci_private.h"
 
@@ -76,9 +77,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 		schedule_work(&virqfd->inject);
 	}
 
-	if (flags & POLLHUP)
-		/* The eventfd is closing, detach from VFIO */
-		virqfd_deactivate(virqfd);
+	if (flags & POLLHUP) {
+		unsigned long flags;
+		spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
+
+		/*
+		 * The eventfd is closing, if the virqfd has not yet been
+		 * queued for release, as determined by testing whether the
+		 * vdev pointer to it is still valid, queue it now.  As
+		 * with kvm irqfds, we know we won't race against the virqfd
+		 * going away because we hold wqh->lock to get here.
+		 */
+		if (*(virqfd->pvirqfd) == virqfd) {
+			*(virqfd->pvirqfd) = NULL;
+			virqfd_deactivate(virqfd);
+		}
+
+		spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
+	}
 
 	return 0;
 }
@@ -93,7 +109,6 @@ static void virqfd_ptable_queue_proc(struct file *file,
 static void virqfd_shutdown(struct work_struct *work)
 {
 	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
-	struct virqfd **pvirqfd = virqfd->pvirqfd;
 	u64 cnt;
 
 	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
@@ -101,7 +116,6 @@ static void virqfd_shutdown(struct work_struct *work)
 
 	eventfd_ctx_put(virqfd->eventfd);
 	kfree(virqfd);
-	*pvirqfd = NULL;
 }
 
 static void virqfd_inject(struct work_struct *work)
@@ -116,21 +130,17 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
 			 void (*thread)(struct vfio_pci_device *, void *),
 			 void *data, struct virqfd **pvirqfd, int fd)
 {
-	struct file *file = NULL;
-	struct eventfd_ctx *ctx = NULL;
+	struct fd irqfd;
+	struct eventfd_ctx *ctx;
 	struct virqfd *virqfd;
 	int ret = 0;
 	unsigned int events;
 
-	if (*pvirqfd)
-		return -EBUSY;
-
 	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
 	if (!virqfd)
 		return -ENOMEM;
 
 	virqfd->pvirqfd = pvirqfd;
-	*pvirqfd = virqfd;
 	virqfd->vdev = vdev;
 	virqfd->handler = handler;
 	virqfd->thread = thread;
@@ -139,28 +149,45 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
 	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
 	INIT_WORK(&virqfd->inject, virqfd_inject);
 
-	file = eventfd_fget(fd);
-	if (IS_ERR(file)) {
-		ret = PTR_ERR(file);
-		goto fail;
+	irqfd = fdget(fd);
+	if (!irqfd.file) {
+		ret = -EBADF;
+		goto err_fd;
 	}
 
-	ctx = eventfd_ctx_fileget(file);
+	ctx = eventfd_ctx_fileget(irqfd.file);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
-		goto fail;
+		goto err_ctx;
 	}
 
 	virqfd->eventfd = ctx;
 
 	/*
+	 * virqfds can be released by closing the eventfd or directly
+	 * through ioctl.  These are both done through a workqueue, so
+	 * we update the pointer to the virqfd under lock to avoid
+	 * pushing multiple jobs to release the same virqfd.
+	 */
+	spin_lock_irq(&vdev->irqlock);
+
+	if (*pvirqfd) {
+		spin_unlock_irq(&vdev->irqlock);
+		ret = -EBUSY;
+		goto err_busy;
+	}
+	*pvirqfd = virqfd;
+
+	spin_unlock_irq(&vdev->irqlock);
+
+	/*
 	 * Install our own custom wake-up handling so we are notified via
 	 * a callback whenever someone signals the underlying eventfd.
 	 */
 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
 
-	events = file->f_op->poll(file, &virqfd->pt);
+	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
 
 	/*
 	 * Check if there was an event already pending on the eventfd
@@ -175,31 +202,38 @@ static int virqfd_enable(struct vfio_pci_device *vdev,
 	 * Do not drop the file until the irqfd is fully initialized,
 	 * otherwise we might race against the POLLHUP.
 	 */
-	fput(file);
+	fdput(irqfd);
 
 	return 0;
-
-fail:
-	if (ctx && !IS_ERR(ctx))
-		eventfd_ctx_put(ctx);
-
-	if (file && !IS_ERR(file))
-		fput(file);
-
+err_busy:
+	eventfd_ctx_put(ctx);
+err_ctx:
+	fdput(irqfd);
+err_fd:
 	kfree(virqfd);
-	*pvirqfd = NULL;
 
 	return ret;
 }
 
-static void virqfd_disable(struct virqfd *virqfd)
+static void virqfd_disable(struct vfio_pci_device *vdev,
+			   struct virqfd **pvirqfd)
 {
-	if (!virqfd)
-		return;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vdev->irqlock, flags);
 
-	virqfd_deactivate(virqfd);
+	if (*pvirqfd) {
+		virqfd_deactivate(*pvirqfd);
+		*pvirqfd = NULL;
+	}
+
+	spin_unlock_irqrestore(&vdev->irqlock, flags);
 
-	/* Block until we know all outstanding shutdown jobs have completed. */
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed.
+	 * Even if we don't queue the job, flush the wq to be sure it's
+	 * been released.
+	 */
 	flush_workqueue(vfio_irqfd_cleanup_wq);
 }
 
@@ -250,7 +284,8 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
  * a signal is necessary, which can then be handled via a work queue
  * or directly depending on the caller.
  */
-int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, void *unused)
+static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
+					void *unused)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned long flags;
@@ -330,6 +365,17 @@ static int vfio_intx_enable(struct vfio_pci_device *vdev)
 		return -ENOMEM;
 
 	vdev->num_ctx = 1;
+
+	/*
+	 * If the virtual interrupt is masked, restore it.  Devices
+	 * supporting DisINTx can be masked at the hardware level
+	 * here, non-PCI-2.3 devices will have to wait until the
+	 * interrupt is enabled.
+	 */
+	vdev->ctx[0].masked = vdev->virq_disabled;
+	if (vdev->pci_2_3)
+		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
+
 	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
 
 	return 0;
@@ -364,25 +410,26 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
 		return PTR_ERR(trigger);
 	}
 
+	vdev->ctx[0].trigger = trigger;
+
 	if (!vdev->pci_2_3)
 		irqflags = 0;
 
 	ret = request_irq(pdev->irq, vfio_intx_handler,
 			  irqflags, vdev->ctx[0].name, vdev);
 	if (ret) {
+		vdev->ctx[0].trigger = NULL;
 		kfree(vdev->ctx[0].name);
 		eventfd_ctx_put(trigger);
 		return ret;
 	}
 
-	vdev->ctx[0].trigger = trigger;
-
 	/*
 	 * INTx disable will stick across the new irq setup,
 	 * disable_irq won't.
 	 */
 	spin_lock_irqsave(&vdev->irqlock, flags);
-	if (!vdev->pci_2_3 && (vdev->ctx[0].masked || vdev->virq_disabled))
+	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
 		disable_irq_nosync(pdev->irq);
 	spin_unlock_irqrestore(&vdev->irqlock, flags);
 
@@ -392,8 +439,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
 static void vfio_intx_disable(struct vfio_pci_device *vdev)
 {
 	vfio_intx_set_signal(vdev, -1);
-	virqfd_disable(vdev->ctx[0].unmask);
-	virqfd_disable(vdev->ctx[0].mask);
+	virqfd_disable(vdev, &vdev->ctx[0].unmask);
+	virqfd_disable(vdev, &vdev->ctx[0].mask);
 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
 	vdev->num_ctx = 0;
 	kfree(vdev->ctx);
@@ -435,15 +482,19 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
 		for (i = 0; i < nvec; i++)
 			vdev->msix[i].entry = i;
 
-		ret = pci_enable_msix(pdev, vdev->msix, nvec);
-		if (ret) {
+		ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec);
+		if (ret < nvec) {
+			if (ret > 0)
+				pci_disable_msix(pdev);
 			kfree(vdev->msix);
 			kfree(vdev->ctx);
 			return ret;
 		}
 	} else {
-		ret = pci_enable_msi_block(pdev, nvec);
-		if (ret) {
+		ret = pci_enable_msi_range(pdev, 1, nvec);
+		if (ret < nvec) {
+			if (ret > 0)
+				pci_disable_msi(pdev);
 			kfree(vdev->ctx);
 			return ret;
 		}
@@ -539,8 +590,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
 	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
 
 	for (i = 0; i < vdev->num_ctx; i++) {
-		virqfd_disable(vdev->ctx[i].unmask);
-		virqfd_disable(vdev->ctx[i].mask);
+		virqfd_disable(vdev, &vdev->ctx[i].unmask);
+		virqfd_disable(vdev, &vdev->ctx[i].mask);
 	}
 
 	if (msix) {
@@ -577,7 +628,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
 					     vfio_send_intx_eventfd, NULL,
 					     &vdev->ctx[0].unmask, fd);
 
-		virqfd_disable(vdev->ctx[0].unmask);
+		virqfd_disable(vdev, &vdev->ctx[0].unmask);
 	}
 
 	return 0;
@@ -697,6 +748,46 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
 	return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+				    unsigned index, unsigned start,
+				    unsigned count, uint32_t flags, void *data)
+{
+	int32_t fd = *(int32_t *)data;
+
+	if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+	    !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+		return -EINVAL;
+
+	/* DATA_NONE/DATA_BOOL enables loopback testing */
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		if (vdev->err_trigger)
+			eventfd_signal(vdev->err_trigger, 1);
+		return 0;
+	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+		uint8_t trigger = *(uint8_t *)data;
+		if (trigger && vdev->err_trigger)
+			eventfd_signal(vdev->err_trigger, 1);
+		return 0;
+	}
+
+	/* Handle SET_DATA_EVENTFD */
+	if (fd == -1) {
+		if (vdev->err_trigger)
+			eventfd_ctx_put(vdev->err_trigger);
+		vdev->err_trigger = NULL;
+		return 0;
+	} else if (fd >= 0) {
+		struct eventfd_ctx *efdctx;
+		efdctx = eventfd_ctx_fdget(fd);
+		if (IS_ERR(efdctx))
+			return PTR_ERR(efdctx);
+		if (vdev->err_trigger)
+			eventfd_ctx_put(vdev->err_trigger);
+		vdev->err_trigger = efdctx;
+		return 0;
+	} else
+		return -EINVAL;
+}
+
 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 			    unsigned index, unsigned start, unsigned count,
 			    void *data)
@@ -731,6 +822,13 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 			break;
 		}
 		break;
+	case VFIO_PCI_ERR_IRQ_INDEX:
+		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+		case VFIO_IRQ_SET_ACTION_TRIGGER:
+			if (pci_is_pcie(vdev->pdev))
+				func = vfio_pci_set_err_trigger;
+			break;
+		}
 	}
 
 	if (!func)
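The heart of the virqfd change is that two paths can race to tear down the same virqfd: the eventfd's POLLHUP wakeup and the disable ioctl. The patch turns *pvirqfd into a single ownership token: whichever path NULLs it under vdev->irqlock queues the one and only shutdown job, while the wakeup path is additionally protected by wqh->lock against the virqfd vanishing mid-callback. Below is a generic userspace sketch of the same claim-under-lock idiom; the names (release_once, owner) are illustrative only, not from the patch:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct resource { int dummy; } *owner;	/* plays the role of *pvirqfd */

/* Both teardown paths call this; only one caller claims and frees. */
static void release_once(void)
{
	struct resource *r = NULL;

	pthread_mutex_lock(&lock);
	if (owner) {		/* still valid: take ownership */
		r = owner;
		owner = NULL;	/* later racers see NULL and do nothing */
	}
	pthread_mutex_unlock(&lock);

	if (r)
		free(r);	/* stands in for queueing virqfd_shutdown */
}

int main(void)
{
	owner = calloc(1, sizeof(*owner));
	release_once();		/* e.g. the POLLHUP path */
	release_once();		/* e.g. the ioctl path: now a no-op */
	printf("released exactly once\n");
	return 0;
}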
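The MSI/MSI-X hunks move from pci_enable_msix()/pci_enable_msi_block(), whose positive return value meant "only this many vectors are available, retry", to the range API, which either allocates between minvec and maxvec vectors and returns the count, or returns a negative errno. Because the patch passes minvec = 1 but needs all nvec vectors, a short positive return is a partial allocation that must be rolled back, which is what the added pci_disable_msix()/pci_disable_msi() calls do. A kernel-style sketch of that convention; demo_enable_msix_exact is a hypothetical helper, not part of the patch:

#include <linux/pci.h>
#include <linux/errno.h>

/*
 * Hypothetical helper: allocate exactly nvec MSI-X vectors with the
 * range API, rolling back any partial allocation.  Returns nvec on
 * success or a negative errno.
 */
static int demo_enable_msix_exact(struct pci_dev *pdev,
				  struct msix_entry *entries, int nvec)
{
	int ret = pci_enable_msix_range(pdev, entries, 1, nvec);

	if (ret < 0)
		return ret;		/* nothing was allocated */

	if (ret < nvec) {
		/* partial success: some vectors allocated, not enough */
		pci_disable_msix(pdev);
		return -ENOSPC;		/* hypothetical all-or-nothing policy */
	}

	return nvec;
}

Passing minvec == maxvec == nvec would make the PCI core enforce all-or-nothing itself; the patch instead keeps minvec = 1 and handles the short count explicitly, preserving the previous behavior of returning the shortfall to the caller.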
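Finally, the new VFIO_PCI_ERR_IRQ_INDEX path lets userspace register an eventfd that the kernel signals via vdev->err_trigger when a PCIe error is reported for the device, with the DATA_NONE/DATA_BOOL cases providing a loopback self-test of that same eventfd. A minimal userspace sketch of the registration, assuming device_fd was already obtained through VFIO_GROUP_GET_DEVICE_FD (register_err_eventfd is our own name, and error handling is abbreviated):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Register an eventfd as the error trigger; returns the eventfd or -1. */
static int register_err_eventfd(int device_fd)
{
	size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
	struct vfio_irq_set *set;
	int efd = eventfd(0, EFD_CLOEXEC);

	if (efd < 0)
		return -1;

	set = calloc(1, argsz);
	if (!set) {
		close(efd);
		return -1;
	}

	set->argsz = argsz;
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_ERR_IRQ_INDEX;
	set->start = 0;
	set->count = 1;
	memcpy(set->data, &efd, sizeof(int32_t));	/* the eventfd to signal */

	if (ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set) < 0) {
		free(set);
		close(efd);
		return -1;
	}

	free(set);
	return efd;
}

Each error event increments the eventfd counter, so a read() on the returned fd blocks until an event arrives and resets the count; repeating the ioctl with fd = -1 releases the trigger. Note that the dispatch code above only installs the handler for PCI Express devices (the pci_is_pcie() check).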
