Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/Kconfig          |  14
-rw-r--r--  virt/kvm/coalesced_mmio.c |  74
-rw-r--r--  virt/kvm/coalesced_mmio.h |   1
-rw-r--r--  virt/kvm/eventfd.c        | 578
-rw-r--r--  virt/kvm/ioapic.c         |  78
-rw-r--r--  virt/kvm/iodev.h          |  55
-rw-r--r--  virt/kvm/irq_comm.c       |  51
-rw-r--r--  virt/kvm/kvm_main.c       | 298
-rw-r--r--  virt/kvm/kvm_trace.c      | 285
9 files changed, 963 insertions, 471 deletions
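Much of the churn in this series comes from the iodev.h change further down: struct kvm_io_device loses its private pointer and per-device in_range() hook in favour of a shared const ops table, read/write now return 0 when the device claims the transaction and non-zero (typically -EOPNOTSUPP) to let the bus try the next device, and handlers are called under kvm->slots_lock. A minimal sketch of a device written against the new contract; the dummy_* names are illustrative only, while kvm_iodevice_init() and the three-argument kvm_io_bus_register_dev() are the helpers introduced in this patch:

#include <linux/kvm_host.h>
#include "iodev.h"

struct dummy_dev {
	struct kvm_io_device dev;
	gpa_t base;
	u8 reg;
};

static inline struct dummy_dev *to_dummy(struct kvm_io_device *dev)
{
	return container_of(dev, struct dummy_dev, dev);
}

static int dummy_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
{
	struct dummy_dev *d = to_dummy(this);

	if (addr != d->base || len != 1)
		return -EOPNOTSUPP;	/* not ours: the bus tries the next device */

	*(u8 *)val = d->reg;
	return 0;			/* handled */
}

static int dummy_write(struct kvm_io_device *this, gpa_t addr, int len,
		       const void *val)
{
	struct dummy_dev *d = to_dummy(this);

	if (addr != d->base || len != 1)
		return -EOPNOTSUPP;

	d->reg = *(const u8 *)val;
	return 0;
}

static const struct kvm_io_device_ops dummy_ops = {
	.read  = dummy_read,
	.write = dummy_write,
};

/* Registration mirrors what coalesced_mmio and the ioapic do in this patch. */
static int dummy_init(struct kvm *kvm, struct dummy_dev *d, gpa_t base)
{
	d->base = base;
	kvm_iodevice_init(&d->dev, &dummy_ops);
	return kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &d->dev);
}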
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig new file mode 100644 index 00000000000..daece36c0a5 --- /dev/null +++ b/virt/kvm/Kconfig @@ -0,0 +1,14 @@ +# KVM common configuration items and defaults + +config HAVE_KVM + bool + +config HAVE_KVM_IRQCHIP + bool + +config HAVE_KVM_EVENTFD + bool + select EVENTFD + +config KVM_APIC_ARCHITECTURE + bool diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 5ae620d32fa..04d69cd7049 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -14,32 +14,28 @@ #include "coalesced_mmio.h" -static int coalesced_mmio_in_range(struct kvm_io_device *this, - gpa_t addr, int len, int is_write) +static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) +{ + return container_of(dev, struct kvm_coalesced_mmio_dev, dev); +} + +static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, + gpa_t addr, int len) { - struct kvm_coalesced_mmio_dev *dev = - (struct kvm_coalesced_mmio_dev*)this->private; struct kvm_coalesced_mmio_zone *zone; - int next; + struct kvm_coalesced_mmio_ring *ring; + unsigned avail; int i; - if (!is_write) - return 0; - - /* kvm->lock is taken by the caller and must be not released before - * dev.read/write - */ - /* Are we able to batch it ? */ /* last is the first free entry * check if we don't meet the first used entry * there is always one unused entry in the buffer */ - - next = (dev->kvm->coalesced_mmio_ring->last + 1) % - KVM_COALESCED_MMIO_MAX; - if (next == dev->kvm->coalesced_mmio_ring->first) { + ring = dev->kvm->coalesced_mmio_ring; + avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; + if (avail < KVM_MAX_VCPUS) { /* full */ return 0; } @@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this, return 0; } -static void coalesced_mmio_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *val) +static int coalesced_mmio_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) { - struct kvm_coalesced_mmio_dev *dev = - (struct kvm_coalesced_mmio_dev*)this->private; + struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; + if (!coalesced_mmio_in_range(dev, addr, len)) + return -EOPNOTSUPP; - /* kvm->lock must be taken by caller before call to in_range()*/ + spin_lock(&dev->lock); /* copy data in first free entry of the ring */ @@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this, memcpy(ring->coalesced_mmio[ring->last].data, val, len); smp_wmb(); ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; + spin_unlock(&dev->lock); + return 0; } static void coalesced_mmio_destructor(struct kvm_io_device *this) { - kfree(this); + struct kvm_coalesced_mmio_dev *dev = to_mmio(this); + + kfree(dev); } +static const struct kvm_io_device_ops coalesced_mmio_ops = { + .write = coalesced_mmio_write, + .destructor = coalesced_mmio_destructor, +}; + int kvm_coalesced_mmio_init(struct kvm *kvm) { struct kvm_coalesced_mmio_dev *dev; + int ret; dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); if (!dev) return -ENOMEM; - dev->dev.write = coalesced_mmio_write; - dev->dev.in_range = coalesced_mmio_in_range; - dev->dev.destructor = coalesced_mmio_destructor; - dev->dev.private = dev; + spin_lock_init(&dev->lock); + kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); dev->kvm = kvm; kvm->coalesced_mmio_dev = dev; - kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); - return 0; + ret = 
kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); + if (ret < 0) + kfree(dev); + + return ret; } int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, @@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, if (dev == NULL) return -EINVAL; - mutex_lock(&kvm->lock); + down_write(&kvm->slots_lock); if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { - mutex_unlock(&kvm->lock); + up_write(&kvm->slots_lock); return -ENOBUFS; } dev->zone[dev->nb_zones] = *zone; dev->nb_zones++; - mutex_unlock(&kvm->lock); + up_write(&kvm->slots_lock); return 0; } @@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, if (dev == NULL) return -EINVAL; - mutex_lock(&kvm->lock); + down_write(&kvm->slots_lock); i = dev->nb_zones; while(i) { @@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, i--; } - mutex_unlock(&kvm->lock); + up_write(&kvm->slots_lock); return 0; } diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 5ac0ec62846..4b49f27fa31 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -12,6 +12,7 @@ struct kvm_coalesced_mmio_dev { struct kvm_io_device dev; struct kvm *kvm; + spinlock_t lock; int nb_zones; struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c new file mode 100644 index 00000000000..bb4ebd89b9f --- /dev/null +++ b/virt/kvm/eventfd.c @@ -0,0 +1,578 @@ +/* + * kvm eventfd support - use eventfd objects to signal various KVM events + * + * Copyright 2009 Novell. All Rights Reserved. + * + * Author: + * Gregory Haskins <ghaskins@novell.com> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/workqueue.h> +#include <linux/syscalls.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/list.h> +#include <linux/eventfd.h> +#include <linux/kernel.h> + +#include "iodev.h" + +/* + * -------------------------------------------------------------------- + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * + * Credit goes to Avi Kivity for the original idea. 
+ * -------------------------------------------------------------------- + */ + +struct _irqfd { + struct kvm *kvm; + struct eventfd_ctx *eventfd; + int gsi; + struct list_head list; + poll_table pt; + wait_queue_head_t *wqh; + wait_queue_t wait; + struct work_struct inject; + struct work_struct shutdown; +}; + +static struct workqueue_struct *irqfd_cleanup_wq; + +static void +irqfd_inject(struct work_struct *work) +{ + struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm *kvm = irqfd->kvm; + + mutex_lock(&kvm->irq_lock); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + mutex_unlock(&kvm->irq_lock); +} + +/* + * Race-free decouple logic (ordering is critical) + */ +static void +irqfd_shutdown(struct work_struct *work) +{ + struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + + /* + * Synchronize with the wait-queue and unhook ourselves to prevent + * further events. + */ + remove_wait_queue(irqfd->wqh, &irqfd->wait); + + /* + * We know no new events will be scheduled at this point, so block + * until all previously outstanding events have completed + */ + flush_work(&irqfd->inject); + + /* + * It is now safe to release the object's resources + */ + eventfd_ctx_put(irqfd->eventfd); + kfree(irqfd); +} + + +/* assumes kvm->irqfds.lock is held */ +static bool +irqfd_is_active(struct _irqfd *irqfd) +{ + return list_empty(&irqfd->list) ? false : true; +} + +/* + * Mark the irqfd as inactive and schedule it for removal + * + * assumes kvm->irqfds.lock is held + */ +static void +irqfd_deactivate(struct _irqfd *irqfd) +{ + BUG_ON(!irqfd_is_active(irqfd)); + + list_del_init(&irqfd->list); + + queue_work(irqfd_cleanup_wq, &irqfd->shutdown); +} + +/* + * Called with wqh->lock held and interrupts disabled + */ +static int +irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + unsigned long flags = (unsigned long)key; + + if (flags & POLLIN) + /* An event has been signaled, inject an interrupt */ + schedule_work(&irqfd->inject); + + if (flags & POLLHUP) { + /* The eventfd is closing, detach from KVM */ + struct kvm *kvm = irqfd->kvm; + unsigned long flags; + + spin_lock_irqsave(&kvm->irqfds.lock, flags); + + /* + * We must check if someone deactivated the irqfd before + * we could acquire the irqfds.lock since the item is + * deactivated from the KVM side before it is unhooked from + * the wait-queue. If it is already deactivated, we can + * simply return knowing the other side will cleanup for us. 
+ * We cannot race against the irqfd going away since the + * other side is required to acquire wqh->lock, which we hold + */ + if (irqfd_is_active(irqfd)) + irqfd_deactivate(irqfd); + + spin_unlock_irqrestore(&kvm->irqfds.lock, flags); + } + + return 0; +} + +static void +irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + + irqfd->wqh = wqh; + add_wait_queue(wqh, &irqfd->wait); +} + +static int +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) +{ + struct _irqfd *irqfd; + struct file *file = NULL; + struct eventfd_ctx *eventfd = NULL; + int ret; + unsigned int events; + + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); + if (!irqfd) + return -ENOMEM; + + irqfd->kvm = kvm; + irqfd->gsi = gsi; + INIT_LIST_HEAD(&irqfd->list); + INIT_WORK(&irqfd->inject, irqfd_inject); + INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + + file = eventfd_fget(fd); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto fail; + } + + eventfd = eventfd_ctx_fileget(file); + if (IS_ERR(eventfd)) { + ret = PTR_ERR(eventfd); + goto fail; + } + + irqfd->eventfd = eventfd; + + /* + * Install our own custom wake-up handling so we are notified via + * a callback whenever someone signals the underlying eventfd + */ + init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); + init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); + + events = file->f_op->poll(file, &irqfd->pt); + + spin_lock_irq(&kvm->irqfds.lock); + list_add_tail(&irqfd->list, &kvm->irqfds.items); + spin_unlock_irq(&kvm->irqfds.lock); + + /* + * Check if there was an event already pending on the eventfd + * before we registered, and trigger it as if we didn't miss it. + */ + if (events & POLLIN) + schedule_work(&irqfd->inject); + + /* + * do not drop the file until the irqfd is fully initialized, otherwise + * we might race against the POLLHUP + */ + fput(file); + + return 0; + +fail: + if (eventfd && !IS_ERR(eventfd)) + eventfd_ctx_put(eventfd); + + if (!IS_ERR(file)) + fput(file); + + kfree(irqfd); + return ret; +} + +void +kvm_eventfd_init(struct kvm *kvm) +{ + spin_lock_init(&kvm->irqfds.lock); + INIT_LIST_HEAD(&kvm->irqfds.items); + INIT_LIST_HEAD(&kvm->ioeventfds); +} + +/* + * shutdown any irqfd's that match fd+gsi + */ +static int +kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) +{ + struct _irqfd *irqfd, *tmp; + struct eventfd_ctx *eventfd; + + eventfd = eventfd_ctx_fdget(fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + spin_lock_irq(&kvm->irqfds.lock); + + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { + if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) + irqfd_deactivate(irqfd); + } + + spin_unlock_irq(&kvm->irqfds.lock); + eventfd_ctx_put(eventfd); + + /* + * Block until we know all outstanding shutdown jobs have completed + * so that we guarantee there will not be any more interrupts on this + * gsi once this deassign function returns. + */ + flush_workqueue(irqfd_cleanup_wq); + + return 0; +} + +int +kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +{ + if (flags & KVM_IRQFD_FLAG_DEASSIGN) + return kvm_irqfd_deassign(kvm, fd, gsi); + + return kvm_irqfd_assign(kvm, fd, gsi); +} + +/* + * This function is called as the kvm VM fd is being released. 
Shutdown all + * irqfds that still remain open + */ +void +kvm_irqfd_release(struct kvm *kvm) +{ + struct _irqfd *irqfd, *tmp; + + spin_lock_irq(&kvm->irqfds.lock); + + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) + irqfd_deactivate(irqfd); + + spin_unlock_irq(&kvm->irqfds.lock); + + /* + * Block until we know all outstanding shutdown jobs have completed + * since we do not take a kvm* reference. + */ + flush_workqueue(irqfd_cleanup_wq); + +} + +/* + * create a host-wide workqueue for issuing deferred shutdown requests + * aggregated from all vm* instances. We need our own isolated single-thread + * queue to prevent deadlock against flushing the normal work-queue. + */ +static int __init irqfd_module_init(void) +{ + irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); + if (!irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + +static void __exit irqfd_module_exit(void) +{ + destroy_workqueue(irqfd_cleanup_wq); +} + +module_init(irqfd_module_init); +module_exit(irqfd_module_exit); + +/* + * -------------------------------------------------------------------- + * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. + * + * userspace can register a PIO/MMIO address with an eventfd for receiving + * notification when the memory has been touched. + * -------------------------------------------------------------------- + */ + +struct _ioeventfd { + struct list_head list; + u64 addr; + int length; + struct eventfd_ctx *eventfd; + u64 datamatch; + struct kvm_io_device dev; + bool wildcard; +}; + +static inline struct _ioeventfd * +to_ioeventfd(struct kvm_io_device *dev) +{ + return container_of(dev, struct _ioeventfd, dev); +} + +static void +ioeventfd_release(struct _ioeventfd *p) +{ + eventfd_ctx_put(p->eventfd); + list_del(&p->list); + kfree(p); +} + +static bool +ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) +{ + u64 _val; + + if (!(addr == p->addr && len == p->length)) + /* address-range must be precise for a hit */ + return false; + + if (p->wildcard) + /* all else equal, wildcard is always a hit */ + return true; + + /* otherwise, we have to actually compare the data */ + + BUG_ON(!IS_ALIGNED((unsigned long)val, len)); + + switch (len) { + case 1: + _val = *(u8 *)val; + break; + case 2: + _val = *(u16 *)val; + break; + case 4: + _val = *(u32 *)val; + break; + case 8: + _val = *(u64 *)val; + break; + default: + return false; + } + + return _val == p->datamatch ? true : false; +} + +/* MMIO/PIO writes trigger an event if the addr/val match */ +static int +ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct _ioeventfd *p = to_ioeventfd(this); + + if (!ioeventfd_in_range(p, addr, len, val)) + return -EOPNOTSUPP; + + eventfd_signal(p->eventfd, 1); + return 0; +} + +/* + * This function is called as KVM is completely shutting down. 
We do not + * need to worry about locking just nuke anything we have as quickly as possible + */ +static void +ioeventfd_destructor(struct kvm_io_device *this) +{ + struct _ioeventfd *p = to_ioeventfd(this); + + ioeventfd_release(p); +} + +static const struct kvm_io_device_ops ioeventfd_ops = { + .write = ioeventfd_write, + .destructor = ioeventfd_destructor, +}; + +/* assumes kvm->slots_lock held */ +static bool +ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) +{ + struct _ioeventfd *_p; + + list_for_each_entry(_p, &kvm->ioeventfds, list) + if (_p->addr == p->addr && _p->length == p->length && + (_p->wildcard || p->wildcard || + _p->datamatch == p->datamatch)) + return true; + + return false; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; + struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + struct _ioeventfd *p; + struct eventfd_ctx *eventfd; + int ret; + + /* must be natural-word sized */ + switch (args->len) { + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + ret = -ENOMEM; + goto fail; + } + + INIT_LIST_HEAD(&p->list); + p->addr = args->addr; + p->length = args->len; + p->eventfd = eventfd; + + /* The datamatch feature is optional, otherwise this is a wildcard */ + if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) + p->datamatch = args->datamatch; + else + p->wildcard = true; + + down_write(&kvm->slots_lock); + + /* Verify that there isnt a match already */ + if (ioeventfd_check_collision(kvm, p)) { + ret = -EEXIST; + goto unlock_fail; + } + + kvm_iodevice_init(&p->dev, &ioeventfd_ops); + + ret = __kvm_io_bus_register_dev(bus, &p->dev); + if (ret < 0) + goto unlock_fail; + + list_add_tail(&p->list, &kvm->ioeventfds); + + up_write(&kvm->slots_lock); + + return 0; + +unlock_fail: + up_write(&kvm->slots_lock); + +fail: + kfree(p); + eventfd_ctx_put(eventfd); + + return ret; +} + +static int +kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; + struct kvm_io_bus *bus = pio ? 
&kvm->pio_bus : &kvm->mmio_bus; + struct _ioeventfd *p, *tmp; + struct eventfd_ctx *eventfd; + int ret = -ENOENT; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + down_write(&kvm->slots_lock); + + list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { + bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); + + if (p->eventfd != eventfd || + p->addr != args->addr || + p->length != args->len || + p->wildcard != wildcard) + continue; + + if (!p->wildcard && p->datamatch != args->datamatch) + continue; + + __kvm_io_bus_unregister_dev(bus, &p->dev); + ioeventfd_release(p); + ret = 0; + break; + } + + up_write(&kvm->slots_lock); + + eventfd_ctx_put(eventfd); + + return ret; +} + +int +kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) + return kvm_deassign_ioeventfd(kvm, args); + + return kvm_assign_ioeventfd(kvm, args); +} diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 1150c6d5c7b..9fe140bb38e 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -36,6 +36,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> +#include <trace/events/kvm.h> #include "ioapic.h" #include "lapic.h" @@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; - mask_before = ioapic->redirtbl[index].fields.mask; + e = &ioapic->redirtbl[index]; + mask_before = e->fields.mask; if (ioapic->ioregsel & 1) { - ioapic->redirtbl[index].bits &= 0xffffffff; - ioapic->redirtbl[index].bits |= (u64) val << 32; + e->bits &= 0xffffffff; + e->bits |= (u64) val << 32; } else { - ioapic->redirtbl[index].bits &= ~0xffffffffULL; - ioapic->redirtbl[index].bits |= (u32) val; - ioapic->redirtbl[index].fields.remote_irr = 0; + e->bits &= ~0xffffffffULL; + e->bits |= (u32) val; + e->fields.remote_irr = 0; } - mask_after = ioapic->redirtbl[index].fields.mask; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); - if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index); break; @@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) /* Always delivery PIT interrupt to vcpu 0 */ if (irq == 0) { irqe.dest_mode = 0; /* Physical mode. 
*/ - irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; + /* need to read apic_id from apic regiest since + * it can be rewritten */ + irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; } #endif return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); @@ -188,7 +193,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) if ((edge && old_irr != ioapic->irr) || (!edge && !entry.fields.remote_irr)) ret = ioapic_service(ioapic, irq); + else + ret = 0; /* report coalesced interrupt */ } + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } return ret; } @@ -220,24 +228,29 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); } -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, - int len, int is_write) +static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) { - struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + return container_of(dev, struct kvm_ioapic, dev); +} +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) +{ return ((addr >= ioapic->base_address && (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); } -static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, - void *val) +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) { - struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + struct kvm_ioapic *ioapic = to_ioapic(this); u32 result; + if (!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; ioapic_debug("addr %lx\n", (unsigned long)addr); ASSERT(!(addr & 0xf)); /* check alignment */ + mutex_lock(&ioapic->kvm->irq_lock); addr &= 0xff; switch (addr) { case IOAPIC_REG_SELECT: @@ -264,22 +277,28 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, default: printk(KERN_WARNING "ioapic: wrong length %d\n", len); } + mutex_unlock(&ioapic->kvm->irq_lock); + return 0; } -static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) { - struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + struct kvm_ioapic *ioapic = to_ioapic(this); u32 data; + if (!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ + + mutex_lock(&ioapic->kvm->irq_lock); if (len == 4 || len == 8) data = *(u32 *) val; else { printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); - return; + goto unlock; } addr &= 0xff; @@ -300,6 +319,9 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, default: break; } +unlock: + mutex_unlock(&ioapic->kvm->irq_lock); + return 0; } void kvm_ioapic_reset(struct kvm_ioapic *ioapic) @@ -314,21 +336,27 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->id = 0; } +static const struct kvm_io_device_ops ioapic_mmio_ops = { + .read = ioapic_mmio_read, + .write = ioapic_mmio_write, +}; + int kvm_ioapic_init(struct kvm *kvm) { struct kvm_ioapic *ioapic; + int ret; ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); - ioapic->dev.read = ioapic_mmio_read; - ioapic->dev.write = ioapic_mmio_write; - ioapic->dev.in_range = ioapic_in_range; - ioapic->dev.private = ioapic; + kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; - 
kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); - return 0; + ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); + if (ret < 0) + kfree(ioapic); + + return ret; } diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index 55e8846ac3a..12fd3caffd2 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h @@ -17,49 +17,54 @@ #define __KVM_IODEV_H__ #include <linux/kvm_types.h> +#include <asm/errno.h> -struct kvm_io_device { - void (*read)(struct kvm_io_device *this, +struct kvm_io_device; + +/** + * kvm_io_device_ops are called under kvm slots_lock. + * read and write handlers return 0 if the transaction has been handled, + * or non-zero to have it passed to the next device. + **/ +struct kvm_io_device_ops { + int (*read)(struct kvm_io_device *this, + gpa_t addr, + int len, + void *val); + int (*write)(struct kvm_io_device *this, gpa_t addr, int len, - void *val); - void (*write)(struct kvm_io_device *this, - gpa_t addr, - int len, - const void *val); - int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, - int is_write); + const void *val); void (*destructor)(struct kvm_io_device *this); +}; - void *private; + +struct kvm_io_device { + const struct kvm_io_device_ops *ops; }; -static inline void kvm_iodevice_read(struct kvm_io_device *dev, - gpa_t addr, - int len, - void *val) +static inline void kvm_iodevice_init(struct kvm_io_device *dev, + const struct kvm_io_device_ops *ops) { - dev->read(dev, addr, len, val); + dev->ops = ops; } -static inline void kvm_iodevice_write(struct kvm_io_device *dev, - gpa_t addr, - int len, - const void *val) +static inline int kvm_iodevice_read(struct kvm_io_device *dev, + gpa_t addr, int l, void *v) { - dev->write(dev, addr, len, val); + return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; } -static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, - gpa_t addr, int len, int is_write) +static inline int kvm_iodevice_write(struct kvm_io_device *dev, + gpa_t addr, int l, const void *v) { - return dev->in_range(dev, addr, len, is_write); + return dev->ops->write ? 
dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) { - if (dev->destructor) - dev->destructor(dev); + if (dev->ops->destructor) + dev->ops->destructor(dev); } #endif /* __KVM_IODEV_H__ */ diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ddc17f0e2f3..001663ff401 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -20,6 +20,7 @@ */ #include <linux/kvm_host.h> +#include <trace/events/kvm.h> #include <asm/msidef.h> #ifdef CONFIG_IA64 @@ -62,14 +63,14 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; + WARN_ON(!mutex_is_locked(&kvm->irq_lock)); + if (irq->dest_mode == 0 && irq->dest_id == 0xff && kvm_is_dm_lowest_prio(irq)) printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); - for (i = 0; i < KVM_MAX_VCPUS; i++) { - vcpu = kvm->vcpus[i]; - - if (!vcpu || !kvm_apic_present(vcpu)) + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) continue; if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, @@ -99,6 +100,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, { struct kvm_lapic_irq irq; + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); + irq.dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; irq.vector = (e->msi.data & @@ -113,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } -/* This should be called with the kvm->lock mutex held +/* This should be called with the kvm->irq_lock mutex held * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) @@ -125,6 +128,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) unsigned long *irq_state, sig_level; int ret = -1; + trace_kvm_set_irq(irq, level, irq_source_id); + + WARN_ON(!mutex_is_locked(&kvm->irq_lock)); + if (irq < KVM_IOAPIC_NUM_PINS) { irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; @@ -134,7 +141,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) else clear_bit(irq_source_id, irq_state); sig_level = !!(*irq_state); - } else /* Deal with MSI/MSI-X */ + } else if (!level) + return ret; + else /* Deal with MSI/MSI-X */ sig_level = 1; /* Not possible to detect if the guest uses the PIC or the @@ -159,6 +168,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) struct hlist_node *n; unsigned gsi = pin; + trace_kvm_ack_irq(irqchip, pin); + list_for_each_entry(e, &kvm->irq_routing, link) if (e->type == KVM_IRQ_ROUTING_IRQCHIP && e->irqchip.irqchip == irqchip && @@ -175,19 +186,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) void kvm_register_irq_ack_notifier(struct kvm *kvm, |