diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/assigned-dev.c | 28 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 51 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 15 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 26 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 75 | ||||
-rw-r--r-- | virt/kvm/ioapic.h | 4 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 153 | ||||
-rw-r--r-- | virt/kvm/irq_comm.c | 16 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 553 |
10 files changed, 640 insertions, 284 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index daece36c0a5..7f1178f6b83 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD config KVM_APIC_ARCHITECTURE bool + +config KVM_MMIO + bool diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f73de631e3e..7c98928b09d 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -1,7 +1,7 @@ /* * Kernel-based Virtual Machine - device assignment support * - * Copyright (C) 2006-9 Red Hat, Inc + * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -16,6 +16,7 @@ #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include "irq.h" static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, @@ -57,12 +58,10 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) { struct kvm_assigned_dev_kernel *assigned_dev; - struct kvm *kvm; int i; assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, interrupt_work); - kvm = assigned_dev->kvm; spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { @@ -315,12 +314,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, kvm_assigned_dev_intr, 0, "kvm_assigned_msix_device", (void *)dev); - /* FIXME: free requested_irq's on failure */ if (r) - return r; + goto err; } return 0; +err: + for (i -= 1; i >= 0; i--) + free_irq(dev->host_msix_entries[i].vector, (void *)dev); + pci_disable_msix(dev->dev); + return r; } #endif @@ -443,9 +446,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; unsigned long host_irq_type, guest_irq_type; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - if (!irqchip_in_kernel(kvm)) return r; @@ -504,12 +504,12 @@ out: static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { - int r = 0; + int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; mutex_lock(&kvm->lock); - down_read(&kvm->slots_lock); + idx = srcu_read_lock(&kvm->srcu); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_dev->assigned_dev_id); @@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -ENOMEM; goto out; } - dev = pci_get_bus_and_slot(assigned_dev->busnr, + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, + assigned_dev->busnr, assigned_dev->devfn); if (!dev) { printk(KERN_INFO "%s: host device not found\n", __func__); @@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; @@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, } out: - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; out_list_del: @@ -585,7 +587,7 @@ out_put: pci_dev_put(dev); out_free: kfree(match); - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; } diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 04d69cd7049..fc8487564d1 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -2,6 +2,7 @@ * KVM coalesced MMIO * * Copyright (c) 2008 Bull S.A.S. + * Copyright 2009 Red Hat, Inc. and/or its affiliates. * * Author: Laurent Vivier <Laurent.Vivier@bull.net> * @@ -10,6 +11,7 @@ #include "iodev.h" #include <linux/kvm_host.h> +#include <linux/slab.h> #include <linux/kvm.h> #include "coalesced_mmio.h" @@ -92,41 +94,66 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { int kvm_coalesced_mmio_init(struct kvm *kvm) { struct kvm_coalesced_mmio_dev *dev; + struct page *page; int ret; + ret = -ENOMEM; + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_err; + kvm->coalesced_mmio_ring = page_address(page); + + ret = -ENOMEM; dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); if (!dev) - return -ENOMEM; + goto out_free_page; spin_lock_init(&dev->lock); kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); dev->kvm = kvm; kvm->coalesced_mmio_dev = dev; - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); + mutex_unlock(&kvm->slots_lock); if (ret < 0) - kfree(dev); + goto out_free_dev; + + return ret; +out_free_dev: + kvm->coalesced_mmio_dev = NULL; + kfree(dev); +out_free_page: + kvm->coalesced_mmio_ring = NULL; + __free_page(page); +out_err: return ret; } +void kvm_coalesced_mmio_free(struct kvm *kvm) +{ + if (kvm->coalesced_mmio_ring) + free_page((unsigned long)kvm->coalesced_mmio_ring); +} + int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone) + struct kvm_coalesced_mmio_zone *zone) { struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; if (dev == NULL) - return -EINVAL; + return -ENXIO; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return -ENOBUFS; } dev->zone[dev->nb_zones] = *zone; dev->nb_zones++; - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; } @@ -138,12 +165,12 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *z; if (dev == NULL) - return -EINVAL; + return -ENXIO; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); i = dev->nb_zones; - while(i) { + while (i) { z = &dev->zone[i - 1]; /* unregister all zones @@ -158,7 +185,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, i--; } - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; } diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 4b49f27fa31..8a5959e3535 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -1,3 +1,6 @@ +#ifndef __KVM_COALESCED_MMIO_H__ +#define __KVM_COALESCED_MMIO_H__ + /* * KVM coalesced MMIO * @@ -7,6 +10,8 @@ * */ +#ifdef CONFIG_KVM_MMIO + #define KVM_COALESCED_MMIO_ZONE_MAX 100 struct kvm_coalesced_mmio_dev { @@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev { }; int kvm_coalesced_mmio_init(struct kvm *kvm); +void kvm_coalesced_mmio_free(struct kvm *kvm); int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone); int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone); + +#else + +static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; } +static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { } + +#endif + +#endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a9d3fc6c681..c1f1e3c6298 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -2,6 +2,7 @@ * kvm eventfd support - use eventfd objects to signal various KVM events * * Copyright 2009 Novell. All Rights Reserved. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * Author: * Gregory Haskins <ghaskins@novell.com> @@ -30,6 +31,7 @@ #include <linux/list.h> #include <linux/eventfd.h> #include <linux/kernel.h> +#include <linux/slab.h> #include "iodev.h" @@ -47,7 +49,6 @@ struct _irqfd { int gsi; struct list_head list; poll_table pt; - wait_queue_head_t *wqh; wait_queue_t wait; struct work_struct inject; struct work_struct shutdown; @@ -159,8 +160,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); - - irqfd->wqh = wqh; add_wait_queue(wqh, &irqfd->wait); } @@ -219,7 +218,6 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) events = file->f_op->poll(file, &irqfd->pt); list_add_tail(&irqfd->list, &kvm->irqfds.items); - spin_unlock_irq(&kvm->irqfds.lock); /* * Check if there was an event already pending on the eventfd @@ -228,6 +226,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) if (events & POLLIN) schedule_work(&irqfd->inject); + spin_unlock_irq(&kvm->irqfds.lock); + /* * do not drop the file until the irqfd is fully initialized, otherwise * we might race against the POLLHUP @@ -463,7 +463,7 @@ static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; @@ -508,7 +508,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) else p->wildcard = true; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); /* Verify that there isnt a match already */ if (ioeventfd_check_collision(kvm, p)) { @@ -518,18 +518,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(&p->dev, &ioeventfd_ops); - ret = __kvm_io_bus_register_dev(bus, &p->dev); + ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); if (ret < 0) goto unlock_fail; list_add_tail(&p->list, &kvm->ioeventfds); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; unlock_fail: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); fail: kfree(p); @@ -542,7 +542,7 @@ static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; @@ -551,7 +551,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (IS_ERR(eventfd)) return PTR_ERR(eventfd); - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); @@ -565,13 +565,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p->wildcard && p->datamatch != args->datamatch) continue; - __kvm_io_bus_unregister_dev(bus, &p->dev); + kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); ioeventfd_release(p); ret = 0; break; } - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); eventfd_ctx_put(eventfd); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 38a2d20b89d..0b9df8303dc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2001 MandrakeSoft S.A. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * MandrakeSoft S.A. * 43, rue d'Aboukir @@ -33,6 +34,7 @@ #include <linux/smp.h> #include <linux/hrtimer.h> #include <linux/io.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> @@ -100,6 +102,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) return injected; } +static void update_handled_vectors(struct kvm_ioapic *ioapic) +{ + DECLARE_BITMAP(handled_vectors, 256); + int i; + + memset(handled_vectors, 0, sizeof(handled_vectors)); + for (i = 0; i < IOAPIC_NUM_PINS; ++i) + __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); + memcpy(ioapic->handled_vectors, handled_vectors, + sizeof(handled_vectors)); + smp_wmb(); +} + static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; @@ -134,9 +149,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits |= (u32) val; e->fields.remote_irr = 0; } + update_handled_vectors(ioapic); mask_after = e->fields.mask; if (mask_before != mask_after) - kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); + kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index); @@ -177,12 +193,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { - u32 old_irr = ioapic->irr; + u32 old_irr; u32 mask = 1 << irq; union kvm_ioapic_redirect_entry entry; int ret = 1; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); + old_irr = ioapic->irr; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; level ^= entry.fields.polarity; @@ -199,7 +216,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return ret; } @@ -223,9 +240,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, * is dropped it will be put into irr and will be delivered * after ack notifier returns. */ - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); if (trigger_mode != IOAPIC_LEVEL_TRIG) continue; @@ -241,9 +258,12 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - mutex_lock(&ioapic->lock); + smp_rmb(); + if (!test_bit(vector, ioapic->handled_vectors)) + return; + spin_lock(&ioapic->lock); __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); } static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) @@ -269,7 +289,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, ASSERT(!(addr & 0xf)); /* check alignment */ addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: result = ioapic->ioregsel; @@ -283,7 +303,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, result = 0; break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); switch (len) { case 8: @@ -320,7 +340,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, } addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: ioapic->ioregsel = data; @@ -338,7 +358,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, default: break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -352,6 +372,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->ioregsel = 0; ioapic->irr = 0; ioapic->id = 0; + update_handled_vectors(ioapic); } static const struct kvm_io_device_ops ioapic_mmio_ops = { @@ -367,27 +388,42 @@ int kvm_ioapic_init(struct kvm *kvm) ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; - mutex_init(&ioapic->lock); + spin_lock_init(&ioapic->lock); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); - if (ret < 0) + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm->arch.vioapic = NULL; kfree(ioapic); + } return ret; } +void kvm_ioapic_destroy(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (ioapic) { + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + kvm->arch.vioapic = NULL; + kfree(ioapic); + } +} + int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) { struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -397,8 +433,9 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); - mutex_unlock(&ioapic->lock); + update_handled_vectors(ioapic); + spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 419c43b667a..0b190c34ccc 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -45,7 +45,8 @@ struct kvm_ioapic { struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); - struct mutex lock; + spinlock_t lock; + DECLARE_BITMAP(handled_vectors, 256); }; #ifdef DEBUG @@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); +void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 15147583abd..62a9caf0563 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -16,6 +16,8 @@ * * Copyright (C) 2006-2008 Intel Corporation * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * * Author: Allen M. Kay <allen.m.kay@intel.com> * Author: Weidong Han <weidong.han@intel.com> * Author: Ben-Ami Yassour <benami@il.ibm.com> @@ -32,12 +34,30 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -int kvm_iommu_map_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) +static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, unsigned long size) +{ + gfn_t end_gfn; + pfn_t pfn; + + pfn = gfn_to_pfn_memslot(kvm, slot, gfn); + end_gfn = gfn + (size >> PAGE_SHIFT); + gfn += 1; + + if (is_error_pfn(pfn)) + return pfn; + + while (gfn < end_gfn) + gfn_to_pfn_memslot(kvm, slot, gfn++); + + return pfn; +} + +int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { - gfn_t gfn = base_gfn; + gfn_t gfn, end_gfn; pfn_t pfn; - int i, r = 0; + int r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; int flags; @@ -45,44 +65,79 @@ int kvm_iommu_map_pages(struct kvm *kvm, if (!domain) return 0; + gfn = slot->base_gfn; + end_gfn = gfn + slot->npages; + flags = IOMMU_READ | IOMMU_WRITE; if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) flags |= IOMMU_CACHE; - for (i = 0; i < npages; i++) { - /* check if already mapped */ - if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) + + while (gfn < end_gfn) { + unsigned long page_size; + + /* Check if already mapped */ + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { + gfn += 1; continue; + } + + /* Get the page size we could use to map */ + page_size = kvm_host_page_size(kvm, gfn); + + /* Make sure the page_size does not exceed the memslot */ + while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) + page_size >>= 1; + + /* Make sure gfn is aligned to the page size we want to map */ + while ((gfn << PAGE_SHIFT) & (page_size - 1)) + page_size >>= 1; + + /* + * Pin all pages we are about to map in memory. This is + * important because we unmap and unpin in 4kb steps later. + */ + pfn = kvm_pin_pages(kvm, slot, gfn, page_size); + if (is_error_pfn(pfn)) { + gfn += 1; + continue; + } - pfn = gfn_to_pfn(kvm, gfn); - r = iommu_map_range(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, flags); + /* Map into IO address space */ + r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), + get_order(page_size), flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" - "iommu failed to map pfn=%lx\n", pfn); + "iommu failed to map pfn=%llx\n", pfn); goto unmap_pages; } - gfn++; + + gfn += page_size >> PAGE_SHIFT; + + } + return 0; unmap_pages: - kvm_iommu_put_pages(kvm, base_gfn, i); + kvm_iommu_put_pages(kvm, slot->base_gfn, gfn); return r; } static int kvm_iommu_map_memslots(struct kvm *kvm) { - int i, r = 0; + int i, idx, r = 0; + struct kvm_memslots *slots; + + idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); - for (i = 0; i < kvm->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); + for (i = 0; i < slots->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); if (r) break; } + srcu_read_unlock(&kvm->srcu, idx); return r; } @@ -104,7 +159,8 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x.%x failed", + printk(KERN_ERR "assign device %x:%x:%x.%x failed", + pci_domain_nr(pdev->bus), pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -125,7 +181,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); @@ -152,7 +209,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); @@ -184,37 +242,62 @@ out_unmap: return r; } +static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) + kvm_release_pfn_clean(pfn + i); +} + static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) { - gfn_t gfn = base_gfn; + struct iommu_domain *domain; + gfn_t end_gfn, gfn; pfn_t pfn; - struct iommu_domain *domain = kvm->arch.iommu_domain; - unsigned long i; u64 phys; + domain = kvm->arch.iommu_domain; + end_gfn = base_gfn + npages; + gfn = base_gfn; + /* check if iommu exists and in use */ if (!domain) return; - for (i = 0; i < npages; i++) { + while (gfn < end_gfn) { + unsigned long unmap_pages; + int order; + + /* Get physical address */ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); - pfn = phys >> PAGE_SHIFT; - kvm_release_pfn_clean(pfn); - gfn++; - } + pfn = phys >> PAGE_SHIFT; - iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); + /* Unmap address from IO address space */ + order = iommu_unmap(domain, gfn_to_gpa(gfn), 0); + unmap_pages = 1ULL << order; + + /* Unpin all pages we just unmapped to not leak any memory */ + kvm_unpin_pages(kvm, pfn, unmap_pages); + + gfn += unmap_pages; + } } static int kvm_iommu_unmap_memslots(struct kvm *kvm) { - int i; + int i, idx; + struct kvm_memslots *slots; + + idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); - for (i = 0; i < kvm->nmemslots; i++) { - kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); + for (i = 0; i < slots->nmemslots; i++) { + kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, + slots->memslots[i].npages); } + srcu_read_unlock(&kvm->srcu, idx); return 0; } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9fd5b3ebc51..369e38010ad 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -17,9 +17,11 @@ * Authors: * Yaozu (Eddie) Dong <Eddie.dong@intel.com> * + * Copyright 2010 Red Hat, Inc. and/or its affilates. */ #include <linux/kvm_host.h> +#include <linux/slab.h> #include <trace/events/kvm.h> #include <asm/msidef.h> @@ -98,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (r < 0) r = 0; r += kvm_apic_set_irq(vcpu, irq); - } else { + } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; else if (kvm_apic_compare_prio(vcpu, lowest) < 0) @@ -277,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, synchronize_rcu(); } -void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask) { struct kvm_irq_mask_notifier *kimn; struct hlist_node *n; + int gsi; rcu_read_lock(); - hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) - if (kimn->irq == irq) - kimn->func(kimn, mask); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) + if (kimn->irq == gsi) + kimn->func(kimn, mask); rcu_read_unlock(); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a944be392d6..5186e728c53 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5,6 +5,7 @@ * machines without emulation or binary translation. * * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affilates. * * Authors: * Avi Kivity <avi@qumranet.com> @@ -22,7 +23,6 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/percpu.h> -#include <linux/gfp.h> #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/vmalloc.h> @@ -44,6 +44,9 @@ #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/compat.h> +#include <linux/srcu.h> +#include <linux/hugetlb.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> @@ -51,9 +54,7 @@ #include <asm/pgtable.h> #include <asm-generic/bitops/le.h> -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" -#endif #define CREATE_TRACE_POINTS #include <trace/events/kvm.h> @@ -86,10 +87,18 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static int hardware_enable_all(void); static void hardware_disable_all(void); +static void kvm_io_bus_destroy(struct kvm_io_bus *bus); + static bool kvm_rebooting; static bool largepages_enabled = true; +static struct page *hwpoison_page; +static pfn_t hwpoison_pfn; + +static struct page *fault_page; +static pfn_t fault_pfn; + inline int kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) { @@ -136,10 +145,10 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) zalloc_cpumask_var(&cpus, GFP_ATOMIC); - spin_lock(&kvm->requests_lock); + raw_spin_lock(&kvm->requests_lock); me = smp_processor_id(); kvm_for_each_vcpu(i, vcpu, kvm) { - if (test_and_set_bit(req, &vcpu->requests)) + if (kvm_make_check_request(req, vcpu)) continue; cpu = vcpu->cpu; if (cpus != NULL && cpu != -1 && cpu != me) @@ -151,7 +160,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) smp_call_function_many(cpus, ack_flush, NULL, 1); else called = false; - spin_unlock(&kvm->requests_lock); + raw_spin_unlock(&kvm->requests_lock); free_cpumask_var(cpus); return called; } @@ -215,7 +224,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, unsigned long address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush; + int need_tlb_flush, idx; /* * When ->invalidate_page runs, the linux pte has been zapped @@ -235,10 +244,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, * pte after kvm_unmap_hva returned, without noticing the page * is going to be freed. */ + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; need_tlb_flush = kvm_unmap_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) @@ -252,11 +263,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, pte_t pte) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; kvm_set_spte_hva(kvm, address, pte); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, @@ -265,8 +279,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush = 0; + int need_tlb_flush = 0, idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); /* * The count increase must become visible at unlock time as no @@ -277,6 +292,7 @@ static void kvm_mmu_ |