aboutsummaryrefslogtreecommitdiff
path: root/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--virt/kvm/kvm_main.c444
1 files changed, 286 insertions, 158 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6..f032806a212 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
-#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
@@ -44,6 +43,9 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -51,9 +53,7 @@
#include <asm/pgtable.h>
#include <asm-generic/bitops/le.h>
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
-#endif
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
static int hardware_enable_all(void);
static void hardware_disable_all(void);
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
static bool kvm_rebooting;
static bool largepages_enabled = true;
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
- spin_lock(&kvm->requests_lock);
+ raw_spin_lock(&kvm->requests_lock);
me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
- spin_unlock(&kvm->requests_lock);
+ raw_spin_unlock(&kvm->requests_lock);
free_cpumask_var(cpus);
return called;
}
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int need_tlb_flush;
+ int need_tlb_flush, idx;
/*
* When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
* pte after kvm_unmap_hva returned, without noticing the page
* is going to be freed.
*/
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
need_tlb_flush = kvm_unmap_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
kvm_set_spte_hva(kvm, address, pte);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int need_tlb_flush = 0;
+ int need_tlb_flush = 0, idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
for (; start < end; start += PAGE_SIZE)
need_tlb_flush |= kvm_unmap_hva(kvm, start);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int young;
+ int young, idx;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
young = kvm_age_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
if (young)
kvm_flush_remote_tlbs(kvm);
@@ -330,7 +341,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
kvm_arch_flush_shadow(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
@@ -341,15 +356,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+ kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+ return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+ return 0;
+}
+
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
static struct kvm *kvm_create_vm(void)
{
- int r = 0;
+ int r = 0, i;
struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- struct page *page;
-#endif
if (IS_ERR(kvm))
goto out;
@@ -363,55 +389,48 @@ static struct kvm *kvm_create_vm(void)
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
+ r = -ENOMEM;
+ kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!kvm->memslots)
goto out_err;
- }
- kvm->coalesced_mmio_ring =
- (struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
- {
- kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
- r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
- if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- put_page(page);
-#endif
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err;
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+ GFP_KERNEL);
+ if (!kvm->buses[i]) {
+ cleanup_srcu_struct(&kvm->srcu);
goto out_err;
}
}
-#endif
+
+ r = kvm_init_mmu_notifier(kvm);
+ if (r) {
+ cleanup_srcu_struct(&kvm->srcu);
+ goto out_err;
+ }
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
- spin_lock_init(&kvm->requests_lock);
- kvm_io_bus_init(&kvm->pio_bus);
+ raw_spin_lock_init(&kvm->requests_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
- kvm_io_bus_init(&kvm->mmio_bus);
- init_rwsem(&kvm->slots_lock);
+ mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- kvm_coalesced_mmio_init(kvm);
-#endif
out:
return kvm;
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
- (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
out_err:
hardware_disable_all();
-#endif
out_err_nodisable:
+ for (i = 0; i < KVM_NR_BUSES; i++)
+ kfree(kvm->buses[i]);
+ kfree(kvm->memslots);
kfree(kvm);
return ERR_PTR(r);
}
@@ -446,13 +465,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
void kvm_free_physmem(struct kvm *kvm)
{
int i;
+ struct kvm_memslots *slots = kvm->memslots;
- for (i = 0; i < kvm->nmemslots; ++i)
- kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+ for (i = 0; i < slots->nmemslots; ++i)
+ kvm_free_physmem_slot(&slots->memslots[i], NULL);
+
+ kfree(kvm->memslots);
}
static void kvm_destroy_vm(struct kvm *kvm)
{
+ int i;
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
@@ -460,12 +483,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- kvm_io_bus_destroy(&kvm->pio_bus);
- kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- if (kvm->coalesced_mmio_ring != NULL)
- free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+ for (i = 0; i < KVM_NR_BUSES; i++)
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
@@ -512,12 +532,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- int r;
+ int r, flush_shadow = 0;
gfn_t base_gfn;
unsigned long npages;
unsigned long i;
struct kvm_memory_slot *memslot;
struct kvm_memory_slot old, new;
+ struct kvm_memslots *slots, *old_memslots;
r = -EINVAL;
/* General sanity checks */
@@ -532,10 +553,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- memslot = &kvm->memslots[mem->slot];
+ memslot = &kvm->memslots->memslots[mem->slot];
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
+ r = -EINVAL;
+ if (npages > KVM_MEM_MAX_NR_PAGES)
+ goto out;
+
if (!npages)
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
@@ -553,7 +578,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Check for overlaps */
r = -EEXIST;
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *s = &kvm->memslots[i];
+ struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
if (s == memslot || !s->npages)
continue;
@@ -579,15 +604,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
memset(new.rmap, 0, npages * sizeof(*new.rmap));
new.user_alloc = user_alloc;
- /*
- * hva_to_rmmap() serialzies with the mmu_lock and to be
- * safe it has to ignore memslots with !user_alloc &&
- * !userspace_addr.
- */
- if (user_alloc)
- new.userspace_addr = mem->userspace_addr;
- else
- new.userspace_addr = 0;
+ new.userspace_addr = mem->userspace_addr;
}
if (!npages)
goto skip_lpage;
@@ -636,14 +653,15 @@ skip_lpage:
/* Allocate page dirty bitmap if needed */
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
- unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
+ unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
new.dirty_bitmap = vmalloc(dirty_bytes);
if (!new.dirty_bitmap)
goto out_free;
memset(new.dirty_bitmap, 0, dirty_bytes);
+ /* destroy any largepage mappings for dirty tracking */
if (old.npages)
- kvm_arch_flush_shadow(kvm);
+ flush_shadow = 1;
}
#else /* not defined CONFIG_S390 */
new.user_alloc = user_alloc;
@@ -651,36 +669,72 @@ skip_lpage:
new.userspace_addr = mem->userspace_addr;
#endif /* not defined CONFIG_S390 */
- if (!npages)
+ if (!npages) {
+ r = -ENOMEM;
+ slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!slots)
+ goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ if (mem->slot >= slots->nmemslots)
+ slots->nmemslots = mem->slot + 1;
+ slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+ old_memslots = kvm->memslots;
+ rcu_assign_pointer(kvm->memslots, slots);
+ synchronize_srcu_expedited(&kvm->srcu);
+ /* From this point no new shadow pages pointing to a deleted
+ * memslot will be created.
+ *
+ * validation of sp->gfn happens in:
+ * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+ * - kvm_is_visible_gfn (mmu_check_roots)
+ */
kvm_arch_flush_shadow(kvm);
+ kfree(old_memslots);
+ }
- spin_lock(&kvm->mmu_lock);
- if (mem->slot >= kvm->nmemslots)
- kvm->nmemslots = mem->slot + 1;
-
- *memslot = new;
- spin_unlock(&kvm->mmu_lock);
-
- r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
- if (r) {
- spin_lock(&kvm->mmu_lock);
- *memslot = old;
- spin_unlock(&kvm->mmu_lock);
+ r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+ if (r)
goto out_free;
- }
- kvm_free_physmem_slot(&old, npages ? &new : NULL);
- /* Slot deletion case: we have to update the current slot */
- spin_lock(&kvm->mmu_lock);
- if (!npages)
- *memslot = old;
- spin_unlock(&kvm->mmu_lock);
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
- r = kvm_iommu_map_pages(kvm, base_gfn, npages);
- if (r)
- goto out;
+ if (npages) {
+ r = kvm_iommu_map_pages(kvm, &new);
+ if (r)
+ goto out_free;
+ }
#endif
+
+ r = -ENOMEM;
+ slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ if (!slots)
+ goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ if (mem->slot >= slots->nmemslots)
+ slots->nmemslots = mem->slot + 1;
+
+ /* actual memory is freed via old in kvm_free_physmem_slot below */
+ if (!npages) {
+ new.rmap = NULL;
+ new.dirty_bitmap = NULL;
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+ new.lpage_info[i] = NULL;
+ }
+
+ slots->memslots[mem->slot] = new;
+ old_memslots = kvm->memslots;
+ rcu_assign_pointer(kvm->memslots, slots);
+ synchronize_srcu_expedited(&kvm->srcu);
+
+ kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+ kvm_free_physmem_slot(&old, &new);
+ kfree(old_memslots);
+
+ if (flush_shadow)
+ kvm_arch_flush_shadow(kvm);
+
return 0;
out_free:
@@ -697,9 +751,9 @@ int kvm_set_memory_region(struct kvm *kvm,
{
int r;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
r = __kvm_set_memory_region(kvm, mem, user_alloc);
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -719,19 +773,19 @@ int kvm_get_dirty_log(struct kvm *kvm,
{
struct kvm_memory_slot *memslot;
int r, i;
- int n;
+ unsigned long n;
unsigned long any = 0;
r = -EINVAL;
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
- memslot = &kvm->memslots[log->slot];
+ memslot = &kvm->memslots->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
- n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ n = kvm_dirty_bitmap_bytes(memslot);
for (i = 0; !any && i < n/sizeof(long); ++i)
any = memslot->dirty_bitmap[i];
@@ -780,9 +834,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
- for (i = 0; i < kvm->nmemslots; ++i) {
- struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ for (i = 0; i < slots->nmemslots; ++i) {
+ struct kvm_memory_slot *memslot = &slots->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +856,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
- gfn = unalias_gfn(kvm, gfn);
+ gfn = unalias_gfn_instantiation(kvm, gfn);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+ if (memslot->flags & KVM_MEMSLOT_INVALID)
+ continue;
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@@ -814,33 +873,73 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+ struct vm_area_struct *vma;
+ unsigned long addr, size;
+
+ size = PAGE_SIZE;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
+ return PAGE_SIZE;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ if (!vma)
+ goto out;
+
+ size = vma_kernel_pagesize(vma);
+
+out:
+ up_read(&current->mm->mmap_sem);
+
+ return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+ int i;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = NULL;
+
+ gfn = unalias_gfn(kvm, gfn);
+ for (i = 0; i < slots->nmemslots; ++i) {
+ memslot = &slots->memslots[i];
+
+ if (gfn >= memslot->base_gfn
+ && gfn < memslot->base_gfn + memslot->npages)
+ break;
+ }
+
+ return memslot - slots->memslots;
+}
+
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot;
- gfn = unalias_gfn(kvm, gfn);
+ gfn = unalias_gfn_instantiation(kvm, gfn);
slot = gfn_to_memslot_unaliased(kvm, gfn);
- if (!slot)
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
- return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+ return gfn_to_hva_memslot(slot, gfn);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
struct page *page[1];
- unsigned long addr;
int npages;
pfn_t pfn;
might_sleep();
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr)) {
- get_page(bad_page);
- return page_to_pfn(bad_page);
- }
-
npages = get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1)) {
@@ -865,8 +964,27 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return pfn;
}
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+ unsigned long addr;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr)) {
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
+ }
+
+ return hva_to_pfn(kvm, addr);
+}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+ return hva_to_pfn(kvm, addr);
+}
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
@@ -1074,9 +1192,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- /* avoid RMW */
- if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
- generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+ generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
}
}
@@ -1489,7 +1605,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
- r = -ENXIO;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -1501,7 +1616,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
- r = -ENXIO;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -1635,12 +1749,19 @@ static struct file_operations kvm_vm_fops = {
static int kvm_dev_ioctl_create_vm(void)
{
- int fd;
+ int fd, r;
struct kvm *kvm;
kvm = kvm_create_vm();
if (IS_ERR(kvm))
return PTR_ERR(kvm);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+ r = kvm_coalesced_mmio_init(kvm);
+ if (r < 0) {
+ kvm_put_kvm(kvm);
+ return r;
+ }
+#endif
fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (fd < 0)
kvm_put_kvm(kvm);
@@ -1808,11 +1929,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
cpu);
hardware_disable(NULL);
break;
- case CPU_UP_CANCELED:
- printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
- cpu);
- smp_call_function_single(cpu, hardware_disable, NULL, 1);
- break;
case CPU_ONLINE:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
@@ -1854,12 +1970,7 @@ static struct notifier_block kvm_reboot_notifier = {
.priority = 0,
};
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
- memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@@ -1868,13 +1979,17 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
kvm_iodevice_destructor(pos);
}
+ kfree(bus);
}
/* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
int i;
+ struct kvm_io_bus *bus;
+
+ bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
return 0;
@@ -1882,59 +1997,72 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
}
/* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, void *val)
{
int i;
+ struct kvm_io_bus *bus;
+
+ bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
return 0;
return -EOPNOTSUPP;
}
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int ret;
-
- down_write(&kvm->slots_lock);
- ret = __kvm_io_bus_register_dev(bus, dev);
- up_write(&kvm->slots_lock);
-
- return ret;
-}
+ struct kvm_io_bus *new_bus, *bus;
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
-{
+ bus = kvm->buses[bus_idx];
if (bus->dev_count > NR_IOBUS_DEVS-1)
return -ENOSPC;
- bus->devs[bus->dev_count++] = dev;
+ new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
+ memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+ new_bus->devs[new_bus->dev_count++] = dev;
+ rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+ synchronize_srcu_expedited(&kvm->srcu);
+ kfree(bus);
return 0;
}
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
- struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- down_write(&kvm->slots_lock);
- __kvm_io_bus_unregister_dev(bus, dev);
- up_write(&kvm->slots_lock);
-}
+ int i, r;
+ struct kvm_io_bus *new_bus, *bus;
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
- struct kvm_io_device *dev)
-{
- int i;
+ new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
- for (i = 0; i < bus->dev_count; i++)
- if (bus->devs[i] == dev) {
- bus->devs[i] = bus->devs[--bus->dev_count];
+ bus = kvm->buses[bus_idx];
+ memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+ r = -ENOENT;
+ for (i = 0; i < new_bus->dev_count; i++)
+ if (new_bus->devs[i] == dev) {
+ r = 0;
+ new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
break;
}
+
+ if (r) {
+ kfree(new_bus);
+ return r;
+ }
+
+ rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+ synchronize_srcu_expedited(&kvm->srcu);
+ kfree(bus);
+ return r;
}
static struct notifier_block kvm_cpu_notifier = {
@@ -2050,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
kvm_arch_vcpu_put(vcpu);
}
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
int r;
@@ -2100,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
goto out_free_4;
/* A kmem cache lets us meet the alignment requirements of fx_save. */
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
- __alignof__(struct kvm_vcpu),
+ if (!vcpu_align)
+ vcpu_align = __alignof__(struct kvm_vcpu);
+ kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
0, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
@@ -2150,7 +2279,6 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
- tracepoint_synchronize_unregister();
kvm_exit_debug();
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);