From 1e1c65e03ec817a64153751150f6691db9842acd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 21 Apr 2008 13:48:24 +0200 Subject: KVM: remove long -> void *user -> long cast kvm_dev_ioctl casts the arg value to void __user *, just to recast it again to long. This seems unnecessary. According to objdump the binary code on x86 is unchanged by this patch. Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d4eae6af073..b6a59498b5a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1179,7 +1179,6 @@ static int kvm_dev_ioctl_create_vm(void) static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - void __user *argp = (void __user *)arg; long r = -EINVAL; switch (ioctl) { @@ -1196,7 +1195,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension((long)argp); + r = kvm_dev_ioctl_check_extension(arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v1.2.3-18-g5258 From 2e2e3738af33575cba59597acd5e80cdd5ec11ee Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Wed, 30 Apr 2008 15:37:07 -0500 Subject: KVM: Handle vma regions with no backing page This patch allows VMAs that contain no backing page to be used for guest memory. This is useful for assigning mmio regions to a guest. Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 49 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b6a59498b5a..f9dd20606c4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -532,6 +532,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) struct page *page[1]; unsigned long addr; int npages; + pfn_t pfn; might_sleep(); @@ -544,19 +545,38 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, NULL); - if (npages != 1) { - get_page(bad_page); - return page_to_pfn(bad_page); - } + if (unlikely(npages != 1)) { + struct vm_area_struct *vma; - return page_to_pfn(page[0]); + vma = find_vma(current->mm, addr); + if (vma == NULL || addr < vma->vm_start || + !(vma->vm_flags & VM_PFNMAP)) { + get_page(bad_page); + return page_to_pfn(bad_page); + } + + pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + BUG_ON(pfn_valid(pfn)); + } else + pfn = page_to_pfn(page[0]); + + return pfn; } EXPORT_SYMBOL_GPL(gfn_to_pfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { - return pfn_to_page(gfn_to_pfn(kvm, gfn)); + pfn_t pfn; + + pfn = gfn_to_pfn(kvm, gfn); + if (pfn_valid(pfn)) + return pfn_to_page(pfn); + + WARN_ON(!pfn_valid(pfn)); + + get_page(bad_page); + return bad_page; } EXPORT_SYMBOL_GPL(gfn_to_page); @@ -569,7 +589,8 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - put_page(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -594,21 +615,25 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page)) - SetPageDirty(page); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) + SetPageDirty(page); + } } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - mark_page_accessed(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - get_page(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); -- cgit v1.2.3-18-g5258 From 4ecac3fd6dc2629ad76a658a486f081c44aef10e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 13:23:38 +0300 Subject: KVM: Handle virtualization instruction #UD faults during reboot KVM turns off hardware virtualization extensions during reboot, in order to disassociate the memory used by the virtualization extensions from the processor, and in order to have the system in a consistent state. Unfortunately virtual machines may still be running while this goes on, and once virtualization extensions are turned off, any virtulization instruction will #UD on execution. Fix by adding an exception handler to virtualization instructions; if we get an exception during reboot, we simply spin waiting for the reset to complete. If it's a true exception, BUG() so we can have our stack trace. Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f9dd20606c4..e4bf88a9ee4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -65,6 +65,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +bool kvm_rebooting; + static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); @@ -1301,6 +1303,18 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } + +asmlinkage void kvm_handle_fault_on_reboot(void) +{ + if (kvm_rebooting) + /* spin while reset goes on */ + while (true) + ; + /* Fault while not rebooting. We want the trace. */ + BUG(); +} +EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); + static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -1310,6 +1324,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * in vmx root mode. */ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + kvm_rebooting = true; on_each_cpu(hardware_disable, NULL, 1); } return NOTIFY_OK; -- cgit v1.2.3-18-g5258 From 7cc8883074b040aa8c1ebd3a17463b0ea3a9ef16 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 16:29:20 +0300 Subject: KVM: Remove decache_vcpus_on_cpu() and related callbacks Obsoleted by the vmx-specific per-cpu list. Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e4bf88a9ee4..83a0e5ce603 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1273,7 +1273,6 @@ static void hardware_disable(void *junk) if (!cpu_isset(cpu, cpus_hardware_enabled)) return; cpu_clear(cpu, cpus_hardware_enabled); - decache_vcpus_on_cpu(cpu); kvm_arch_hardware_disable(NULL); } -- cgit v1.2.3-18-g5258 From 92760499d01ef91518119908eb9b8798b6c9bd3f Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:53 +0200 Subject: KVM: kvm_io_device: extend in_range() to manage len and write attribute Modify member in_range() of structure kvm_io_device to pass length and the type of the I/O (write or read). This modification allows to use kvm_io_device with coalesced MMIO. Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 83a0e5ce603..9330fad2b91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1350,14 +1350,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) } } -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr) +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, + gpa_t addr, int len, int is_write) { int i; for (i = 0; i < bus->dev_count; i++) { struct kvm_io_device *pos = bus->devs[i]; - if (pos->in_range(pos, addr)) + if (pos->in_range(pos, addr, len, is_write)) return pos; } -- cgit v1.2.3-18-g5258 From 5f94c1741bdc7a336553122036e8a779e616ccbf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:54 +0200 Subject: KVM: Add coalesced MMIO support (common part) This patch adds all needed structures to coalesce MMIOs. Until an architecture uses it, it is not compiled. Coalesced MMIO introduces two ioctl() to define where are the MMIO zones that can be coalesced: - KVM_REGISTER_COALESCED_MMIO registers a coalesced MMIO zone. It requests one parameter (struct kvm_coalesced_mmio_zone) which defines a memory area where MMIOs can be coalesced until the next switch to user space. The maximum number of MMIO zones is KVM_COALESCED_MMIO_ZONE_MAX. - KVM_UNREGISTER_COALESCED_MMIO cancels all registered zones inside the given bounds (bounds are also given by struct kvm_coalesced_mmio_zone). The userspace client can check kernel coalesced MMIO availability by asking ioctl(KVM_CHECK_EXTENSION) for the KVM_CAP_COALESCED_MMIO capability. The ioctl() call to KVM_CAP_COALESCED_MMIO will return 0 if not supported, or the page offset where will be stored the ring buffer. The page offset depends on the architecture. After an ioctl(KVM_RUN), the first page of the KVM memory mapped points to a kvm_run structure. The offset given by KVM_CAP_COALESCED_MMIO is an offset to the coalesced MMIO ring expressed in PAGE_SIZE relatively to the address of the start of th kvm_run structure. The MMIO ring buffer is defined by the structure kvm_coalesced_mmio_ring. [akio: fix oops during guest shutdown] Signed-off-by: Laurent Vivier Signed-off-by: Akio Takebe Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9330fad2b91..7d10dfa0d38 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,6 +47,10 @@ #include #include +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#include "coalesced_mmio.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -185,10 +189,23 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kvm_arch_create_vm(); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + struct page *page; +#endif if (IS_ERR(kvm)) goto out; +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + kfree(kvm); + return ERR_PTR(-ENOMEM); + } + kvm->coalesced_mmio_ring = + (struct kvm_coalesced_mmio_ring *)page_address(page); +#endif + kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); @@ -200,6 +217,9 @@ static struct kvm *kvm_create_vm(void) spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + kvm_coalesced_mmio_init(kvm); +#endif out: return kvm; } @@ -242,6 +262,10 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + if (kvm->coalesced_mmio_ring != NULL) + free_page((unsigned long)kvm->coalesced_mmio_ring); +#endif kvm_arch_destroy_vm(kvm); mmdrop(mm); } @@ -825,6 +849,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #ifdef CONFIG_X86 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->arch.pio_data); +#endif +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) + page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif else return VM_FAULT_SIGBUS; @@ -1148,6 +1176,32 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + case KVM_REGISTER_COALESCED_MMIO: { + struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; + if (copy_from_user(&zone, argp, sizeof zone)) + goto out; + r = -ENXIO; + r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); + if (r) + goto out; + r = 0; + break; + } + case KVM_UNREGISTER_COALESCED_MMIO: { + struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; + if (copy_from_user(&zone, argp, sizeof zone)) + goto out; + r = -ENXIO; + r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); + if (r) + goto out; + r = 0; + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -1231,6 +1285,9 @@ static long kvm_dev_ioctl(struct file *filp, r = PAGE_SIZE; /* struct kvm_run */ #ifdef CONFIG_X86 r += PAGE_SIZE; /* pio data page */ +#endif +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + r += PAGE_SIZE; /* coalesced mmio ring page */ #endif break; case KVM_TRACE_ENABLE: -- cgit v1.2.3-18-g5258 From eff0114ac3d3a20a5c93b31b00134e59bfc75189 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Fri, 27 Jun 2008 15:05:31 +0200 Subject: KVM: s390: dont allocate dirty bitmap This patch #ifdefs the bitmap array for dirty tracking. We don't have dirty tracking on s390 today, and we'd love to use our storage keys to store the dirty information for migration. Therefore, we won't need this array at all, and due to our limited amount of vmalloc space this limits the amount of guests we can run. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7d10dfa0d38..9ccaf8f5402 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -359,6 +359,7 @@ int __kvm_set_memory_region(struct kvm *kvm, r = -ENOMEM; /* Allocate if a slot is being created */ +#ifndef CONFIG_S390 if (npages && !new.rmap) { new.rmap = vmalloc(npages * sizeof(struct page *)); @@ -399,6 +400,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; memset(new.dirty_bitmap, 0, dirty_bytes); } +#endif /* not defined CONFIG_S390 */ if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; -- cgit v1.2.3-18-g5258 From 34d4cb8fca1f2a31be152b74797e6cd160ec9de6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 10 Jul 2008 20:49:31 -0300 Subject: KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction Flush the shadow mmu before removing regions to avoid stale entries. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ccaf8f5402..30b36368fcd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) + kvm_arch_flush_shadow(kvm); + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); -- cgit v1.2.3-18-g5258 From 597a5f551ec4cd0aa0966e4fff4684ecc8c31c0d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 20 Jul 2008 14:24:22 +0300 Subject: KVM: Adjust smp_call_function_mask() callers to new requirements smp_call_function_mask() now complains when called in a preemptible context; adjust its callers accordingly. Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 30b36368fcd..904d7b7bd78 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -105,10 +105,11 @@ static void ack_flush(void *_completed) void kvm_flush_remote_tlbs(struct kvm *kvm) { - int i, cpu; + int i, cpu, me; cpumask_t cpus; struct kvm_vcpu *vcpu; + me = get_cpu(); cpus_clear(cpus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; @@ -117,21 +118,24 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; - if (cpu != -1 && cpu != raw_smp_processor_id()) + if (cpu != -1 && cpu != me) cpu_set(cpu, cpus); } if (cpus_empty(cpus)) - return; + goto out; ++kvm->stat.remote_tlb_flush; smp_call_function_mask(cpus, ack_flush, NULL, 1); +out: + put_cpu(); } void kvm_reload_remote_mmus(struct kvm *kvm) { - int i, cpu; + int i, cpu, me; cpumask_t cpus; struct kvm_vcpu *vcpu; + me = get_cpu(); cpus_clear(cpus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; @@ -140,12 +144,14 @@ void kvm_reload_remote_mmus(struct kvm *kvm) if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) continue; cpu = vcpu->cpu; - if (cpu != -1 && cpu != raw_smp_processor_id()) + if (cpu != -1 && cpu != me) cpu_set(cpu, cpus); } if (cpus_empty(cpus)) - return; + goto out; smp_call_function_mask(cpus, ack_flush, NULL, 1); +out: + put_cpu(); } -- cgit v1.2.3-18-g5258