Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--	virt/kvm/kvm_main.c	264
1 file changed, 114 insertions(+), 150 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 979bff485fb..4b6c01b477f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -94,8 +95,14 @@ static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new, u64 last_generation);
-bool kvm_rebooting;
+static void kvm_release_pfn_dirty(pfn_t pfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, gfn_t gfn);
+
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
@@ -186,6 +193,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
+EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
void kvm_reload_remote_mmus(struct kvm *kvm)
{
@@ -449,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_arch_init_vm(kvm, type);
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
r = hardware_enable_all();
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -465,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -490,17 +500,19 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return kvm;
out_err:
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
@@ -540,24 +552,24 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
/*
* Free any memory in @free but not in @dont.
*/
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
- kvm_arch_free_memslot(free, dont);
+ kvm_arch_free_memslot(kvm, free, dont);
free->npages = 0;
}
-void kvm_free_physmem(struct kvm *kvm)
+static void kvm_free_physmem(struct kvm *kvm)
{
struct kvm_memslots *slots = kvm->memslots;
struct kvm_memory_slot *memslot;
kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(memslot, NULL);
+ kvm_free_physmem_slot(kvm, memslot, NULL);
kfree(kvm->memslots);
}
@@ -581,9 +593,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -596,6 +608,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
hardware_disable_all();
@@ -632,14 +645,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
-#ifndef CONFIG_S390
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;
-#endif /* !CONFIG_S390 */
return 0;
}
@@ -673,8 +684,9 @@ static void sort_memslots(struct kvm_memslots *slots)
slots->id_to_index[slots->memslots[i].id] = i;
}
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
- u64 last_generation)
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new,
+ u64 last_generation)
{
if (new) {
int id = new->id;
@@ -821,7 +833,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (change == KVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
- if (kvm_arch_create_memslot(&new, npages))
+ if (kvm_arch_create_memslot(kvm, &new, npages))
goto out_free;
}
@@ -872,6 +884,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
+ /* actual memory is freed via old in kvm_free_physmem_slot below */
+ if (change == KVM_MR_DELETE) {
+ new.dirty_bitmap = NULL;
+ memset(&new.arch, 0, sizeof(new.arch));
+ }
+
+ old_memslots = install_new_memslots(kvm, slots, &new);
+
+ kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+ kvm_free_physmem_slot(kvm, &old, &new);
+ kfree(old_memslots);
+
/*
* IOMMU mapping: New slots need to be mapped. Old slots need to be
* un-mapped and re-mapped if their base changes. Since base change
@@ -883,29 +908,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
*/
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
- if (r)
- goto out_slots;
- }
-
- /* actual memory is freed via old in kvm_free_physmem_slot below */
- if (change == KVM_MR_DELETE) {
- new.dirty_bitmap = NULL;
- memset(&new.arch, 0, sizeof(new.arch));
+ return r;
}
- old_memslots = install_new_memslots(kvm, slots, &new);
-
- kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
- kvm_free_physmem_slot(&old, &new);
- kfree(old_memslots);
-
return 0;
out_slots:
kfree(slots);
out_free:
- kvm_free_physmem_slot(&new, &old);
+ kvm_free_physmem_slot(kvm, &new, &old);
out:
return r;
}
@@ -923,8 +934,8 @@ int kvm_set_memory_region(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
{
if (mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
@@ -964,6 +975,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
out:
return r;
}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
bool kvm_largepages_enabled(void)
{
@@ -1045,7 +1057,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
}
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
- gfn_t gfn)
+ gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
@@ -1064,10 +1076,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- if (writable)
+ unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+ if (!kvm_is_error_hva(hva) && writable)
*writable = !memslot_is_readonly(slot);
- return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+ return hva;
}
static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1383,18 +1397,11 @@ void kvm_release_page_dirty(struct page *page)
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
-void kvm_release_pfn_dirty(pfn_t pfn)
+static void kvm_release_pfn_dirty(pfn_t pfn)
{
kvm_set_pfn_dirty(pfn);
kvm_release_pfn_clean(pfn);
}
-EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
-
-void kvm_set_page_dirty(struct page *page)
-{
- kvm_set_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
void kvm_set_pfn_dirty(pfn_t pfn)
{
@@ -1611,8 +1618,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
- offset, len);
+ const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+ return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -1635,8 +1643,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn)
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1652,6 +1661,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
memslot = gfn_to_memslot(kvm, gfn);
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
+EXPORT_SYMBOL_GPL(mark_page_dirty);
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@ -1677,6 +1687,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
finish_wait(&vcpu->wq, &wait);
}
+EXPORT_SYMBOL_GPL(kvm_vcpu_block);
#ifndef CONFIG_S390
/*
@@ -1703,19 +1714,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
#endif /* !CONFIG_S390 */
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
- if (!need_resched())
- return;
- cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+int kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
struct pid *pid;
struct task_struct *task = NULL;
- bool ret = false;
+ int ret = 0;
rcu_read_lock();
pid = rcu_dereference(target->pid);
@@ -1735,7 +1738,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/*
* Helper that checks whether a VCPU is eligible for directed yield.
* Most eligible candidate to yield is decided by following heuristics:
@@ -1758,8 +1760,9 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
* locking does not harm. It may result in trying to yield to same VCPU, fail
* and continue with next VCPU and so on.
*/
-bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;
eligible = !vcpu->spin_loop.in_spin_loop ||
@@ -1770,8 +1773,10 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
return eligible;
-}
+#else
+ return true;
#endif
+}
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
@@ -1802,7 +1807,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -1891,6 +1896,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
int r;
struct kvm_vcpu *vcpu, *v;
+ if (id >= KVM_MAX_VCPUS)
+ return -EINVAL;
+
vcpu = kvm_arch_vcpu_create(kvm, id);
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
@@ -2269,6 +2277,21 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
ops = &kvm_xics_ops;
break;
#endif
+#ifdef CONFIG_KVM_VFIO
+ case KVM_DEV_TYPE_VFIO:
+ ops = &kvm_vfio_ops;
+ break;
+#endif
+#ifdef CONFIG_KVM_ARM_VGIC
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ ops = &kvm_arm_vgic_v2_ops;
+ break;
+#endif
+#ifdef CONFIG_S390
+ case KVM_DEV_TYPE_FLIC:
+ ops = &kvm_flic_ops;
+ break;
+#endif
default:
return -ENODEV;
}
@@ -2517,44 +2540,12 @@ out:
}
#endif
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page[1];
- unsigned long addr;
- int npages;
- gfn_t gfn = vmf->pgoff;
- struct kvm *kvm = vma->vm_file->private_data;
-
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return VM_FAULT_SIGBUS;
-
- npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
- NULL);
- if (unlikely(npages != 1))
- return VM_FAULT_SIGBUS;
-
- vmf->page = page[0];
- return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
- .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_vm_vm_ops;
- return 0;
-}
-
static struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = kvm_vm_compat_ioctl,
#endif
- .mmap = kvm_vm_mmap,
.llseek = noop_llseek,
};
@@ -2681,11 +2672,12 @@ static void hardware_enable_nolock(void *junk)
}
}
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_enable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_enable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_nolock(void *junk)
@@ -2698,11 +2690,12 @@ static void hardware_disable_nolock(void *junk)
kvm_arch_hardware_disable(NULL);
}
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_disable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_all_nolock(void)
@@ -2716,16 +2709,16 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
}
static int hardware_enable_all(void)
{
int r = 0;
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -2738,7 +2731,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
return r;
}
@@ -2748,20 +2741,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
{
int cpu = (long)v;
- if (!kvm_usage_count)
- return NOTIFY_OK;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- hardware_disable(NULL);
+ hardware_disable();
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- hardware_enable(NULL);
+ hardware_enable();
break;
}
return NOTIFY_OK;
@@ -2938,6 +2928,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL_GPL(kvm_io_bus_write);
/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
@@ -2957,33 +2948,6 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
return r < 0 ? r : 0;
}
-/* kvm_io_bus_read_cookie - called under kvm->slots_lock */
-int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, void *val, long cookie)
-{
- struct kvm_io_bus *bus;
- struct kvm_io_range range;
-
- range = (struct kvm_io_range) {
- .addr = addr,
- .len = len,
- };
-
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-
- /* First try the device referenced by cookie. */
- if ((cookie >= 0) && (cookie < bus->dev_count) &&
- (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
- if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len,
- val))
- return cookie;
-
- /*
- * cookie contained garbage; fall back to search and return the
- * correct cookie value.
- */
- return __kvm_io_bus_read(bus, &range, val);
-}
/* Caller must hold slots_lock. */
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
@@ -3054,10 +3018,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3071,12 +3035,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3089,7 +3053,7 @@ static const struct file_operations *stat_fops[] = {
static int kvm_init_debug(void)
{
- int r = -EFAULT;
+ int r = -EEXIST;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -3131,7 +3095,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
- WARN_ON(raw_spin_is_locked(&kvm_lock));
+ WARN_ON(raw_spin_is_locked(&kvm_count_lock));
hardware_enable_nolock(NULL);
}
}
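
Note: the hunks above introduce a second SRCU domain, kvm->irq_srcu, initialised in kvm_create_vm() and cleaned up in kvm_destroy_vm(), but the read side does not appear in this file. The following is a minimal sketch, not code from this patch, of how a consumer would be expected to take the new SRCU read lock; the function name and the routing-table comment are placeholders, only the standard srcu_read_lock()/srcu_read_unlock() calls are assumed.

	/*
	 * Sketch only: intended read side of the kvm->irq_srcu domain added
	 * by this diff. Callers stay on the cheap SRCU read path while the
	 * irq routing table is swapped out under synchronize_srcu().
	 */
	static int example_deliver_irq(struct kvm *kvm, u32 gsi)
	{
		int idx, ret = -EINVAL;

		idx = srcu_read_lock(&kvm->irq_srcu);
		/* ... dereference the irq routing table under SRCU here ... */
		srcu_read_unlock(&kvm->irq_srcu, idx);
		return ret;
	}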