diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 6 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.c | 611 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.h | 63 | ||||
-rw-r--r-- | arch/x86/kvm/irq.c | 18 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_svm.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 35 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 672 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 86 | ||||
-rw-r--r-- | arch/x86/kvm/segment_descriptor.h | 29 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 352 | ||||
-rw-r--r-- | arch/x86/kvm/svm.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/tss.h | 59 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 278 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.h | 10 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 897 | ||||
-rw-r--r-- | arch/x86/kvm/x86_emulate.c | 285 |
19 files changed, 2883 insertions, 545 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e793c0..8d45fabc5f3 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -19,7 +19,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && EXPERIMENTAL + depends on HAVE_KVM select PREEMPT_NOTIFIERS select ANON_INODES ---help--- @@ -50,6 +50,17 @@ config KVM_AMD Provides support for KVM on AMD processors equipped with the AMD-V (SVM) extensions. +config KVM_TRACE + bool "KVM trace support" + depends on KVM && MARKERS && SYSFS + select RELAY + select DEBUG_FS + default n + ---help--- + This option allows reading a trace of kvm-related events through + relayfs. Note the ABI is not considered stable and will be + modified in future updates. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/lguest/Kconfig diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index ffdd0b31078..c97d35c218d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,10 +3,14 @@ # common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +ifeq ($(CONFIG_KVM_TRACE),y) +common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) +endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm -kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ + i8254.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c new file mode 100644 index 00000000000..361e3161127 --- /dev/null +++ b/arch/x86/kvm/i8254.c @@ -0,0 +1,611 @@ +/* + * 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2006 Intel Corporation + * Copyright (c) 2007 Keir Fraser, XenSource Inc + * Copyright (c) 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Authors: + * Sheng Yang <sheng.yang@intel.com> + * Based on QEMU and Xen. + */ + +#include <linux/kvm_host.h> + +#include "irq.h" +#include "i8254.h" + +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + +#define RW_STATE_LSB 1 +#define RW_STATE_MSB 2 +#define RW_STATE_WORD0 3 +#define RW_STATE_WORD1 4 + +/* Compute with 96 bit intermediate result: (a*b)/c */ +static u64 muldiv64(u64 a, u32 b, u32 c) +{ + union { + u64 ll; + struct { + u32 low, high; + } l; + } u, res; + u64 rl, rh; + + u.ll = a; + rl = (u64)u.l.low * (u64)b; + rh = (u64)u.l.high * (u64)b; + rh += (rl >> 32); + res.l.high = div64_64(rh, c); + res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); + return res.ll; +} + +static void pit_set_gate(struct kvm *kvm, int channel, u32 val) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + switch (c->mode) { + default: + case 0: + case 4: + /* XXX: just disable/enable counting */ + break; + case 1: + case 2: + case 3: + case 5: + /* Restart counting on rising edge. */ + if (c->gate < val) + c->count_load_time = ktime_get(); + break; + } + + c->gate = val; +} + +int pit_get_gate(struct kvm *kvm, int channel) +{ + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + return kvm->arch.vpit->pit_state.channels[channel].gate; +} + +static int pit_get_count(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + s64 d, t; + int counter; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + + switch (c->mode) { + case 0: + case 1: + case 4: + case 5: + counter = (c->count - d) & 0xffff; + break; + case 3: + /* XXX: may be incorrect for odd counts */ + counter = c->count - (mod_64((2 * d), c->count)); + break; + default: + counter = c->count - mod_64(d, c->count); + break; + } + return counter; +} + +static int pit_get_out(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + s64 d, t; + int out; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); + + switch (c->mode) { + default: + case 0: + out = (d >= c->count); + break; + case 1: + out = (d < c->count); + break; + case 2: + out = ((mod_64(d, c->count) == 0) && (d != 0)); + break; + case 3: + out = (mod_64(d, c->count) < ((c->count + 1) >> 1)); + break; + case 4: + case 5: + out = (d == c->count); + break; + } + + return out; +} + +static void pit_latch_count(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + if (!c->count_latched) { + c->latched_count = pit_get_count(kvm, channel); + c->count_latched = c->rw_mode; + } +} + +static void pit_latch_status(struct kvm *kvm, int channel) +{ + struct kvm_kpit_channel_state *c = + &kvm->arch.vpit->pit_state.channels[channel]; + + WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); + + if (!c->status_latched) { + /* TODO: Return NULL COUNT (bit 6). */ + c->status = ((pit_get_out(kvm, channel) << 7) | + (c->rw_mode << 4) | + (c->mode << 1) | + c->bcd); + c->status_latched = 1; + } +} + +int __pit_timer_fn(struct kvm_kpit_state *ps) +{ + struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; + struct kvm_kpit_timer *pt = &ps->pit_timer; + + atomic_inc(&pt->pending); + smp_mb__after_atomic_inc(); + /* FIXME: handle case where the guest is in guest mode */ + if (vcpu0 && waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); + } + + pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); + pt->scheduled = ktime_to_ns(pt->timer.expires); + + return (pt->period == 0 ? 0 : 1); +} + +int pit_has_pending_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_pit *pit = vcpu->kvm->arch.vpit; + + if (pit && vcpu->vcpu_id == 0) + return atomic_read(&pit->pit_state.pit_timer.pending); + + return 0; +} + +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) +{ + struct kvm_kpit_state *ps; + int restart_timer = 0; + + ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); + + restart_timer = __pit_timer_fn(ps); + + if (restart_timer) + return HRTIMER_RESTART; + else + return HRTIMER_NORESTART; +} + +static void destroy_pit_timer(struct kvm_kpit_timer *pt) +{ + pr_debug("pit: execute del timer!\n"); + hrtimer_cancel(&pt->timer); +} + +static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) +{ + s64 interval; + + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); + + pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); + + /* TODO The new value only affected after the retriggered */ + hrtimer_cancel(&pt->timer); + pt->period = (is_period == 0) ? 0 : interval; + pt->timer.function = pit_timer_fn; + atomic_set(&pt->pending, 0); + + hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), + HRTIMER_MODE_ABS); +} + +static void pit_load_count(struct kvm *kvm, int channel, u32 val) +{ + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + + WARN_ON(!mutex_is_locked(&ps->lock)); + + pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); + + /* + * Though spec said the state of 8254 is undefined after power-up, + * seems some tricky OS like Windows XP depends on IRQ0 interrupt + * when booting up. + * So here setting initialize rate for it, and not a specific number + */ + if (val == 0) + val = 0x10000; + + ps->channels[channel].count_load_time = ktime_get(); + ps->channels[channel].count = val; + + if (channel != 0) + return; + + /* Two types of timer + * mode 1 is one shot, mode 2 is period, otherwise del timer */ + switch (ps->channels[0].mode) { + case 1: + create_pit_timer(&ps->pit_timer, val, 0); + break; + case 2: + create_pit_timer(&ps->pit_timer, val, 1); + break; + default: + destroy_pit_timer(&ps->pit_timer); + } +} + +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val) +{ + mutex_lock(&kvm->arch.vpit->pit_state.lock); + pit_load_count(kvm, channel, val); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); +} + +static void pit_ioport_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + int channel, access; + struct kvm_kpit_channel_state *s; + u32 val = *(u32 *) data; + + val &= 0xff; + addr &= KVM_PIT_CHANNEL_MASK; + + mutex_lock(&pit_state->lock); + + if (val != 0) + pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", + (unsigned int)addr, len, val); + + if (addr == 3) { + channel = val >> 6; + if (channel == 3) { + /* Read-Back Command. */ + for (channel = 0; channel < 3; channel++) { + s = &pit_state->channels[channel]; + if (val & (2 << channel)) { + if (!(val & 0x20)) + pit_latch_count(kvm, channel); + if (!(val & 0x10)) + pit_latch_status(kvm, channel); + } + } + } else { + /* Select Counter <channel>. */ + s = &pit_state->channels[channel]; + access = (val >> 4) & KVM_PIT_CHANNEL_MASK; + if (access == 0) { + pit_latch_count(kvm, channel); + } else { + s->rw_mode = access; + s->read_state = access; + s->write_state = access; + s->mode = (val >> 1) & 7; + if (s->mode > 5) + s->mode -= 4; + s->bcd = val & 1; + } + } + } else { + /* Write Count. */ + s = &pit_state->channels[addr]; + switch (s->write_state) { + default: + case RW_STATE_LSB: + pit_load_count(kvm, addr, val); + break; + case RW_STATE_MSB: + pit_load_count(kvm, addr, val << 8); + break; + case RW_STATE_WORD0: + s->write_latch = val; + s->write_state = RW_STATE_WORD1; + break; + case RW_STATE_WORD1: + pit_load_count(kvm, addr, s->write_latch | (val << 8)); + s->write_state = RW_STATE_WORD0; + break; + } + } + + mutex_unlock(&pit_state->lock); +} + +static void pit_ioport_read(struct kvm_io_device *this, + gpa_t addr, int len, void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + int ret, count; + struct kvm_kpit_channel_state *s; + + addr &= KVM_PIT_CHANNEL_MASK; + s = &pit_state->channels[addr]; + + mutex_lock(&pit_state->lock); + + if (s->status_latched) { + s->status_latched = 0; + ret = s->status; + } else if (s->count_latched) { + switch (s->count_latched) { + default: + case RW_STATE_LSB: + ret = s->latched_count & 0xff; + s->count_latched = 0; + break; + case RW_STATE_MSB: + ret = s->latched_count >> 8; + s->count_latched = 0; + break; + case RW_STATE_WORD0: + ret = s->latched_count & 0xff; + s->count_latched = RW_STATE_MSB; + break; + } + } else { + switch (s->read_state) { + default: + case RW_STATE_LSB: + count = pit_get_count(kvm, addr); + ret = count & 0xff; + break; + case RW_STATE_MSB: + count = pit_get_count(kvm, addr); + ret = (count >> 8) & 0xff; + break; + case RW_STATE_WORD0: + count = pit_get_count(kvm, addr); + ret = count & 0xff; + s->read_state = RW_STATE_WORD1; + break; + case RW_STATE_WORD1: + count = pit_get_count(kvm, addr); + ret = (count >> 8) & 0xff; + s->read_state = RW_STATE_WORD0; + break; + } + } + + if (len > sizeof(ret)) + len = sizeof(ret); + memcpy(data, (char *)&ret, len); + + mutex_unlock(&pit_state->lock); +} + +static int pit_in_range(struct kvm_io_device *this, gpa_t addr) +{ + return ((addr >= KVM_PIT_BASE_ADDRESS) && + (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); +} + +static void speaker_ioport_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + u32 val = *(u32 *) data; + + mutex_lock(&pit_state->lock); + pit_state->speaker_data_on = (val >> 1) & 1; + pit_set_gate(kvm, 2, val & 1); + mutex_unlock(&pit_state->lock); +} + +static void speaker_ioport_read(struct kvm_io_device *this, + gpa_t addr, int len, void *data) +{ + struct kvm_pit *pit = (struct kvm_pit *)this->private; + struct kvm_kpit_state *pit_state = &pit->pit_state; + struct kvm *kvm = pit->kvm; + unsigned int refresh_clock; + int ret; + + /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */ + refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; + + mutex_lock(&pit_state->lock); + ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | + (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); + if (len > sizeof(ret)) + len = sizeof(ret); + memcpy(data, (char *)&ret, len); + mutex_unlock(&pit_state->lock); +} + +static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) +{ + return (addr == KVM_SPEAKER_BASE_ADDRESS); +} + +void kvm_pit_reset(struct kvm_pit *pit) +{ + int i; + struct kvm_kpit_channel_state *c; + + mutex_lock(&pit->pit_state.lock); + for (i = 0; i < 3; i++) { + c = &pit->pit_state.channels[i]; + c->mode = 0xff; + c->gate = (i != 2); + pit_load_count(pit->kvm, i, 0); + } + mutex_unlock(&pit->pit_state.lock); + + atomic_set(&pit->pit_state.pit_timer.pending, 0); + pit->pit_state.inject_pending = 1; +} + +struct kvm_pit *kvm_create_pit(struct kvm *kvm) +{ + struct kvm_pit *pit; + struct kvm_kpit_state *pit_state; + + pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); + if (!pit) + return NULL; + + mutex_init(&pit->pit_state.lock); + mutex_lock(&pit->pit_state.lock); + + /* Initialize PIO device */ + pit->dev.read = pit_ioport_read; + pit->dev.write = pit_ioport_write; + pit->dev.in_range = pit_in_range; + pit->dev.private = pit; + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); + + pit->speaker_dev.read = speaker_ioport_read; + pit->speaker_dev.write = speaker_ioport_write; + pit->speaker_dev.in_range = speaker_in_range; + pit->speaker_dev.private = pit; + kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev); + + kvm->arch.vpit = pit; + pit->kvm = kvm; + + pit_state = &pit->pit_state; + pit_state->pit = pit; + hrtimer_init(&pit_state->pit_timer.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + mutex_unlock(&pit->pit_state.lock); + + kvm_pit_reset(pit); + + return pit; +} + +void kvm_free_pit(struct kvm *kvm) +{ + struct hrtimer *timer; + + if (kvm->arch.vpit) { + mutex_lock(&kvm->arch.vpit->pit_state.lock); + timer = &kvm->arch.vpit->pit_state.pit_timer.timer; + hrtimer_cancel(timer); + mutex_unlock(&kvm->arch.vpit->pit_state.lock); + kfree(kvm->arch.vpit); + } +} + +void __inject_pit_timer_intr(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); + kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0); + kvm_pic_set_irq(pic_irqchip(kvm), 0, 1); + kvm_pic_set_irq(pic_irqchip(kvm), 0, 0); + mutex_unlock(&kvm->lock); +} + +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_pit *pit = vcpu->kvm->arch.vpit; + struct kvm *kvm = vcpu->kvm; + struct kvm_kpit_state *ps; + + if (vcpu && pit) { + ps = &pit->pit_state; + + /* Try to inject pending interrupts when: + * 1. Pending exists + * 2. Last interrupt was accepted or waited for too long time*/ + if (atomic_read(&ps->pit_timer.pending) && + (ps->inject_pending || + (jiffies - ps->last_injected_time + >= KVM_MAX_PIT_INTR_INTERVAL))) { + ps->inject_pending = 0; + __inject_pit_timer_intr(kvm); + ps->last_injected_time = jiffies; + } + } +} + +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +{ + struct kvm_arch *arch = &vcpu->kvm->arch; + struct kvm_kpit_state *ps; + + if (vcpu && arch->vpit) { + ps = &arch->vpit->pit_state; + if (atomic_read(&ps->pit_timer.pending) && + (((arch->vpic->pics[0].imr & 1) == 0 && + arch->vpic->pics[0].irq_base == vec) || + (arch->vioapic->redirtbl[0].fields.vector == vec && + arch->vioapic->redirtbl[0].fields.mask != 1))) { + ps->inject_pending = 1; + atomic_dec(&ps->pit_timer.pending); + ps->channels[0].count_load_time = ktime_get(); + } + } +} diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h new file mode 100644 index 00000000000..db25c2a6c8c --- /dev/null +++ b/arch/x86/kvm/i8254.h @@ -0,0 +1,63 @@ +#ifndef __I8254_H +#define __I8254_H + +#include "iodev.h" + +struct kvm_kpit_timer { + struct hrtimer timer; + int irq; + s64 period; /* unit: ns */ + s64 scheduled; + ktime_t last_update; + atomic_t pending; +}; + +struct kvm_kpit_channel_state { + u32 count; /* can be 65536 */ + u16 latched_count; + u8 count_latched; + u8 status_latched; + u8 status; + u8 read_state; + u8 write_state; + u8 write_latch; + u8 rw_mode; + u8 mode; + u8 bcd; /* not supported */ + u8 gate; /* timer start */ + ktime_t count_load_time; +}; + +struct kvm_kpit_state { + struct kvm_kpit_channel_state channels[3]; + struct kvm_kpit_timer pit_timer; + u32 speaker_data_on; + struct mutex lock; + struct kvm_pit *pit; + bool inject_pending; /* if inject pending interrupts */ + unsigned long last_injected_time; +}; + +struct kvm_pit { + unsigned long base_addresss; + struct kvm_io_device dev; + struct kvm_io_device speaker_dev; + struct kvm *kvm; + struct kvm_kpit_state pit_state; +}; + +#define KVM_PIT_BASE_ADDRESS 0x40 +#define KVM_SPEAKER_BASE_ADDRESS 0x61 +#define KVM_PIT_MEM_LENGTH 4 +#define KVM_PIT_FREQ 1193181 +#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 +#define KVM_PIT_CHANNEL_MASK 0x3 + +void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec); +void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); +struct kvm_pit *kvm_create_pit(struct kvm *kvm); +void kvm_free_pit(struct kvm *kvm); +void kvm_pit_reset(struct kvm_pit *pit); + +#endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index e5714759e97..ce1f583459b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -23,6 +23,22 @@ #include <linux/kvm_host.h> #include "irq.h" +#include "i8254.h" + +/* + * check if there are pending timer events + * to be processed. + */ +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + int ret; + + ret = pit_has_pending_timer(vcpu); + ret |= apic_has_pending_timer(vcpu); + + return ret; +} +EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* * check if there is pending interrupt without @@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { kvm_inject_apic_timer_irqs(vcpu); + kvm_inject_pit_timer_irqs(vcpu); /* TODO: PIT, RTC etc. */ } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); @@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) { kvm_apic_timer_intr_post(vcpu, vec); + kvm_pit_timer_intr_post(vcpu, vec); /* TODO: PIT, RTC etc. */ } EXPORT_SYMBOL_GPL(kvm_timer_intr_post); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index fa5ed5d59b5..1802134b836 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); +int pit_has_pending_timer(struct kvm_vcpu *vcpu); +int apic_has_pending_timer(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index ecdfe97e463..65ef0fc2c03 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -39,6 +39,8 @@ struct vcpu_svm { unsigned long host_db_regs[NUM_DB_REGS]; unsigned long host_dr6; unsigned long host_dr7; + + u32 *msrpm; }; #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 68a6b151193..57ac4e4c556 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } else apic_clear_vector(vector, apic->regs + APIC_TMR); - if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) kvm_vcpu_kick(vcpu); - else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { - vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); } @@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (level) { - if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) printk(KERN_DEBUG "INIT on a runnable vcpu %d\n", vcpu->vcpu_id); - vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED; + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; kvm_vcpu_kick(vcpu); } else { printk(KERN_DEBUG @@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) { + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED; + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); } @@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic) apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " - "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, + "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), apic_get_reg(apic, APIC_TMICT), apic->timer.period, @@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this, /* too common printing */ if (offset != APIC_EOI) apic_debug("%s: offset 0x%x with length 0x%x, and value is " - "0x%x\n", __FUNCTION__, offset, len, val); + "0x%x\n", __func__, offset, len, val); offset &= 0xff0; @@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (apic_get_reg(apic, APIC_TASKPRI) & 4)); } +EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { @@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) struct kvm_lapic *apic; int i; - apic_debug("%s\n", __FUNCTION__); + apic_debug("%s\n", __func__); ASSERT(vcpu); apic = vcpu->arch.apic; @@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_update_ppr(apic); apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, + "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), vcpu->arch.apic_base, apic->base_address); } @@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) atomic_inc(&apic->timer.pending); if (waitqueue_active(q)) { - apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); } if (apic_lvtt_period(apic)) { @@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic) return result; } +int apic_has_pending_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *lapic = vcpu->arch.apic; + + if (lapic) + return atomic_read(&lapic->timer.pending); + + return 0; +} + static int __inject_apic_timer_irq(struct kvm_lapic *apic) { int vector; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e55af12e11b..2ad6f548167 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -27,11 +27,22 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/swap.h> +#include <linux/hugetlb.h> +#include <linux/compiler.h> #include <asm/page.h> #include <asm/cmpxchg.h> #include <asm/io.h> +/* + * When setting this variable to true it enables Two-Dimensional-Paging + * where the hardware walks 2 page tables: + * 1. the guest-virtual to guest-physical + * 2. while doing 1. it walks guest-physical to host-physical + * If the hardware supports that we don't need to do shadow paging. + */ +bool tdp_enabled = false; + #undef MMU_DEBUG #undef AUDIT @@ -101,8 +112,6 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 -#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) - #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 @@ -159,6 +168,13 @@ static int dbg = 1; #define ACC_USER_MASK PT_USER_MASK #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) +struct kvm_pv_mmu_op_buffer { + void *ptr; + unsigned len; + unsigned processed; + char buf[512] __aligned(sizeof(long)); +}; + struct kvm_rmap_desc { u64 *shadow_ptes[RMAP_EXT]; struct kvm_rmap_desc *more; @@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte) static int is_shadow_present_pte(u64 pte) { - pte &= ~PT_SHADOW_IO_MARK; return pte != shadow_trap_nonpresent_pte && pte != shadow_notrap_nonpresent_pte; } +static int is_large_pte(u64 pte) +{ + return pte & PT_PAGE_SIZE_MASK; +} + static int is_writeble_pte(unsigned long pte) { return pte & PT_WRITABLE_MASK; @@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte) return pte & PT_DIRTY_MASK; } -static int is_io_pte(unsigned long pte) +static int is_rmap_pte(u64 pte) { - return pte & PT_SHADOW_IO_MARK; + return is_shadow_present_pte(pte); } -static int is_rmap_pte(u64 pte) +static pfn_t spte_to_pfn(u64 pte) { - return is_shadow_present_pte(pte); + return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; } static gfn_t pse36_gfn_delta(u32 gpte) @@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) } /* + * Return the pointer to the largepage write count for a given + * gfn, handling slots that are not large page aligned. + */ +static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot) +{ + unsigned long idx; + + idx = (gfn / KVM_PAGES_PER_HPAGE) - + (slot->base_gfn / KVM_PAGES_PER_HPAGE); + return &slot->lpage_info[idx].write_count; +} |