diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-31 09:30:10 +1100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-31 09:30:10 +1100 |
commit | 2c57ee6f924c95e4dce61ed4776fb3f62e1b9f92 (patch) | |
tree | b9d92e52e8c0ee68a0f5012b470c6146a9f0b65a /arch | |
parent | f389e9fcecdec4c4cb890ad28ea30a87a579ec3e (diff) | |
parent | 2f52d58c92d971bf421f461ad06eb93fb4f34981 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (249 commits)
KVM: Move apic timer migration away from critical section
KVM: Put kvm_para.h include outside __KERNEL__
KVM: Fix unbounded preemption latency
KVM: Initialize the mmu caches only after verifying cpu support
KVM: MMU: Fix dirty page setting for pages removed from rmap
KVM: Portability: Move kvm_fpu to asm-x86/kvm.h
KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD
KVM: MMU: Merge shadow level check in FNAME(fetch)
KVM: MMU: Move kvm_free_some_pages() into critical section
KVM: MMU: Switch to mmu spinlock
KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte()
KVM: Add kvm_read_guest_atomic()
KVM: MMU: Concurrent guest walkers
KVM: Disable vapic support on Intel machines with FlexPriority
KVM: Accelerated apic support
KVM: local APIC TPR access reporting facility
KVM: Print data for unimplemented wrmsr
KVM: MMU: Add cache miss statistic
KVM: MMU: Coalesce remote tlb flushes
KVM: Expose ioapic to ia64 save/restore APIs
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 57 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 14 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 450 | ||||
-rw-r--r-- | arch/x86/kvm/irq.c | 78 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 88 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_svm.h | 45 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 1154 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 50 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 1885 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 44 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 484 | ||||
-rw-r--r-- | arch/x86/kvm/segment_descriptor.h | 29 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 1731 | ||||
-rw-r--r-- | arch/x86/kvm/svm.h | 325 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 2679 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.h | 324 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 3287 | ||||
-rw-r--r-- | arch/x86/kvm/x86_emulate.c | 1912 |
20 files changed, 14641 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fb3eea3e38e..65b449134cf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -107,6 +107,7 @@ config ARCH_SUPPORTS_OPROFILE bool default y +select HAVE_KVM config ZONE_DMA32 bool @@ -1598,4 +1599,6 @@ source "security/Kconfig" source "crypto/Kconfig" +source "arch/x86/kvm/Kconfig" + source "lib/Kconfig" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b08f18261df..da8f4129780 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -7,6 +7,8 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif +core-$(CONFIG_KVM) += arch/x86/kvm/ + # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. # e.g.: obj-y += foo_$(BITS).o diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig new file mode 100644 index 00000000000..c83e1c9b512 --- /dev/null +++ b/arch/x86/kvm/Kconfig @@ -0,0 +1,57 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || X86 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel processors support" + depends on KVM + ---help--- + Provides support for KVM on Intel processors equipped with the VT + extensions. + +config KVM_AMD + tristate "KVM for AMD processors support" + depends on KVM + ---help--- + Provides support for KVM on AMD processors equipped with the AMD-V + (SVM) extensions. + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/lguest/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile new file mode 100644 index 00000000000..ffdd0b31078 --- /dev/null +++ b/arch/x86/kvm/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm + +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o +obj-$(CONFIG_KVM) += kvm.o +kvm-intel-objs = vmx.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o +kvm-amd-objs = svm.o +obj-$(CONFIG_KVM_AMD) += kvm-amd.o diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c new file mode 100644 index 00000000000..ab29cf2def4 --- /dev/null +++ b/arch/x86/kvm/i8259.c @@ -0,0 +1,450 @@ +/* + * 8259 interrupt controller emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * Port from Qemu. + */ +#include <linux/mm.h> +#include "irq.h" + +#include <linux/kvm_host.h> + +/* + * set irq level. If an edge is detected, then the IRR is set to 1 + */ +static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +{ + int mask; + mask = 1 << irq; + if (s->elcr & mask) /* level triggered */ + if (level) { + s->irr |= mask; + s->last_irr |= mask; + } else { + s->irr &= ~mask; + s->last_irr &= ~mask; + } + else /* edge triggered */ + if (level) { + if ((s->last_irr & mask) == 0) + s->irr |= mask; + s->last_irr |= mask; + } else + s->last_irr &= ~mask; +} + +/* + * return the highest priority found in mask (highest = smallest + * number). Return 8 if no irq + */ +static inline int get_priority(struct kvm_kpic_state *s, int mask) +{ + int priority; + if (mask == 0) + return 8; + priority = 0; + while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0) + priority++; + return priority; +} + +/* + * return the pic wanted interrupt. return -1 if none + */ +static int pic_get_irq(struct kvm_kpic_state *s) +{ + int mask, cur_priority, priority; + + mask = s->irr & ~s->imr; + priority = get_priority(s, mask); + if (priority == 8) + return -1; + /* + * compute current priority. If special fully nested mode on the + * master, the IRQ coming from the slave is not taken into account + * for the priority computation. + */ + mask = s->isr; + if (s->special_fully_nested_mode && s == &s->pics_state->pics[0]) + mask &= ~(1 << 2); + cur_priority = get_priority(s, mask); + if (priority < cur_priority) + /* + * higher priority found: an irq should be generated + */ + return (priority + s->priority_add) & 7; + else + return -1; +} + +/* + * raise irq to CPU if necessary. must be called every time the active + * irq may change + */ +static void pic_update_irq(struct kvm_pic *s) +{ + int irq2, irq; + + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) { + /* + * if irq request by slave pic, signal master PIC + */ + pic_set_irq1(&s->pics[0], 2, 1); + pic_set_irq1(&s->pics[0], 2, 0); + } + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) + s->irq_request(s->irq_request_opaque, 1); + else + s->irq_request(s->irq_request_opaque, 0); +} + +void kvm_pic_update_irq(struct kvm_pic *s) +{ + pic_update_irq(s); +} + +void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + struct kvm_pic *s = opaque; + + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + pic_update_irq(s); +} + +/* + * acknowledge interrupt 'irq' + */ +static inline void pic_intack(struct kvm_kpic_state *s, int irq) +{ + if (s->auto_eoi) { + if (s->rotate_on_auto_eoi) + s->priority_add = (irq + 1) & 7; + } else + s->isr |= (1 << irq); + /* + * We don't clear a level sensitive interrupt here + */ + if (!(s->elcr & (1 << irq))) + s->irr &= ~(1 << irq); +} + +int kvm_pic_read_irq(struct kvm_pic *s) +{ + int irq, irq2, intno; + + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) { + pic_intack(&s->pics[0], irq); + if (irq == 2) { + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) + pic_intack(&s->pics[1], irq2); + else + /* + * spurious IRQ on slave controller + */ + irq2 = 7; + intno = s->pics[1].irq_base + irq2; + irq = irq2 + 8; + } else + intno = s->pics[0].irq_base + irq; + } else { + /* + * spurious IRQ on host controller + */ + irq = 7; + intno = s->pics[0].irq_base + irq; + } + pic_update_irq(s); + + return intno; +} + +void kvm_pic_reset(struct kvm_kpic_state *s) +{ + s->last_irr = 0; + s->irr = 0; + s->imr = 0; + s->isr = 0; + s->priority_add = 0; + s->irq_base = 0; + s->read_reg_select = 0; + s->poll = 0; + s->special_mask = 0; + s->init_state = 0; + s->auto_eoi = 0; + s->rotate_on_auto_eoi = 0; + s->special_fully_nested_mode = 0; + s->init4 = 0; +} + +static void pic_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + int priority, cmd, irq; + + addr &= 1; + if (addr == 0) { + if (val & 0x10) { + kvm_pic_reset(s); /* init */ + /* + * deassert a pending interrupt + */ + s->pics_state->irq_request(s->pics_state-> + irq_request_opaque, 0); + s->init_state = 1; + s->init4 = val & 1; + if (val & 0x02) + printk(KERN_ERR "single mode not supported"); + if (val & 0x08) + printk(KERN_ERR + "level sensitive irq not supported"); + } else if (val & 0x08) { + if (val & 0x04) + s->poll = 1; + if (val & 0x02) + s->read_reg_select = val & 1; + if (val & 0x40) + s->special_mask = (val >> 5) & 1; + } else { + cmd = val >> 5; + switch (cmd) { + case 0: + case 4: + s->rotate_on_auto_eoi = cmd >> 2; + break; + case 1: /* end of interrupt */ + case 5: + priority = get_priority(s, s->isr); + if (priority != 8) { + irq = (priority + s->priority_add) & 7; + s->isr &= ~(1 << irq); + if (cmd == 5) + s->priority_add = (irq + 1) & 7; + pic_update_irq(s->pics_state); + } + break; + case 3: + irq = val & 7; + s->isr &= ~(1 << irq); + pic_update_irq(s->pics_state); + break; + case 6: + s->priority_add = (val + 1) & 7; + pic_update_irq(s->pics_state); + break; + case 7: + irq = val & 7; + s->isr &= ~(1 << irq); + s->priority_add = (irq + 1) & 7; + pic_update_irq(s->pics_state); + break; + default: + break; /* no operation */ + } + } + } else + switch (s->init_state) { + case 0: /* normal mode */ + s->imr = val; + pic_update_irq(s->pics_state); + break; + case 1: + s->irq_base = val & 0xf8; + s->init_state = 2; + break; + case 2: + if (s->init4) + s->init_state = 3; + else + s->init_state = 0; + break; + case 3: + s->special_fully_nested_mode = (val >> 4) & 1; + s->auto_eoi = (val >> 1) & 1; + s->init_state = 0; + break; + } +} + +static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) +{ + int ret; + + ret = pic_get_irq(s); + if (ret >= 0) { + if (addr1 >> 7) { + s->pics_state->pics[0].isr &= ~(1 << 2); + s->pics_state->pics[0].irr &= ~(1 << 2); + } + s->irr &= ~(1 << ret); + s->isr &= ~(1 << ret); + if (addr1 >> 7 || ret != 2) + pic_update_irq(s->pics_state); + } else { + ret = 0x07; + pic_update_irq(s->pics_state); + } + + return ret; +} + +static u32 pic_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + unsigned int addr; + int ret; + + addr = addr1; + addr &= 1; + if (s->poll) { + ret = pic_poll_read(s, addr1); + s->poll = 0; + } else + if (addr == 0) + if (s->read_reg_select) + ret = s->isr; + else + ret = s->irr; + else + ret = s->imr; + return ret; +} + +static void elcr_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + s->elcr = val & s->elcr_mask; +} + +static u32 elcr_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + return s->elcr; +} + +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +{ + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + case 0x4d0: + case 0x4d1: + return 1; + default: + return 0; + } +} + +static void picdev_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = *(unsigned char *)val; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non byte write\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + pic_ioport_write(&s->pics[addr >> 7], addr, data); + break; + case 0x4d0: + case 0x4d1: + elcr_ioport_write(&s->pics[addr & 1], addr, data); + break; + } +} + +static void picdev_read(struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = 0; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non byte read\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + data = pic_ioport_read(&s->pics[addr >> 7], addr); + break; + case 0x4d0: + case 0x4d1: + data = elcr_ioport_read(&s->pics[addr & 1], addr); + break; + } + *(unsigned char *)val = data; +} + +/* + * callback when PIC0 irq status changed + */ +static void pic_irq_request(void *opaque, int level) +{ + struct kvm *kvm = opaque; + struct kvm_vcpu *vcpu = kvm->vcpus[0]; + + pic_irqchip(kvm)->output = level; + if (vcpu) + kvm_vcpu_kick(vcpu); +} + +struct kvm_pic *kvm_create_pic(struct kvm *kvm) +{ + struct kvm_pic *s; + s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); + if (!s) + return NULL; + s->pics[0].elcr_mask = 0xf8; + s->pics[1].elcr_mask = 0xde; + s->irq_request = pic_irq_request; + s->irq_request_opaque = kvm; + s->pics[0].pics_state = s; + s->pics[1].pics_state = s; + + /* + * Initialize PIO device + */ + s->dev.read = picdev_read; + s->dev.write = picdev_write; + s->dev.in_range = picdev_in_range; + s->dev.private = s; + kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev); + return s; +} diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c new file mode 100644 index 00000000000..e5714759e97 --- /dev/null +++ b/arch/x86/kvm/irq.c @@ -0,0 +1,78 @@ +/* + * irq.c: API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + */ + +#include <linux/module.h> +#include <linux/kvm_host.h> + +#include "irq.h" + +/* + * check if there is pending interrupt without + * intack. + */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s; + + if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); /* PIC */ + return s->output; + } else + return 0; + } + return 1; +} +EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); + +/* + * Read pending interrupt vector and intack. + */ +int kvm_cpu_get_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s; + int vector; + + vector = kvm_get_apic_interrupt(v); /* APIC */ + if (vector == -1) { + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); + s->output = 0; /* PIC */ + vector = kvm_pic_read_irq(s); + } + } + return vector; +} +EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); + +void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) +{ + kvm_inject_apic_timer_irqs(vcpu); + /* TODO: PIT, RTC etc. */ +} +EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); + +void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +{ + kvm_apic_timer_intr_post(vcpu, vec); + /* TODO: PIT, RTC etc. */ +} +EXPORT_SYMBOL_GPL(kvm_timer_intr_post); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h new file mode 100644 index 00000000000..fa5ed5d59b5 --- /dev/null +++ b/arch/x86/kvm/irq.h @@ -0,0 +1,88 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include <linux/mm_types.h> +#include <linux/hrtimer.h> +#include <linux/kvm_host.h> + +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +struct kvm; +struct kvm_vcpu; + +typedef void irq_request_func(void *opaque, int level); + +struct kvm_kpic_state { + u8 last_irr; /* edge detection */ + u8 irr; /* interrupt request register */ + u8 imr; /* interrupt mask register */ + u8 isr; /* interrupt service register */ + u8 priority_add; /* highest irq priority */ + u8 irq_base; + u8 read_reg_select; + u8 poll; + u8 special_mask; + u8 init_state; + u8 auto_eoi; + u8 rotate_on_auto_eoi; + u8 special_fully_nested_mode; + u8 init4; /* true if 4 byte init */ + u8 elcr; /* PIIX edge/trigger selection */ + u8 elcr_mask; + struct kvm_pic *pics_state; +}; + +struct kvm_pic { + struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ + irq_request_func *irq_request; + void *irq_request_opaque; + int output; /* intr from master PIC */ + struct kvm_io_device dev; +}; + +struct kvm_pic *kvm_create_pic(struct kvm *kvm); +void kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_read_irq(struct kvm_pic *s); +void kvm_pic_update_irq(struct kvm_pic *s); + +static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) +{ + return kvm->arch.vpic; +} + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return pic_irqchip(kvm) != NULL; +} + +void kvm_pic_reset(struct kvm_kpic_state *s); + +void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); +void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); +void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h new file mode 100644 index 00000000000..ecdfe97e463 --- /dev/null +++ b/arch/x86/kvm/kvm_svm.h @@ -0,0 +1,45 @@ +#ifndef __KVM_SVM_H +#define __KVM_SVM_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/kvm_host.h> +#include <asm/msr.h> + +#include "svm.h" + +static const u32 host_save_user_msrs[] = { +#ifdef CONFIG_X86_64 + MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, + MSR_FS_BASE, +#endif + MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, +}; + +#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +#define NUM_DB_REGS 4 + +struct kvm_vcpu; + +struct vcpu_svm { + struct kvm_vcpu vcpu; + struct vmcb *vmcb; + unsigned long vmcb_pa; + struct svm_cpu_data *svm_data; + uint64_t asid_generation; + + unsigned long db_regs[NUM_DB_REGS]; + + u64 next_rip; + + u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; + u64 host_gs_base; + unsigned long host_cr2; + unsigned long host_db_regs[NUM_DB_REGS]; + unsigned long host_dr6; + unsigned long host_dr7; +}; + +#endif + diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c new file mode 100644 index 00000000000..2cbee9479ce --- /dev/null +++ b/arch/x86/kvm/lapic.c @@ -0,0 +1,1154 @@ + +/* + * Local APIC virtualization + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2007 Novell + * Copyright (C) 2007 Intel + * + * Authors: + * Dor Laor <dor.laor@qumranet.com> + * Gregory Haskins <ghaskins@novell.com> + * Yaozu (Eddie) Dong <eddie.dong@intel.com> + * + * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <linux/module.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/page.h> +#include <asm/current.h> +#include <asm/apicdef.h> +#include <asm/atomic.h> +#include <asm/div64.h> +#include "irq.h" + +#define PRId64 "d" +#define PRIx64 "llx" +#define PRIu64 "u" +#define PRIo64 "o" + +#define APIC_BUS_CYCLE_NS 1 + +/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ +#define apic_debug(fmt, arg...) + +#define APIC_LVT_NUM 6 +/* 14 is the version for Xeon and Pentium 8.4.8*/ +#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) +#define LAPIC_MMIO_LENGTH (1 << 12) +/* followed define is not in apicdef.h */ +#define APIC_SHORT_MASK 0xc0000 +#define APIC_DEST_NOSHORT 0x0 +#define APIC_DEST_MASK 0x800 +#define MAX_APIC_VECTOR 256 + +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) + +static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) +{ + return *((u32 *) (apic->regs + reg_off)); +} + +static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) +{ + *((u32 *) (apic->regs + reg_off)) = val; +} + +static inline int apic_test_and_set_vector(int vec, void *bitmap) +{ + return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline int apic_test_and_clear_vector(int vec, void *bitmap) +{ + return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline void apic_set_vector(int vec, void *bitmap) +{ + set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline void apic_clear_vector(int vec, void *bitmap) +{ + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline int apic_hw_enabled(struct kvm_lapic *apic) +{ + return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; +} + +static inline int apic_sw_enabled(struct kvm_lapic *apic) +{ + return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; +} + +static inline int apic_enabled(struct kvm_lapic *apic) +{ + return apic_sw_enabled(apic) && apic_hw_enabled(apic); +} + +#define LVT_MASK \ + (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) + +#define LINT_MASK \ + (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ + APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) + +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + +static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) +{ + return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); +} + +static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) +{ + return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; +} + +static inline int apic_lvtt_period(struct kvm_lapic *apic) +{ + return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; +} + +static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { + LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ + LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ + LVT_MASK | APIC_MODE_MASK, /* LVTPC */ + LINT_MASK, LINT_MASK, /* LVT0-1 */ + LVT_MASK /* LVTERR */ +}; + +static int find_highest_vector(void *bitmap) +{ + u32 *word = bitmap; + int word_offset = MAX_APIC_VECTOR >> 5; + + while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) + continue; + + if (likely(!word_offset && !word[0])) + return -1; + else + return fls(word[word_offset << 2]) - 1 + (word_offset << 5); +} + +static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) +{ + return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); +} + +static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) +{ + apic_clear_vector(vec, apic->regs + APIC_IRR); +} + +static inline int apic_find_highest_irr(struct kvm_lapic *apic) +{ + int result; + + result = find_highest_vector(apic->regs + APIC_IRR); + ASSERT(result == -1 || result >= 16); + + return result; +} + +int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int highest_irr; + + if (!apic) + return 0; + highest_irr = apic_find_highest_irr(apic); + + return highest_irr; +} +EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (!apic_test_and_set_irr(vec, apic)) { + /* a new pending irq is set in IRR */ + if (trig) + apic_set_vector(vec, apic->regs + APIC_TMR); + else + apic_clear_vector(vec, apic->regs + APIC_TMR); + kvm_vcpu_kick(apic->vcpu); + return 1; + } + return 0; +} + +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int result; + + result = find_highest_vector(apic->regs + APIC_ISR); + ASSERT(result == -1 || result >= 16); + + return result; +} + +static void apic_update_ppr(struct kvm_lapic *apic) +{ + u32 tpr, isrv, ppr; + int isr; + + tpr = apic_get_reg(apic, APIC_TASKPRI); + isr = apic_find_highest_isr(apic); + isrv = (isr != -1) ? isr : 0; + + if ((tpr & 0xf0) >= (isrv & 0xf0)) + ppr = tpr & 0xff; + else + ppr = isrv & 0xf0; + + apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", < |