diff options
author | Avi Kivity <avi@qumranet.com> | 2007-12-16 11:02:48 +0200 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-01-30 18:01:18 +0200 |
commit | edf884172e9828c6234b254208af04655855038d (patch) | |
tree | f5e5d1eecaed9737eced6ba60d09fe93149751c1 /arch | |
parent | 9584bf2c93f56656dba0de8f6c75b54ca7995143 (diff) |
KVM: Move arch dependent files to new directory arch/x86/kvm/
This paves the way for multiple architecture support. Note that while
ioapic.c could potentially be shared with ia64, it is also moved.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 57 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 15 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 450 | ||||
-rw-r--r-- | arch/x86/kvm/ioapic.c | 400 | ||||
-rw-r--r-- | arch/x86/kvm/irq.c | 98 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 195 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_svm.h | 45 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 1085 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 1805 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 44 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 461 | ||||
-rw-r--r-- | arch/x86/kvm/segment_descriptor.h | 29 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 1725 | ||||
-rw-r--r-- | arch/x86/kvm/svm.h | 325 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 2671 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.h | 324 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 3146 | ||||
-rw-r--r-- | arch/x86/kvm/x86_emulate.c | 1912 |
20 files changed, 14791 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d289cfcf92c..65b449134cf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1599,4 +1599,6 @@ source "security/Kconfig" source "crypto/Kconfig" +source "arch/x86/kvm/Kconfig" + source "lib/Kconfig" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b08f18261df..da8f4129780 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -7,6 +7,8 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif +core-$(CONFIG_KVM) += arch/x86/kvm/ + # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. # e.g.: obj-y += foo_$(BITS).o diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig new file mode 100644 index 00000000000..c83e1c9b512 --- /dev/null +++ b/arch/x86/kvm/Kconfig @@ -0,0 +1,57 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || X86 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel processors support" + depends on KVM + ---help--- + Provides support for KVM on Intel processors equipped with the VT + extensions. + +config KVM_AMD + tristate "KVM for AMD processors support" + depends on KVM + ---help--- + Provides support for KVM on AMD processors equipped with the AMD-V + (SVM) extensions. + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/lguest/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile new file mode 100644 index 00000000000..880ffe403b3 --- /dev/null +++ b/arch/x86/kvm/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +common-objs = $(addprefix ../../../drivers/kvm/, kvm_main.o) + +EXTRA_CFLAGS += -I drivers/kvm + +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ + ioapic.o +obj-$(CONFIG_KVM) += kvm.o +kvm-intel-objs = vmx.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o +kvm-amd-objs = svm.o +obj-$(CONFIG_KVM_AMD) += kvm-amd.o diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c new file mode 100644 index 00000000000..ab29cf2def4 --- /dev/null +++ b/arch/x86/kvm/i8259.c @@ -0,0 +1,450 @@ +/* + * 8259 interrupt controller emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * Port from Qemu. + */ +#include <linux/mm.h> +#include "irq.h" + +#include <linux/kvm_host.h> + +/* + * set irq level. If an edge is detected, then the IRR is set to 1 + */ +static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +{ + int mask; + mask = 1 << irq; + if (s->elcr & mask) /* level triggered */ + if (level) { + s->irr |= mask; + s->last_irr |= mask; + } else { + s->irr &= ~mask; + s->last_irr &= ~mask; + } + else /* edge triggered */ + if (level) { + if ((s->last_irr & mask) == 0) + s->irr |= mask; + s->last_irr |= mask; + } else + s->last_irr &= ~mask; +} + +/* + * return the highest priority found in mask (highest = smallest + * number). Return 8 if no irq + */ +static inline int get_priority(struct kvm_kpic_state *s, int mask) +{ + int priority; + if (mask == 0) + return 8; + priority = 0; + while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0) + priority++; + return priority; +} + +/* + * return the pic wanted interrupt. return -1 if none + */ +static int pic_get_irq(struct kvm_kpic_state *s) +{ + int mask, cur_priority, priority; + + mask = s->irr & ~s->imr; + priority = get_priority(s, mask); + if (priority == 8) + return -1; + /* + * compute current priority. If special fully nested mode on the + * master, the IRQ coming from the slave is not taken into account + * for the priority computation. + */ + mask = s->isr; + if (s->special_fully_nested_mode && s == &s->pics_state->pics[0]) + mask &= ~(1 << 2); + cur_priority = get_priority(s, mask); + if (priority < cur_priority) + /* + * higher priority found: an irq should be generated + */ + return (priority + s->priority_add) & 7; + else + return -1; +} + +/* + * raise irq to CPU if necessary. must be called every time the active + * irq may change + */ +static void pic_update_irq(struct kvm_pic *s) +{ + int irq2, irq; + + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) { + /* + * if irq request by slave pic, signal master PIC + */ + pic_set_irq1(&s->pics[0], 2, 1); + pic_set_irq1(&s->pics[0], 2, 0); + } + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) + s->irq_request(s->irq_request_opaque, 1); + else + s->irq_request(s->irq_request_opaque, 0); +} + +void kvm_pic_update_irq(struct kvm_pic *s) +{ + pic_update_irq(s); +} + +void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + struct kvm_pic *s = opaque; + + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + pic_update_irq(s); +} + +/* + * acknowledge interrupt 'irq' + */ +static inline void pic_intack(struct kvm_kpic_state *s, int irq) +{ + if (s->auto_eoi) { + if (s->rotate_on_auto_eoi) + s->priority_add = (irq + 1) & 7; + } else + s->isr |= (1 << irq); + /* + * We don't clear a level sensitive interrupt here + */ + if (!(s->elcr & (1 << irq))) + s->irr &= ~(1 << irq); +} + +int kvm_pic_read_irq(struct kvm_pic *s) +{ + int irq, irq2, intno; + + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) { + pic_intack(&s->pics[0], irq); + if (irq == 2) { + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) + pic_intack(&s->pics[1], irq2); + else + /* + * spurious IRQ on slave controller + */ + irq2 = 7; + intno = s->pics[1].irq_base + irq2; + irq = irq2 + 8; + } else + intno = s->pics[0].irq_base + irq; + } else { + /* + * spurious IRQ on host controller + */ + irq = 7; + intno = s->pics[0].irq_base + irq; + } + pic_update_irq(s); + + return intno; +} + +void kvm_pic_reset(struct kvm_kpic_state *s) +{ + s->last_irr = 0; + s->irr = 0; + s->imr = 0; + s->isr = 0; + s->priority_add = 0; + s->irq_base = 0; + s->read_reg_select = 0; + s->poll = 0; + s->special_mask = 0; + s->init_state = 0; + s->auto_eoi = 0; + s->rotate_on_auto_eoi = 0; + s->special_fully_nested_mode = 0; + s->init4 = 0; +} + +static void pic_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + int priority, cmd, irq; + + addr &= 1; + if (addr == 0) { + if (val & 0x10) { + kvm_pic_reset(s); /* init */ + /* + * deassert a pending interrupt + */ + s->pics_state->irq_request(s->pics_state-> + irq_request_opaque, 0); + s->init_state = 1; + s->init4 = val & 1; + if (val & 0x02) + printk(KERN_ERR "single mode not supported"); + if (val & 0x08) + printk(KERN_ERR + "level sensitive irq not supported"); + } else if (val & 0x08) { + if (val & 0x04) + s->poll = 1; + if (val & 0x02) + s->read_reg_select = val & 1; + if (val & 0x40) + s->special_mask = (val >> 5) & 1; + } else { + cmd = val >> 5; + switch (cmd) { + case 0: + case 4: + s->rotate_on_auto_eoi = cmd >> 2; + break; + case 1: /* end of interrupt */ + case 5: + priority = get_priority(s, s->isr); + if (priority != 8) { + irq = (priority + s->priority_add) & 7; + s->isr &= ~(1 << irq); + if (cmd == 5) + s->priority_add = (irq + 1) & 7; + pic_update_irq(s->pics_state); + } + break; + case 3: + irq = val & 7; + s->isr &= ~(1 << irq); + pic_update_irq(s->pics_state); + break; + case 6: + s->priority_add = (val + 1) & 7; + pic_update_irq(s->pics_state); + break; + case 7: + irq = val & 7; + s->isr &= ~(1 << irq); + s->priority_add = (irq + 1) & 7; + pic_update_irq(s->pics_state); + break; + default: + break; /* no operation */ + } + } + } else + switch (s->init_state) { + case 0: /* normal mode */ + s->imr = val; + pic_update_irq(s->pics_state); + break; + case 1: + s->irq_base = val & 0xf8; + s->init_state = 2; + break; + case 2: + if (s->init4) + s->init_state = 3; + else + s->init_state = 0; + break; + case 3: + s->special_fully_nested_mode = (val >> 4) & 1; + s->auto_eoi = (val >> 1) & 1; + s->init_state = 0; + break; + } +} + +static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) +{ + int ret; + + ret = pic_get_irq(s); + if (ret >= 0) { + if (addr1 >> 7) { + s->pics_state->pics[0].isr &= ~(1 << 2); + s->pics_state->pics[0].irr &= ~(1 << 2); + } + s->irr &= ~(1 << ret); + s->isr &= ~(1 << ret); + if (addr1 >> 7 || ret != 2) + pic_update_irq(s->pics_state); + } else { + ret = 0x07; + pic_update_irq(s->pics_state); + } + + return ret; +} + +static u32 pic_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + unsigned int addr; + int ret; + + addr = addr1; + addr &= 1; + if (s->poll) { + ret = pic_poll_read(s, addr1); + s->poll = 0; + } else + if (addr == 0) + if (s->read_reg_select) + ret = s->isr; + else + ret = s->irr; + else + ret = s->imr; + return ret; +} + +static void elcr_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + s->elcr = val & s->elcr_mask; +} + +static u32 elcr_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + return s->elcr; +} + +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +{ + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + case 0x4d0: + case 0x4d1: + return 1; + default: + return 0; + } +} + +static void picdev_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = *(unsigned char *)val; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non byte write\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + pic_ioport_write(&s->pics[addr >> 7], addr, data); + break; + case 0x4d0: + case 0x4d1: + elcr_ioport_write(&s->pics[addr & 1], addr, data); + break; + } +} + +static void picdev_read(struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = 0; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non byte read\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + data = pic_ioport_read(&s->pics[addr >> 7], addr); + break; + case 0x4d0: + case 0x4d1: + data = elcr_ioport_read(&s->pics[addr & 1], addr); + break; + } + *(unsigned char *)val = data; +} + +/* + * callback when PIC0 irq status changed + */ +static void pic_irq_request(void *opaque, int level) +{ + struct kvm *kvm = opaque; + struct kvm_vcpu *vcpu = kvm->vcpus[0]; + + pic_irqchip(kvm)->output = level; + if (vcpu) + kvm_vcpu_kick(vcpu); +} + +struct kvm_pic *kvm_create_pic(struct kvm *kvm) +{ + struct kvm_pic *s; + s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); + if (!s) + return NULL; + s->pics[0].elcr_mask = 0xf8; + s->pics[1].elcr_mask = 0xde; + s->irq_request = pic_irq_request; + s->irq_request_opaque = kvm; + s->pics[0].pics_state = s; + s->pics[1].pics_state = s; + + /* + * Initialize PIO device + */ + s->dev.read = picdev_read; + s->dev.write = picdev_write; + s->dev.in_range = picdev_in_range; + s->dev.private = s; + kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev); + return s; +} diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c new file mode 100644 index 00000000000..72f12f75495 --- /dev/null +++ b/arch/x86/kvm/ioapic.c @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2001 MandrakeSoft S.A. + * + * MandrakeSoft S.A. + * 43, rue d'Aboukir + * 75002 Paris - France + * http://www.linux-mandrake.com/ + * http://www.mandrakesoft.com/ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Yunhong Jiang <yunhong.jiang@intel.com> + * Yaozu (Eddie) Dong <eddie.dong@intel.com> + * Based on Xen 3.1 code. + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/current.h> +#include "irq.h" +#if 0 +#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) +#else +#define ioapic_debug(fmt, arg...) +#endif +static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq); + +static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, + unsigned long addr, + unsigned long length) +{ + unsigned long result = 0; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) + | (IOAPIC_VERSION_ID & 0xff)); + break; + + case IOAPIC_REG_APIC_ID: + case IOAPIC_REG_ARB_ID: + result = ((ioapic->id & 0xf) << 24); + break; + + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; + u64 redir_content; + + ASSERT(redir_index < IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[redir_index].bits; + result = (ioapic->ioregsel & 0x1) ? + (redir_content >> 32) & 0xffffffff : + redir_content & 0xffffffff; + break; + } + } + + return result; +} + +static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +{ + union ioapic_redir_entry *pent; + + pent = &ioapic->redirtbl[idx]; + + if (!pent->fields.mask) { + ioapic_deliver(ioapic, idx); + if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) + pent->fields.remote_irr = 1; + } + if (!pent->fields.trig_mode) + ioapic->irr &= ~(1 << idx); +} + +static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) +{ + unsigned index; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + /* Writes are ignored. */ + break; + + case IOAPIC_REG_APIC_ID: + ioapic->id = (val >> 24) & 0xf; + break; + + case IOAPIC_REG_ARB_ID: + break; + + default: + index = (ioapic->ioregsel - 0x10) >> 1; + + ioapic_debug("change redir index %x val %x\n", index, val); + if (index >= IOAPIC_NUM_PINS) + return; + if (ioapic->ioregsel & 1) { + ioapic->redirtbl[index].bits &= 0xffffffff; + ioapic->redirtbl[index].bits |= (u64) val << 32; + } else { + ioapic->redirtbl[index].bits &= ~0xffffffffULL; + ioapic->redirtbl[index].bits |= (u32) val; + ioapic->redirtbl[index].fields.remote_irr = 0; + } + if (ioapic->irr & (1 << index)) + ioapic_service(ioapic, index); + break; + } +} + +static void ioapic_inj_irq(struct kvm_ioapic *ioapic, + struct kvm_vcpu *vcpu, + u8 vector, u8 trig_mode, u8 delivery_mode) +{ + ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode, + delivery_mode); + + ASSERT((delivery_mode == IOAPIC_FIXED) || + (delivery_mode == IOAPIC_LOWEST_PRIORITY)); + + kvm_apic_set_irq(vcpu, vector, trig_mode); +} + +static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode) +{ + u32 mask = 0; + int i; + struct kvm *kvm = ioapic->kvm; + struct kvm_vcpu *vcpu; + + ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode); + + if (dest_mode == 0) { /* Physical mode. */ + if (dest == 0xFF) { /* Broadcast. */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) + if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic) + mask |= 1 << i; + return mask; + } + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) { + if (vcpu->arch.apic) + mask = 1 << i; + break; + } + } + } else if (dest != 0) /* Logical mode, MDA non-zero. */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->arch.apic && + kvm_apic_match_logical_addr(vcpu->arch.apic, dest)) + mask |= 1 << vcpu->vcpu_id; + } + ioapic_debug("mask %x\n", mask); + return mask; +} + +static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +{ + u8 dest = ioapic->redirtbl[irq].fields.dest_id; + u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; + u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; + u8 vector = ioapic->redirtbl[irq].fields.vector; + u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; + u32 deliver_bitmask; + struct kvm_vcpu *vcpu; + int vcpu_id; + + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " + "vector=%x trig_mode=%x\n", + dest, dest_mode, delivery_mode, vector, trig_mode); + + deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); + if (!deliver_bitmask) { + ioapic_debug("no target on destination\n"); + return; + } + + switch (delivery_mode) { + case IOAPIC_LOWEST_PRIORITY: + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, + deliver_bitmask); + if (vcpu != NULL) + ioapic_inj_irq(ioapic, vcpu, vector, + trig_mode, delivery_mode); + else + ioapic_debug("null lowest prio vcpu: " + "mask=%x vector=%x delivery_mode=%x\n", + deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY); + break; + case IOAPIC_FIXED: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) { + ioapic_inj_irq(ioapic, vcpu, vector, + trig_mode, delivery_mode); + } + } + break; + + /* TODO: NMI */ + default: + printk(KERN_WARNING "Unsupported delivery mode %d\n", + delivery_mode); + break; + } +} + +void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +{ + u32 old_irr = ioapic->irr; + u32 mask = 1 << irq; + union ioapic_redir_entry entry; + + if (irq >= 0 && irq < IOAPIC_NUM_PINS) { + entry = ioapic->redirtbl[irq]; + level ^= entry.fields.polarity; + if (!level) + ioapic->irr &= ~mask; + else { + ioapic->irr |= mask; + if ((!entry.fields.trig_mode && old_irr != ioapic->irr) + || !entry.fields.remote_irr) + ioapic_service(ioapic, irq); + } + } +} + +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) +{ + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) + if (ioapic->redirtbl[i].fields.vector == vector) + return i; + return -1; +} + +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + union ioapic_redir_entry *ent; + int gsi; + + gsi = get_eoi_gsi(ioapic, vector); + if (gsi == -1) { + printk(KERN_WARNING "Can't find redir item for %d EOI\n", + vector); + return; + } + + ent = &ioapic->redirtbl[gsi]; + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) + ioapic_deliver(ioapic, gsi); +} + +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + + return ((addr >= ioapic->base_address && + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); +} + +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + u32 result; + + ioapic_debug("addr %lx\n", (unsigned long)addr); + ASSERT(!(addr & 0xf)); /* check alignment */ + + addr &= 0xff; + switch (addr) { + case IOAPIC_REG_SELECT: + result = ioapic->ioregsel; + break; + + case IOAPIC_REG_WINDOW: + result = ioapic_read_indirect(ioapic, addr, len); + break; + + default: + result = 0; + break; + } + switch (len) { + case 8: + *(u64 *) val = result; + break; + case 1: + case 2: + case 4: + memcpy(val, (char *)&result, len); + break; + default: + printk(KERN_WARNING "ioapic: wrong length %d\n", len); + } +} + +static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + u32 data; + + ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", + (void*)addr, len, val); + ASSERT(!(addr & 0xf)); /* check alignment */ + if (len == 4 || len == 8) + data = *(u32 *) val; + else { + printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); + return; + } + + addr &= 0xff; + switch (addr) { + case IOAPIC_REG_SELECT: + ioapic->ioregsel = data; + break; + + case IOAPIC_REG_WINDOW: + ioapic_write_indirect(ioapic, data); + break; +#ifdef CONFIG_IA64 + case IOAPIC_REG_EOI: + kvm_ioapic_update_eoi(ioapic, data); + break; +#endif + + default: + break; + } +} + +void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +{ + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) + ioapic->redirtbl[i].fields.mask = 1; + ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; + ioapic->ioregsel = 0; + ioapic->irr = 0; + ioapic->id = 0; +} + +int kvm_ioapic_init(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic; + + ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); + if (!ioapic) + return -ENOMEM; + kvm->arch.vioapic = ioapic; + kvm_ioapic_reset(ioapic); + ioapic->dev.read = ioapic_mmio_read; + ioapic->dev.write = ioapic_mmio_write; + ioapic->dev.in_range = ioapic_in_range; + ioapic->dev.private = ioapic; + ioapic->kvm = kvm; + kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); + return 0; +} diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c new file mode 100644 index 00000000000..07a09aad4fd --- /dev/null +++ b/arch/x86/kvm/irq.c @@ -0,0 +1,98 @@ +/* + * irq.c: API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + */ + +#include <linux/module.h> +#include <linux/kvm_host.h> + +#include "irq.h" + +/* + * check if there is pending interrupt without + * intack. + */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s; + + if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); /* PIC */ + return s->output; + } else + return 0; + } + return 1; +} +EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); + +/* + * Read pending interrupt vector and intack. + */ +int kvm_cpu_get_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s; + int vector; + + vector = kvm_get_apic_interrupt(v); /* APIC */ + if (vector == -1) { + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); + s->output = 0; /* PIC */ + vector = kvm_pic_read_irq(s); + } + } + return vector; +} +EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + ++vcpu->stat.halt_wakeup; + } + if (vcpu->gue |