aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-13 10:02:11 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-13 10:02:11 -0700
commit3749c66c67fb5c257771815c186bc32290cacf44 (patch)
treede6634f722a9b79c60fabbd605660e46741f7160
parent835c34a1687f524c37d4fb8bad18d642c74bed8d (diff)
parent8a45450d0a559912873428077908f9bc1411042c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (106 commits) KVM: Replace enum by #define KVM: Skip pio instruction when it is emulated, not executed KVM: x86 emulator: popf KVM: x86 emulator: fix src, dst value initialization KVM: x86 emulator: jmp abs KVM: x86 emulator: lea KVM: X86 emulator: jump conditional short KVM: x86 emulator: imlpement jump conditional relative KVM: x86 emulator: sort opcodes into ascending order KVM: Improve emulation failure reporting KVM: x86 emulator: pushf KVM: x86 emulator: call near KVM: x86 emulator: push imm8 KVM: VMX: Fix exit qualification width on i386 KVM: Move main vcpu loop into subarch independent code KVM: VMX: Move vm entry failure handling to the exit handler KVM: MMU: Don't do GFP_NOWAIT allocations KVM: Rename kvm_arch_ops to kvm_x86_ops KVM: Simplify memory allocation KVM: Hoist SVM's get_cs_db_l_bits into core code. ...
-rw-r--r--drivers/kvm/Kconfig1
-rw-r--r--drivers/kvm/Makefile2
-rw-r--r--drivers/kvm/i8259.c450
-rw-r--r--drivers/kvm/ioapic.c388
-rw-r--r--drivers/kvm/irq.c98
-rw-r--r--drivers/kvm/irq.h165
-rw-r--r--drivers/kvm/kvm.h201
-rw-r--r--drivers/kvm/kvm_main.c1486
-rw-r--r--drivers/kvm/kvm_svm.h3
-rw-r--r--drivers/kvm/lapic.c1064
-rw-r--r--drivers/kvm/mmu.c51
-rw-r--r--drivers/kvm/paging_tmpl.h84
-rw-r--r--drivers/kvm/svm.c1046
-rw-r--r--drivers/kvm/vmx.c1034
-rw-r--r--drivers/kvm/vmx.h73
-rw-r--r--drivers/kvm/x86_emulate.c411
-rw-r--r--drivers/kvm/x86_emulate.h20
-rw-r--r--include/asm-x86/io_apic_32.h16
-rw-r--r--include/asm-x86/processor-flags.h2
-rw-r--r--include/linux/kvm.h128
20 files changed, 4848 insertions, 1875 deletions
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 0a419a0de60..8749fa4ffce 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -17,6 +17,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789fa9d6..e5a8f4d3e97 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
new file mode 100644
index 00000000000..a679157bc59
--- /dev/null
+++ b/drivers/kvm/i8259.c
@@ -0,0 +1,450 @@
+/*
+ * 8259 interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * Authors:
+ * Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ * Port from Qemu.
+ */
+#include <linux/mm.h>
+#include "irq.h"
+
+/*
+ * set irq level. If an edge is detected, then the IRR is set to 1
+ */
+static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+{
+ int mask;
+ mask = 1 << irq;
+ if (s->elcr & mask) /* level triggered */
+ if (level) {
+ s->irr |= mask;
+ s->last_irr |= mask;
+ } else {
+ s->irr &= ~mask;
+ s->last_irr &= ~mask;
+ }
+ else /* edge triggered */
+ if (level) {
+ if ((s->last_irr & mask) == 0)
+ s->irr |= mask;
+ s->last_irr |= mask;
+ } else
+ s->last_irr &= ~mask;
+}
+
+/*
+ * return the highest priority found in mask (highest = smallest
+ * number). Return 8 if no irq
+ */
+static inline int get_priority(struct kvm_kpic_state *s, int mask)
+{
+ int priority;
+ if (mask == 0)
+ return 8;
+ priority = 0;
+ while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
+ priority++;
+ return priority;
+}
+
+/*
+ * return the pic wanted interrupt. return -1 if none
+ */
+static int pic_get_irq(struct kvm_kpic_state *s)
+{
+ int mask, cur_priority, priority;
+
+ mask = s->irr & ~s->imr;
+ priority = get_priority(s, mask);
+ if (priority == 8)
+ return -1;
+ /*
+ * compute current priority. If special fully nested mode on the
+ * master, the IRQ coming from the slave is not taken into account
+ * for the priority computation.
+ */
+ mask = s->isr;
+ if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
+ mask &= ~(1 << 2);
+ cur_priority = get_priority(s, mask);
+ if (priority < cur_priority)
+ /*
+ * higher priority found: an irq should be generated
+ */
+ return (priority + s->priority_add) & 7;
+ else
+ return -1;
+}
+
+/*
+ * raise irq to CPU if necessary. must be called every time the active
+ * irq may change
+ */
+static void pic_update_irq(struct kvm_pic *s)
+{
+ int irq2, irq;
+
+ irq2 = pic_get_irq(&s->pics[1]);
+ if (irq2 >= 0) {
+ /*
+ * if irq request by slave pic, signal master PIC
+ */
+ pic_set_irq1(&s->pics[0], 2, 1);
+ pic_set_irq1(&s->pics[0], 2, 0);
+ }
+ irq = pic_get_irq(&s->pics[0]);
+ if (irq >= 0)
+ s->irq_request(s->irq_request_opaque, 1);
+ else
+ s->irq_request(s->irq_request_opaque, 0);
+}
+
+void kvm_pic_update_irq(struct kvm_pic *s)
+{
+ pic_update_irq(s);
+}
+
+void kvm_pic_set_irq(void *opaque, int irq, int level)
+{
+ struct kvm_pic *s = opaque;
+
+ pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+ pic_update_irq(s);
+}
+
+/*
+ * acknowledge interrupt 'irq'
+ */
+static inline void pic_intack(struct kvm_kpic_state *s, int irq)
+{
+ if (s->auto_eoi) {
+ if (s->rotate_on_auto_eoi)
+ s->priority_add = (irq + 1) & 7;
+ } else
+ s->isr |= (1 << irq);
+ /*
+ * We don't clear a level sensitive interrupt here
+ */
+ if (!(s->elcr & (1 << irq)))
+ s->irr &= ~(1 << irq);
+}
+
+int kvm_pic_read_irq(struct kvm_pic *s)
+{
+ int irq, irq2, intno;
+
+ irq = pic_get_irq(&s->pics[0]);
+ if (irq >= 0) {
+ pic_intack(&s->pics[0], irq);
+ if (irq == 2) {
+ irq2 = pic_get_irq(&s->pics[1]);
+ if (irq2 >= 0)
+ pic_intack(&s->pics[1], irq2);
+ else
+ /*
+ * spurious IRQ on slave controller
+ */
+ irq2 = 7;
+ intno = s->pics[1].irq_base + irq2;
+ irq = irq2 + 8;
+ } else
+ intno = s->pics[0].irq_base + irq;
+ } else {
+ /*
+ * spurious IRQ on host controller
+ */
+ irq = 7;
+ intno = s->pics[0].irq_base + irq;
+ }
+ pic_update_irq(s);
+
+ return intno;
+}
+
+static void pic_reset(void *opaque)
+{
+ struct kvm_kpic_state *s = opaque;
+
+ s->last_irr = 0;
+ s->irr = 0;
+ s->imr = 0;
+ s->isr = 0;
+ s->priority_add = 0;
+ s->irq_base = 0;
+ s->read_reg_select = 0;
+ s->poll = 0;
+ s->special_mask = 0;
+ s->init_state = 0;
+ s->auto_eoi = 0;
+ s->rotate_on_auto_eoi = 0;
+ s->special_fully_nested_mode = 0;
+ s->init4 = 0;
+}
+
+static void pic_ioport_write(void *opaque, u32 addr, u32 val)
+{
+ struct kvm_kpic_state *s = opaque;
+ int priority, cmd, irq;
+
+ addr &= 1;
+ if (addr == 0) {
+ if (val & 0x10) {
+ pic_reset(s); /* init */
+ /*
+ * deassert a pending interrupt
+ */
+ s->pics_state->irq_request(s->pics_state->
+ irq_request_opaque, 0);
+ s->init_state = 1;
+ s->init4 = val & 1;
+ if (val & 0x02)
+ printk(KERN_ERR "single mode not supported");
+ if (val & 0x08)
+ printk(KERN_ERR
+ "level sensitive irq not supported");
+ } else if (val & 0x08) {
+ if (val & 0x04)
+ s->poll = 1;
+ if (val & 0x02)
+ s->read_reg_select = val & 1;
+ if (val & 0x40)
+ s->special_mask = (val >> 5) & 1;
+ } else {
+ cmd = val >> 5;
+ switch (cmd) {
+ case 0:
+ case 4:
+ s->rotate_on_auto_eoi = cmd >> 2;
+ break;
+ case 1: /* end of interrupt */
+ case 5:
+ priority = get_priority(s, s->isr);
+ if (priority != 8) {
+ irq = (priority + s->priority_add) & 7;
+ s->isr &= ~(1 << irq);
+ if (cmd == 5)
+ s->priority_add = (irq + 1) & 7;
+ pic_update_irq(s->pics_state);
+ }
+ break;
+ case 3:
+ irq = val & 7;
+ s->isr &= ~(1 << irq);
+ pic_update_irq(s->pics_state);
+ break;
+ case 6:
+ s->priority_add = (val + 1) & 7;
+ pic_update_irq(s->pics_state);
+ break;
+ case 7:
+ irq = val & 7;
+ s->isr &= ~(1 << irq);
+ s->priority_add = (irq + 1) & 7;
+ pic_update_irq(s->pics_state);
+ break;
+ default:
+ break; /* no operation */
+ }
+ }
+ } else
+ switch (s->init_state) {
+ case 0: /* normal mode */
+ s->imr = val;
+ pic_update_irq(s->pics_state);
+ break;
+ case 1:
+ s->irq_base = val & 0xf8;
+ s->init_state = 2;
+ break;
+ case 2:
+ if (s->init4)
+ s->init_state = 3;
+ else
+ s->init_state = 0;
+ break;
+ case 3:
+ s->special_fully_nested_mode = (val >> 4) & 1;
+ s->auto_eoi = (val >> 1) & 1;
+ s->init_state = 0;
+ break;
+ }
+}
+
+static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
+{
+ int ret;
+
+ ret = pic_get_irq(s);
+ if (ret >= 0) {
+ if (addr1 >> 7) {
+ s->pics_state->pics[0].isr &= ~(1 << 2);
+ s->pics_state->pics[0].irr &= ~(1 << 2);
+ }
+ s->irr &= ~(1 << ret);
+ s->isr &= ~(1 << ret);
+ if (addr1 >> 7 || ret != 2)
+ pic_update_irq(s->pics_state);
+ } else {
+ ret = 0x07;
+ pic_update_irq(s->pics_state);
+ }
+
+ return ret;
+}
+
+static u32 pic_ioport_read(void *opaque, u32 addr1)
+{
+ struct kvm_kpic_state *s = opaque;
+ unsigned int addr;
+ int ret;
+
+ addr = addr1;
+ addr &= 1;
+ if (s->poll) {
+ ret = pic_poll_read(s, addr1);
+ s->poll = 0;
+ } else
+ if (addr == 0)
+ if (s->read_reg_select)
+ ret = s->isr;
+ else
+ ret = s->irr;
+ else
+ ret = s->imr;
+ return ret;
+}
+
+static void elcr_ioport_write(void *opaque, u32 addr, u32 val)
+{
+ struct kvm_kpic_state *s = opaque;
+ s->elcr = val & s->elcr_mask;
+}
+
+static u32 elcr_ioport_read(void *opaque, u32 addr1)
+{
+ struct kvm_kpic_state *s = opaque;
+ return s->elcr;
+}
+
+static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ switch (addr) {
+ case 0x20:
+ case 0x21:
+ case 0xa0:
+ case 0xa1:
+ case 0x4d0:
+ case 0x4d1:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void picdev_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *val)
+{
+ struct kvm_pic *s = this->private;
+ unsigned char data = *(unsigned char *)val;
+
+ if (len != 1) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "PIC: non byte write\n");
+ return;
+ }
+ switch (addr) {
+ case 0x20:
+ case 0x21:
+ case 0xa0:
+ case 0xa1:
+ pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ break;
+ case 0x4d0:
+ case 0x4d1:
+ elcr_ioport_write(&s->pics[addr & 1], addr, data);
+ break;
+ }
+}
+
+static void picdev_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *val)
+{
+ struct kvm_pic *s = this->private;
+ unsigned char data = 0;
+
+ if (len != 1) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "PIC: non byte read\n");
+ return;
+ }
+ switch (addr) {
+ case 0x20:
+ case 0x21:
+ case 0xa0:
+ case 0xa1:
+ data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ break;
+ case 0x4d0:
+ case 0x4d1:
+ data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ break;
+ }
+ *(unsigned char *)val = data;
+}
+
+/*
+ * callback when PIC0 irq status changed
+ */
+static void pic_irq_request(void *opaque, int level)
+{
+ struct kvm *kvm = opaque;
+ struct kvm_vcpu *vcpu = kvm->vcpus[0];
+
+ pic_irqchip(kvm)->output = level;
+ if (vcpu)
+ kvm_vcpu_kick(vcpu);
+}
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm)
+{
+ struct kvm_pic *s;
+ s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
+ if (!s)
+ return NULL;
+ s->pics[0].elcr_mask = 0xf8;
+ s->pics[1].elcr_mask = 0xde;
+ s->irq_request = pic_irq_request;
+ s->irq_request_opaque = kvm;
+ s->pics[0].pics_state = s;
+ s->pics[1].pics_state = s;
+
+ /*
+ * Initialize PIO device
+ */
+ s->dev.read = picdev_read;
+ s->dev.write = picdev_write;
+ s->dev.in_range = picdev_in_range;
+ s->dev.private = s;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
+ return s;
+}
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644
index 00000000000..c7992e667fd
--- /dev/null
+++ b/drivers/kvm/ioapic.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (C) 2001 MandrakeSoft S.A.
+ *
+ * MandrakeSoft S.A.
+ * 43, rue d'Aboukir
+ * 75002 Paris - France
+ * http://www.linux-mandrake.com/
+ * http://www.mandrakesoft.com/
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Yunhong Jiang <yunhong.jiang@intel.com>
+ * Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ * Based on Xen 3.1 code.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include <asm/io_apic.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ unsigned long addr,
+ unsigned long length)
+{
+ unsigned long result = 0;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+ | (IOAPIC_VERSION_ID & 0xff));
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ case IOAPIC_REG_ARB_ID:
+ result = ((ioapic->id & 0xf) << 24);
+ break;
+
+ default:
+ {
+ u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+ u64 redir_content;
+
+ ASSERT(redir_index < IOAPIC_NUM_PINS);
+
+ redir_content = ioapic->redirtbl[redir_index].bits;
+ result = (ioapic->ioregsel & 0x1) ?
+ (redir_content >> 32) & 0xffffffff :
+ redir_content & 0xffffffff;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+ union ioapic_redir_entry *pent;
+
+ pent = &ioapic->redirtbl[idx];
+
+ if (!pent->fields.mask) {
+ ioapic_deliver(ioapic, idx);
+ if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+ pent->fields.remote_irr = 1;
+ }
+ if (!pent->fields.trig_mode)
+ ioapic->irr &= ~(1 << idx);
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+ unsigned index;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ /* Writes are ignored. */
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ ioapic->id = (val >> 24) & 0xf;
+ break;
+
+ case IOAPIC_REG_ARB_ID:
+ break;
+
+ default:
+ index = (ioapic->ioregsel - 0x10) >> 1;
+
+ ioapic_debug("change redir index %x val %x", index, val);
+ if (index >= IOAPIC_NUM_PINS)
+ return;
+ if (ioapic->ioregsel & 1) {
+ ioapic->redirtbl[index].bits &= 0xffffffff;
+ ioapic->redirtbl[index].bits |= (u64) val << 32;
+ } else {
+ ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+ ioapic->redirtbl[index].bits |= (u32) val;
+ ioapic->redirtbl[index].fields.remote_irr = 0;
+ }
+ if (ioapic->irr & (1 << index))
+ ioapic_service(ioapic, index);
+ break;
+ }
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+ struct kvm_lapic *target,
+ u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+ ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+ delivery_mode);
+
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ kvm_apic_set_irq(target, vector, trig_mode);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+ u8 dest_mode)
+{
+ u32 mask = 0;
+ int i;
+ struct kvm *kvm = ioapic->kvm;
+ struct kvm_vcpu *vcpu;
+
+ ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+ if (dest_mode == 0) { /* Physical mode. */
+ if (dest == 0xFF) { /* Broadcast. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+ mask |= 1 << i;
+ return mask;
+ }
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
+ if (vcpu->apic)
+ mask = 1 << i;
+ break;
+ }
+ }
+ } else if (dest != 0) /* Logical mode, MDA non-zero. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->apic &&
+ kvm_apic_match_logical_addr(vcpu->apic, dest))
+ mask |= 1 << vcpu->vcpu_id;
+ }
+ ioapic_debug("mask %x", mask);
+ return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+ u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+ u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+ u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+ u8 vector = ioapic->redirtbl[irq].fields.vector;
+ u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+ u32 deliver_bitmask;
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ int vcpu_id;
+
+ ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+ "vector=%x trig_mode=%x",
+ dest, dest_mode, delivery_mode, vector, trig_mode);
+
+ deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+ if (!deliver_bitmask) {
+ ioapic_debug("no target on destination");
+ return;
+ }
+
+ switch (delivery_mode) {
+ case dest_LowestPrio:
+ target =
+ kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
+ if (target != NULL)
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ else
+ ioapic_debug("null round robin: "
+ "mask=%x vector=%x delivery_mode=%x",
+ deliver_bitmask, vector, dest_LowestPrio);
+ break;
+ case dest_Fixed:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+ vcpu = ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu) {
+ target = vcpu->apic;
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ }
+ }
+ break;
+
+ /* TODO: NMI */
+ default:
+ printk(KERN_WARNING "Unsupported delivery mode %d\n",
+ delivery_mode);
+ break;
+ }
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+ u32 old_irr = ioapic->irr;
+ u32 mask = 1 << irq;
+ union ioapic_redir_entry entry;
+
+ if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+ entry = ioapic->redirtbl[irq];
+ level ^= entry.fields.polarity;
+ if (!level)
+ ioapic->irr &= ~mask;
+ else {
+ ioapic->irr |= mask;
+ if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
+ || !entry.fields.remote_irr)
+ ioapic_service(ioapic, irq);
+ }
+ }
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ if (ioapic->redirtbl[i].fields.vector == vector)
+ return i;
+ return -1;
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->vioapic;
+ union ioapic_redir_entry *ent;
+ int gsi;
+
+ gsi = get_eoi_gsi(ioapic, vector);
+ if (gsi == -1) {
+ printk(KERN_WARNING "Can't find redir item for %d EOI\n",
+ vector);
+ return;
+ }
+
+ ent = &ioapic->redirtbl[gsi];
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_deliver(ioapic, gsi);
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+ return ((addr >= ioapic->base_address &&
+ (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+ void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 result;
+
+ ioapic_debug("addr %lx", (unsigned long)addr);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ result = ioapic->ioregsel;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ result = ioapic_read_indirect(ioapic, addr, len);
+ break;
+
+ default:
+ result = 0;
+ break;
+ }
+ switch (len) {
+ case 8:
+ *(u64 *) val = result;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ memcpy(val, (char *)&result, len);
+ break;
+ default:
+ printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+ }
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+ const void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 data;
+
+ ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+ addr, len, val);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+ if (len == 4 || len == 8)
+ data = *(u32 *) val;
+ else {
+ printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+ return;
+ }
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ ioapic->ioregsel = data;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ ioapic_write_indirect(ioapic, data);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+ struct kvm_ioapic *ioapic;
+ int i;
+
+ ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+ if (!ioapic)
+ return -ENOMEM;
+ kvm->vioapic = ioapic;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ ioapic->redirtbl[i].fields.mask = 1;
+ ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+ ioapic->dev.read = ioapic_mmio_read;
+ ioapic->dev.write = ioapic_mmio_write;
+ ioapic->dev.in_range = ioapic_in_range;
+ ioapic->dev.private = ioapic;
+ ioapic->kvm = kvm;
+ kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+ return 0;
+}
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
new file mode 100644
index 00000000000..7628c7ff628
--- /dev/null
+++ b/drivers/kvm/irq.c
@@ -0,0 +1,98 @@
+/*
+ * irq.c: API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ * Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include <linux/module.h>
+
+#include "kvm.h"
+#include "irq.h"
+
+/*
+ * check if there is pending interrupt without
+ * intack.
+ */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+{
+ struct kvm_pic *s;
+
+ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
+ if (kvm_apic_accept_pic_intr(v)) {
+ s = pic_irqchip(v->kvm); /* PIC */
+ return s->output;
+ } else
+ return 0;
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
+
+/*
+ * Read pending interrupt vector and intack.
+ */
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
+{
+ struct kvm_pic *s;
+ int vector;
+
+ vector = kvm_get_apic_interrupt(v); /* APIC */
+ if (vector == -1) {
+ if (kvm_apic_accept_pic_intr(v)) {
+ s = pic_irqchip(v->kvm);
+ s->output = 0; /* PIC */
+ vector = kvm_pic_read_irq(s);
+ }
+ }
+ return vector;
+}
+EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+ printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int ipi_pcpu = vcpu->cpu;
+
+ if (waitqueue_active(&vcpu->wq)) {
+ wake_up_interruptible(&vcpu->wq);
+ ++vcpu->stat.halt_wakeup;
+ }
+ if (vcpu->guest_mode)
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ kvm_inject_apic_timer_irqs(vcpu);
+ /* TODO: PIT, RTC etc. */
+}
+EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
+
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ kvm_apic_timer_intr_post(vcpu, vec);
+ /* TODO: PIT, RTC etc. */
+}
+EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
new file mode 100644
index 00000000000..11fc014e2b3
--- /dev/null
+++ b/drivers/kvm/irq.h
@@ -0,0 +1,165 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ * Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include "kvm.h"
+
+typedef void irq_request_func(void *opaque, int level);
+
+struct kvm_kpic_state {
+ u8 last_irr; /* edge detection */
+ u8 irr; /* interrupt request register */
+ u8 imr; /* interrupt mask register */
+ u8 isr; /* interrupt service register */
+ u8 priority_add; /* highest irq priority */
+ u8 irq_base;
+ u8 read_reg_select;
+ u8 poll;
+ u8 special_mask;
+ u8 init_state;
+ u8 auto_eoi;
+ u8 rotate_on_auto_eoi;
+ u8 special_fully_nested_mode;
+ u8 init4; /* true if 4 byte init */
+ u8 elcr; /* PIIX edge/trigger selection */
+ u8 elcr_mask;
+ struct kvm_pic *pics_state;
+};
+
+struct kvm_pic {
+ struct kvm_kpic_state pics[2]; /* 0