aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c62
-rw-r--r--arch/powerpc/kvm/44x_emulate.c8
-rw-r--r--arch/powerpc/kvm/44x_tlb.c2
-rw-r--r--arch/powerpc/kvm/Kconfig29
-rw-r--r--arch/powerpc/kvm/Makefile29
-rw-r--r--arch/powerpc/kvm/book3s.c399
-rw-r--r--arch/powerpc/kvm/book3s.h34
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c110
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c25
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c212
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c121
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c171
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S87
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c29
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c174
-rw-r--r--arch/powerpc/kvm/book3s_exports.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv.c861
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c31
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S30
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c51
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c35
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2010
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S68
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c66
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c185
-rw-r--r--arch/powerpc/kvm/book3s_pr.c907
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c68
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S81
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c44
-rw-r--r--arch/powerpc/kvm/book3s_segment.S31
-rw-r--r--arch/powerpc/kvm/book3s_xics.c11
-rw-r--r--arch/powerpc/kvm/booke.c382
-rw-r--r--arch/powerpc/kvm/booke.h34
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S34
-rw-r--r--arch/powerpc/kvm/e500.c63
-rw-r--r--arch/powerpc/kvm/e500.h10
-rw-r--r--arch/powerpc/kvm/e500_emulate.c49
-rw-r--r--arch/powerpc/kvm/e500_mmu.c6
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c84
-rw-r--r--arch/powerpc/kvm/e500mc.c62
-rw-r--r--arch/powerpc/kvm/emulate.c37
-rw-r--r--arch/powerpc/kvm/mpic.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c293
-rw-r--r--arch/powerpc/kvm/trace.h429
-rw-r--r--arch/powerpc/kvm/trace_booke.h177
-rw-r--r--arch/powerpc/kvm/trace_pr.h297
46 files changed, 5382 insertions, 2562 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 2f5c6b6d687..9cb4b0a3603 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -21,6 +21,8 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -31,13 +33,13 @@
#include "44x_tlb.h"
#include "booke.h"
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_booke_vcpu_load(vcpu, cpu);
kvmppc_44x_tlb_load(vcpu);
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
{
kvmppc_44x_tlb_put(vcpu);
kvmppc_booke_vcpu_put(vcpu);
@@ -114,29 +116,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- kvmppc_get_sregs_ivor(vcpu, sregs);
+ return kvmppc_get_sregs_ivor(vcpu, sregs);
}
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
return -EINVAL;
}
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
return -EINVAL;
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
struct kvm_vcpu *vcpu;
@@ -167,7 +172,7 @@ out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
@@ -176,30 +181,57 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_44x(struct kvm *kvm)
{
return 0;
}
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
{
}
+static struct kvmppc_ops kvm_ops_44x = {
+ .get_sregs = kvmppc_core_get_sregs_44x,
+ .set_sregs = kvmppc_core_set_sregs_44x,
+ .get_one_reg = kvmppc_get_one_reg_44x,
+ .set_one_reg = kvmppc_set_one_reg_44x,
+ .vcpu_load = kvmppc_core_vcpu_load_44x,
+ .vcpu_put = kvmppc_core_vcpu_put_44x,
+ .vcpu_create = kvmppc_core_vcpu_create_44x,
+ .vcpu_free = kvmppc_core_vcpu_free_44x,
+ .mmu_destroy = kvmppc_mmu_destroy_44x,
+ .init_vm = kvmppc_core_init_vm_44x,
+ .destroy_vm = kvmppc_core_destroy_vm_44x,
+ .emulate_op = kvmppc_core_emulate_op_44x,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
+};
+
static int __init kvmppc_44x_init(void)
{
int r;
r = kvmppc_booke_init();
if (r)
- return r;
+ goto err_out;
+
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_44x.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_44x;
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+err_out:
+ return r;
}
static void __exit kvmppc_44x_exit(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_booke_exit();
}
module_init(kvmppc_44x_init);
module_exit(kvmppc_44x_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 35ec0a8547d..92c9ab4bcfe 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
return EMULATE_DONE;
}
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
int dcrn = get_dcrn(inst);
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ed038544814..0deef1082e0 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
trace_kvm_stlb_inval(stlb_index);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ffaef2cb101..d6a53b95de9 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -34,17 +34,20 @@ config KVM_BOOK3S_64_HANDLER
bool
select KVM_BOOK3S_HANDLER
-config KVM_BOOK3S_PR
+config KVM_BOOK3S_PR_POSSIBLE
bool
select KVM_MMIO
select MMU_NOTIFIER
+config KVM_BOOK3S_HV_POSSIBLE
+ bool
+
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
select KVM
select KVM_BOOK3S_32_HANDLER
- select KVM_BOOK3S_PR
+ select KVM_BOOK3S_PR_POSSIBLE
---help---
Support running unmodified book3s_32 guest kernels
in virtual machines on book3s_32 host processors.
@@ -59,6 +62,7 @@ config KVM_BOOK3S_64
depends on PPC_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
+ select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
@@ -69,8 +73,10 @@ config KVM_BOOK3S_64
If unsure, say N.
config KVM_BOOK3S_64_HV
- bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
depends on KVM_BOOK3S_64
+ depends on !CPU_LITTLE_ENDIAN
+ select KVM_BOOK3S_HV_POSSIBLE
select MMU_NOTIFIER
select CMA
---help---
@@ -89,9 +95,20 @@ config KVM_BOOK3S_64_HV
If unsure, say N.
config KVM_BOOK3S_64_PR
- def_bool y
- depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
- select KVM_BOOK3S_PR
+ tristate "KVM support without using hypervisor mode in host"
+ depends on KVM_BOOK3S_64
+ select KVM_BOOK3S_PR_POSSIBLE
+ ---help---
+ Support running guest kernels in virtual machines on processors
+ without using hypervisor mode in the host, by running the
+ guest in user mode (problem state) and emulating all
+ privileged instructions and registers.
+
+ This is not as fast as using hypervisor mode, but works on
+ machines where hypervisor mode is not available or not usable,
+ and can emulate processors that are different from the host
+ processor, including emulating 32-bit processors on a 64-bit
+ host.
config KVM_BOOKE_HV
bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 6646c952c5e..ce569b6bf4d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -53,41 +53,51 @@ kvm-e500mc-objs := \
e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
- $(KVM)/coalesced_mmio.o \
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
+ book3s_64_vio_hv.o
+
+kvm-pr-y := \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
book3s_pr_papr.o \
- book3s_64_vio_hv.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+kvm-book3s_64-module-objs := \
+ $(KVM)/coalesced_mmio.o
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_rmhandlers.o
+endif
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+kvm-hv-y += \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
+
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+
+ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
- book3s_64_vio_hv.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
+endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-module-objs := \
+kvm-book3s_64-module-objs += \
$(KVM)/kvm_main.o \
$(KVM)/eventfd.o \
powerpc.o \
@@ -123,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
+obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
+
obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 700df6f1d32..c254c27f240 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -18,6 +18,8 @@
#include <linux/err.h>
#include <linux/export.h>
#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -34,6 +36,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include "book3s.h"
#include "trace.h"
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -69,10 +72,54 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+ if (!is_kvmppc_hv_enabled(vcpu->kvm))
+ return to_book3s(vcpu)->hior;
+ return 0;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+ unsigned long pending_now, unsigned long old_pending)
+{
+ if (is_kvmppc_hv_enabled(vcpu->kvm))
+ return;
+ if (pending_now)
+ kvmppc_set_int_pending(vcpu, 1);
+ else if (old_pending)
+ kvmppc_set_int_pending(vcpu, 0);
+}
+
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
+{
+ ulong crit_raw;
+ ulong crit_r1;
+ bool crit;
+
+ if (is_kvmppc_hv_enabled(vcpu->kvm))
+ return false;
+
+ crit_raw = kvmppc_get_critical(vcpu);
+ crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
+ /* Truncate crit indicators in 32 bit mode */
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
+ crit_raw &= 0xffffffff;
+ crit_r1 &= 0xffffffff;
+ }
+
+ /* Critical section when crit == r1 */
+ crit = (crit_raw == crit_r1);
+ /* ... and we're in supervisor mode */
+ crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);
+
+ return crit;
+}
+
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
- vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
+ kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+ kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
vcpu->arch.mmu.reset_msr(vcpu);
}
@@ -98,6 +145,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
+ case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break;
default: prio = BOOK3S_IRQPRIO_MAX; break;
}
@@ -126,28 +174,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
printk(KERN_INFO "Queueing interrupt %x\n", vec);
#endif
}
-
+EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
@@ -174,12 +226,12 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
- deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+ deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
- deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+ deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -224,6 +276,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
vec = BOOK3S_INTERRUPT_PERFMON;
break;
+ case BOOK3S_IRQPRIO_FAC_UNAVAIL:
+ vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
+ break;
default:
deliver = 0;
printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
@@ -285,12 +340,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
return 0;
}
+EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
-pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+ bool *writable)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
- if (!(vcpu->arch.shared->msr & MSR_SF))
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
mp_pa = (uint32_t)mp_pa;
/* Magic page override */
@@ -302,20 +359,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
get_page(pfn_to_page(pfn));
+ if (writable)
+ *writable = true;
return pfn;
}
- return gfn_to_pfn(vcpu->kvm, gfn);
+ return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
}
+EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
- struct kvmppc_pte *pte)
+ bool iswrite, struct kvmppc_pte *pte)
{
- int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
+ int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));
int r;
if (relocated) {
- r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+ r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
} else {
pte->eaddr = eaddr;
pte->raddr = eaddr & KVM_PAM;
@@ -361,7 +421,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
vcpu->stat.st++;
- if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+ if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
return -ENOENT;
*eaddr = pte.raddr;
@@ -374,6 +434,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
+EXPORT_SYMBOL_GPL(kvmppc_st);
int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
bool data)
@@ -383,7 +444,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
vcpu->stat.ld++;
- if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+ if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
goto nopte;
*eaddr = pte.raddr;
@@ -404,6 +465,7 @@ nopte:
mmio:
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_ld);
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
@@ -419,6 +481,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@@ -428,18 +502,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->ctr = kvmppc_get_ctr(vcpu);
regs->lr = kvmppc_get_lr(vcpu);
regs->xer = kvmppc_get_xer(vcpu);
- regs->msr = vcpu->arch.shared->msr;
- regs->srr0 = vcpu->arch.shared->srr0;
- regs->srr1 = vcpu->arch.shared->srr1;
+ regs->msr = kvmppc_get_msr(vcpu);
+ regs->srr0 = kvmppc_get_srr0(vcpu);
+ regs->srr1 = kvmppc_get_srr1(vcpu);
regs->pid = vcpu->arch.pid;
- regs->sprg0 = vcpu->arch.shared->sprg0;
- regs->sprg1 = vcpu->arch.shared->sprg1;
- regs->sprg2 = vcpu->arch.shared->sprg2;
- regs->sprg3 = vcpu->arch.shared->sprg3;
- regs->sprg4 = vcpu->arch.shared->sprg4;
- regs->sprg5 = vcpu->arch.shared->sprg5;
- regs->sprg6 = vcpu->arch.shared->sprg6;
- regs->sprg7 = vcpu->arch.shared->sprg7;
+ regs->sprg0 = kvmppc_get_sprg0(vcpu);
+ regs->sprg1 = kvmppc_get_sprg1(vcpu);
+ regs->sprg2 = kvmppc_get_sprg2(vcpu);
+ regs->sprg3 = kvmppc_get_sprg3(vcpu);
+ regs->sprg4 = kvmppc_get_sprg4(vcpu);
+ regs->sprg5 = kvmppc_get_sprg5(vcpu);
+ regs->sprg6 = kvmppc_get_sprg6(vcpu);
+ regs->sprg7 = kvmppc_get_sprg7(vcpu);
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -457,16 +531,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_lr(vcpu, regs->lr);
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
- vcpu->arch.shared->srr0 = regs->srr0;
- vcpu->arch.shared->srr1 = regs->srr1;
- vcpu->arch.shared->sprg0 = regs->sprg0;
- vcpu->arch.shared->sprg1 = regs->sprg1;
- vcpu->arch.shared->sprg2 = regs->sprg2;
- vcpu->arch.shared->sprg3 = regs->sprg3;
- vcpu->arch.shared->sprg4 = regs->sprg4;
- vcpu->arch.shared->sprg5 = regs->sprg5;
- vcpu->arch.shared->sprg6 = regs->sprg6;
- vcpu->arch.shared->sprg7 = regs->sprg7;
+ kvmppc_set_srr0(vcpu, regs->srr0);
+ kvmppc_set_srr1(vcpu, regs->srr1);
+ kvmppc_set_sprg0(vcpu, regs->sprg0);
+ kvmppc_set_sprg1(vcpu, regs->sprg1);
+ kvmppc_set_sprg2(vcpu, regs->sprg2);
+ kvmppc_set_sprg3(vcpu, regs->sprg3);
+ kvmppc_set_sprg4(vcpu, regs->sprg4);
+ kvmppc_set_sprg5(vcpu, regs->sprg5);
+ kvmppc_set_sprg6(vcpu, regs->sprg6);
+ kvmppc_set_sprg7(vcpu, regs->sprg7);
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -495,23 +569,22 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
if (size > sizeof(val))
return -EINVAL;
- r = kvmppc_get_one_reg(vcpu, reg->id, &val);
-
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
if (r == -EINVAL) {
r = 0;
switch (reg->id) {
case KVM_REG_PPC_DAR:
- val = get_reg_val(reg->id, vcpu->arch.shared->dar);
+ val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
break;
case KVM_REG_PPC_DSISR:
- val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
+ val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
i = reg->id - KVM_REG_PPC_FPR0;
- val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
+ val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
break;
case KVM_REG_PPC_FPSCR:
- val = get_reg_val(reg->id, vcpu->arch.fpscr);
+ val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
break;
#ifdef CONFIG_ALTIVEC
case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
@@ -519,16 +592,30 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
break;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = reg->id - KVM_REG_PPC_VSR0;
+ val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
+ val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
case KVM_REG_PPC_DEBUG_INST: {
u32 opcode = INS_TW;
r = copy_to_user((u32 __user *)(long)reg->addr,
@@ -544,6 +631,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+ case KVM_REG_PPC_FSCR:
+ val = get_reg_val(reg->id, vcpu->arch.fscr);
+ break;
+ case KVM_REG_PPC_TAR:
+ val = get_reg_val(reg->id, vcpu->arch.tar);
+ break;
+ case KVM_REG_PPC_EBBHR:
+ val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+ break;
+ case KVM_REG_PPC_EBBRR:
+ val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+ break;
+ case KVM_REG_PPC_BESCR:
+ val = get_reg_val(reg->id, vcpu->arch.bescr);
+ break;
default:
r = -EINVAL;
break;
@@ -572,23 +674,22 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
return -EFAULT;
- r = kvmppc_set_one_reg(vcpu, reg->id, &val);
-
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
if (r == -EINVAL) {
r = 0;
switch (reg->id) {
case KVM_REG_PPC_DAR:
- vcpu->arch.shared->dar = set_reg_val(reg->id, val);
+ kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
break;
case KVM_REG_PPC_DSISR:
- vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
+ kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
i = reg->id - KVM_REG_PPC_FPR0;
- vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
+ VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
break;
case KVM_REG_PPC_FPSCR:
- vcpu->arch.fpscr = set_reg_val(reg->id, val);
+ vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
break;
#ifdef CONFIG_ALTIVEC
case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
@@ -596,16 +697,34 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = reg->id - KVM_REG_PPC_VSR0;
+ vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
+ vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
@@ -616,6 +735,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
set_reg_val(reg->id, val));
break;
#endif /* CONFIG_KVM_XICS */
+ case KVM_REG_PPC_FSCR:
+ vcpu->arch.fscr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_TAR:
+ vcpu->arch.tar = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_EBBHR:
+ vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_EBBRR:
+ vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_BESCR:
+ vcpu->arch.bescr = set_reg_val(reg->id, val);
+ break;
default:
r = -EINVAL;
break;
@@ -625,6 +759,27 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
return r;
}
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr);
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
+}
+
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
@@ -644,3 +799,147 @@ void kvmppc_decrementer_func(unsigned long data)
kvmppc_core_queue_dec(vcpu);
kvm_vcpu_kick(vcpu);
}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
+}
+
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ kvm->arch.kvm_ops->free_memslot(free, dont);
+}
+
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return kvm->arch.kvm_ops->create_memslot(slot, npages);
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
+{
+ kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
+}
+EXPORT_SYMBOL_GPL(kvm_unmap_hva);
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm->arch.kvm_ops->age_hva(kvm, hva);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+
+#ifdef CONFIG_PPC64
+ INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
+#endif
+
+ return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvm->arch.kvm_ops->destroy_vm(kvm);
+
+#ifdef CONFIG_PPC64
+ kvmppc_rtas_tokens_free(kvm);
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ /*
+ * We always return 0 for book3s. We check
+ * for compatability while loading the HV
+ * or PR module
+ */
+ return 0;
+}
+
+static int kvmppc_book3s_init(void)
+{
+ int r;
+
+ r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ if (r)
+ return r;
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ r = kvmppc_book3s_init_pr();
+#endif
+ return r;
+
+}
+
+static void kvmppc_book3s_exit(void)
+{
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kvmppc_book3s_exit_pr();
+#endif
+ kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
+
+/* On 32bit this is our one and only kernel module */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644
index 00000000000..4bf956cf94d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_BOOK3S_H__
+#define __POWERPC_KVM_BOOK3S_H__
+
+extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
+ int sprn, ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
+ int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+extern void kvmppc_book3s_exit_pr(void);
+
+#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index c8cefdd15fd..93503bbdae4 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -84,13 +84,14 @@ static inline bool sr_nx(u32 sr_raw)
}
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data);
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite);
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
u64 *vsid);
static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
+ return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);
}
static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
u64 vsid;
struct kvmppc_pte pte;
- if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+ if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))
return pte.vpage;
kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, 0);
}
-static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 page, hash, pteg, htabmask;
hva_t r;
@@ -129,10 +131,10 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
- kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+ kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,
sr_vsid(sre));
- r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
return r;
return r | (pteg & ~PAGE_MASK);
@@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
}
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data)
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_bat *bat;
@@ -157,7 +160,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
else
bat = &vcpu_book3s->ibat[i];
- if (vcpu->arch.shared->msr & MSR_PR) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
if (!bat->vp)
continue;
} else {
@@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
printk(KERN_INFO "BAT is not readable!\n");
continue;
}
- if (!pte->may_write) {
- /* let's treat r/o BATs as not-readable for now */
+ if (iswrite && !pte->may_write) {
dprintk_pte("BAT is read-only!\n");
continue;
}
@@ -201,12 +203,12 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data,
- bool primary)
+ bool iswrite, bool primary)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 sre;
hva_t ptegp;
u32 pteg[16];
+ u32 pte0, pte1;
u32 ptem = 0;
int i;
int found = 0;
@@ -218,7 +220,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
- ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+ ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
if (kvm_is_error_hva(ptegp)) {
printk(KERN_INFO "KVM: Invalid PTEG!\n");
goto no_page_found;
@@ -232,14 +234,16 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
}
for (i=0; i<16; i+=2) {
- if (ptem == pteg[i]) {
+ pte0 = be32_to_cpu(pteg[i]);
+ pte1 = be32_to_cpu(pteg[i + 1]);
+ if (ptem == pte0) {
u8 pp;
- pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
- pp = pteg[i+1] & 3;
+ pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF);
+ pp = pte1 & 3;
- if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
- (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
+ if ((sr_kp(sre) && (kvmppc_get_msr(vcpu) & MSR_PR)) ||
+ (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))
pp |= 4;
pte->may_write = false;
@@ -258,11 +262,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
break;
}
- if ( !pte->may_read )
- continue;
-
dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
- pteg[i], pteg[i+1], pp);
+ pte0, pte1, pp);
found = 1;
break;
}
@@ -271,19 +272,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Update PTE C and A bits, so the guest's swapper knows we used the
page */
if (found) {
- u32 oldpte = pteg[i+1];
-
- if (pte->may_read)
- pteg[i+1] |= PTEG_FLAG_ACCESSED;
- if (pte->may_write)
- pteg[i+1] |= PTEG_FLAG_DIRTY;
- else
- dprintk_pte("KVM: Mapping read-only page!\n");
-
- /* Write back into the PTEG */
- if (pteg[i+1] != oldpte)
- copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+ u32 pte_r = pte1;
+ char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32));
+
+ /*
+ * Use single-byte writes to update the HPTE, to
+ * conform to what real hardware does.
+ */
+ if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+ pte_r |= PTEG_FLAG_ACCESSED;
+ put_user(pte_r >> 8, addr + 2);
+ }
+ if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+ pte_r |= PTEG_FLAG_DIRTY;
+ put_user(pte_r, addr + 3);
+ }
+ if (!pte->may_read || (iswrite && !pte->may_write))
+ return -EPERM;
return 0;
}
@@ -294,7 +299,8 @@ no_page_found:
to_book3s(vcpu)->sdr1, ptegp);
for (i=0; i<16; i+=2) {
dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
- i, pteg[i], pteg[i+1], ptem);
+ i, be32_to_cpu(pteg[i]),
+ be32_to_cpu(pteg[i+1]), ptem);
}
}
@@ -302,17 +308,19 @@ no_page_found:
}
static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data)
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite)
{
int r;
ulong mp_ea = vcpu->arch.magic_page_ea;
pte->eaddr = eaddr;
+ pte->page_size = MMU_PAGE_4K;
/* Magic page override */
if (unlikely(mp_ea) &&
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
pte->raddr &= KVM_PAM;
@@ -323,11 +331,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return 0;
}
- r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+ r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
if (r < 0)
- r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+ r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+ data, iswrite, true);
if (r < 0)
- r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+ r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+ data, iswrite, false);
return r;
}
@@ -335,19 +345,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
{
- return vcpu->arch.shared->sr[srnum];
+ return kvmppc_get_sr(vcpu, srnum);
}
static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
ulong value)
{
- vcpu->arch.shared->sr[srnum] = value;
+ kvmppc_set_sr(vcpu, srnum, value);
kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
}
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
- kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+ int i;
+ struct kvm_vcpu *v;
+
+ /* flush this VA on all cpus */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
}
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
@@ -356,8 +371,9 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
ulong ea = esid << SID_SHIFT;
u32 sr;
u64 gvsid = esid;
+ u64 msr = kvmppc_get_msr(vcpu);
- if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ if (msr & (MSR_DR|MSR_IR)) {
sr = find_sr(vcpu, ea);
if (sr_valid(sr))
gvsid = sr_vsid(sr);
@@ -366,7 +382,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
/* In case we only have one of MSR_IR or MSR_DR set, let's put
that in the real-mode context (and hope RM doesn't access
high memory) */
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ switch (msr & (MSR_DR|MSR_IR)) {
case 0:
*vsid = VSID_REAL | esid;
break;
@@ -386,7 +402,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
BUG();
}
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (msr & MSR_PR)
*vsid |= VSID_PR;
return 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 00e619bf608..678e7537049 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -92,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
extern char etext[];
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+ bool iswrite)
{
pfn_t hpaddr;
u64 vpn;
@@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
bool evict = false;
struct hpte_cache *pte;
int r = 0;
+ bool writable;
/* Get host physical address for gpa */
- hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
+ iswrite, &writable);
if (is_error_noslot_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
orig_pte->eaddr);
@@ -204,7 +207,7 @@ next_pteg:
(primary ? 0 : PTE_SEC);
pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
- if (orig_pte->may_write) {
+ if (orig_pte->may_write && writable) {
pteg1 |= PP_RWRW;
mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
} else {
@@ -240,6 +243,11 @@ next_pteg:
/* Now tell our Shadow PTE code about the new page */
pte = kvmppc_mmu_hpte_cache_next(vcpu);
+ if (!pte) {
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+ r = -EAGAIN;
+ goto out;
+ }
dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
orig_pte->may_write ? 'w' : '-',
@@ -259,6 +267,11 @@ out:
return r;
}
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
+}
+
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
{
struct kvmppc_sid_map *map;
@@ -266,7 +279,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -341,7 +354,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
svcpu_put(svcpu);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
{
int i;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7e345e00661..774a253ca4e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -38,7 +38,7 @@
static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
{
- kvmppc_set_msr(vcpu, MSR_SF);
+ kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
}
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
return kvmppc_slb_calc_vpn(slb, eaddr);
}
+static int mmu_pagesize(int mmu_pg)
+{
+ switch (mmu_pg) {
+ case MMU_PAGE_64K:
+ return 16;
+ case MMU_PAGE_16M:
+ return 24;
+ }
+ return 12;
+}
+
static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
{
- return slbe->large ? 24 : 12;
+ return mmu_pagesize(slbe->base_page_size);
}
static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
@@ -119,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
}
-static hva_t kvmppc_mmu_book3s_64_get_pteg(
- struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
struct kvmppc_slb *slbe, gva_t eaddr,
bool second)
{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u64 hash, pteg, htabsize;
u32 ssize;
hva_t r;
@@ -148,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
/* When running a PAPR guest, SDR1 contains a HVA address instead
of a GPA */
- if (vcpu_book3s->vcpu.arch.papr_enabled)
+ if (vcpu->arch.papr_enabled)
r = pteg;
else
- r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
return r;
@@ -166,18 +177,38 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
- if (p < 24)
- avpn >>= ((80 - p) - 56) - 8;
+ if (p < 16)
+ avpn >>= ((80 - p) - 56) - 8; /* 16 - p */
else
- avpn <<= 8;
+ avpn <<= p - 16;
return avpn;
}
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry. This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+ switch (slbe->base_page_size) {
+ case MMU_PAGE_64K:
+ if ((r & 0xf000) == 0x1000)
+ return MMU_PAGE_64K;
+ break;
+ case MMU_PAGE_16M:
+ if ((r & 0xff000) == 0)
+ return MMU_PAGE_16M;
+ break;
+ }
+ return -1;
+}
+
static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *gpte, bool data)
+ struct kvmppc_pte *gpte, bool data,
+ bool iswrite)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
hva_t ptegp;
u64 pteg[16];
@@ -189,12 +220,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
u8 pp, key = 0;
bool found = false;
bool second = false;
+ int pgsize;
ulong mp_ea = vcpu->arch.magic_page_ea;
/* Magic page override */
if (unlikely(mp_ea) &&
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
@@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_execute = true;
gpte->may_read = true;
gpte->may_write = true;
+ gpte->page_size = MMU_PAGE_4K;
return 0;
}
@@ -222,8 +255,12 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
HPTE_V_SECONDARY;
+ pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
do_second:
- ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+ ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
if (kvm_is_error_hva(ptegp))
goto no_page_found;
@@ -232,14 +269,24 @@ do_second:
goto no_page_found;
}
- if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)
key = 4;
- else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
+ else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)
key = 4;
for (i=0; i<16; i+=2) {
+ u64 pte0 = be64_to_cpu(pteg[i]);
+ u64 pte1 = be64_to_cpu(pteg[i + 1]);
+
/* Check all relevant fields of 1st dword */
- if ((pteg[i] & v_mask) == v_val) {
+ if ((pte0 & v_mask) == v_val) {
+ /* If large page bit is set, check pgsize encoding */
+ if (slbe->large &&
+ (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+ pgsize = decode_pagesize(slbe, pte1);
+ if (pgsize < 0)
+ continue;
+ }
found = true;
break;
}
@@ -253,17 +300,22 @@ do_second:
goto do_second;
}
- v = pteg[i];
- r = pteg[i+1];
+ v = be64_to_cpu(pteg[i]);
+ r = be64_to_cpu(pteg[i+1]);
pp = (r & HPTE_R_PP) | key;
- eaddr_mask = 0xFFF;
+ if (r & HPTE_R_PP0)
+ pp |= 8;
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
- if (slbe->large)
- eaddr_mask = 0xFFFFFF;
+
+ eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+ gpte->page_size = pgsize;
gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+ if (unlikely(vcpu->arch.disable_kernel_nx) &&
+ !(kvmppc_get_msr(vcpu) & MSR_PR))
+ gpte->may_execute = true;
gpte->may_read = false;
gpte->may_write = false;
@@ -277,6 +329,7 @@ do_second:
case 3:
case 5:
case 7:
+ case 10:
gpte->may_read = true;
break;
}
@@ -287,30 +340,37 @@ do_second:
/* Update PTE R and C bits, so the guest's swapper knows we used the
* page */
- if (gpte->may_read) {
- /* Set the accessed flag */
+ if (gpte->may_read && !(r & HPTE_R_R)) {
+ /*
+ * Set the accessed flag.
+ * We have to write this back with a single byte write
+ * because another vcpu may be accessing this on
+ * non-PAPR platforms such as mac99, and this is
+ * what real hardware does.
+ */
+ char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
r |= HPTE_R_R;
+ put_user(r >> 8, addr + 6);
}
- if (data && gpte->may_write) {
- /* Set the dirty flag -- XXX even if not writing */
+ if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
+ /* Set the dirty flag */
+ /* Use a single byte write */
+ char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
r |= HPTE_R_C;
+ put_user(r, addr + 7);
}
- /* Write back into the PTEG */
- if (pteg[i+1] != r) {
- pteg[i+1] = r;
- copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
- }
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
- if (!gpte->may_read)
+ if (!gpte->may_read || (iswrite && !gpte->may_write))
return -EPERM;
return 0;
no_page_found:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
return -ENOENT;
no_seg_found:
-
dprintk("KVM MMU: Trigger segment fault\n");
return -EINVAL;
}
@@ -345,6 +405,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
slbe->nx = (rs & SLB_VSID_N) ? 1 : 0;
slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
+ slbe->base_page_size = MMU_PAGE_4K;
+ if (slbe->large) {
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+ switch (rs & SLB_VSID_LP) {
+ case SLB_VSID_LP_00:
+ slbe->base_page_size = MMU_PAGE_16M;
+ break;
+ case SLB_VSID_LP_01:
+ slbe->base_page_size = MMU_PAGE_64K;
+ break;
+ }
+ } else
+ slbe->base_page_size = MMU_PAGE_16M;
+ }
+
slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
slbe->origv = rs;
@@ -410,7 +485,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
vcpu->arch.slb[i].origv = 0;
}
- if (vcpu->arch.shared->msr & MSR_IR) {
+ if (kvmppc_get_msr(vcpu) & MSR_IR) {
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
}
@@ -460,13 +535,44 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
bool large)
{
u64 mask = 0xFFFFFFFFFULL;
+ long i;
+ struct kvm_vcpu *v;
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
- if (large)
- mask = 0xFFFFFF000ULL;
- kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+ /*
+ * The tlbie instruction changed behaviour starting with
+ * POWER6. POWER6 and later don't have the large page flag
+ * in the instruction but in the RB value, along with bits
+ * indicating page and segment sizes.
+ */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+ /* POWER6 or later */
+ if (va & 1) { /* L bit */
+ if ((va & 0xf000) == 0x1000)
+ mask = 0xFFFFFFFF0ULL; /* 64k page */
+ else
+ mask = 0xFFFFFF000ULL; /* 16M page */
+ }
+ } else {
+ /* older processors, e.g. PPC970 */
+ if (large)
+ mask = 0xFFFFFF000ULL;
+ }
+ /* flush this VA on all vcpus */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvmppc_mmu_pte_vflush(v, va >> 12, mask);
+}
+
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+ ulong mp_ea = vcpu->arch.magic_page_ea;
+
+ return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) &&
+ (mp_ea >> SID_SHIFT) == esid;
}
+#endif
static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
u64 *vsid)
@@ -475,11 +581,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
struct kvmppc_slb *slb;
u64 gvsid = esid;
ulong mp_ea = vcpu->arch.magic_page_ea;
+ int pagesize = MMU_PAGE_64K;
+ u64 msr = kvmppc_get_msr(vcpu);
- if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ if (msr & (MSR_DR|MSR_IR)) {
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
if (slb) {
gvsid = slb->vsid;
+ pagesize = slb->base_page_size;
if (slb->tb) {
gvsid <<= SID_SHIFT_1T - SID_SHIFT;
gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
@@ -488,37 +597,50 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
}
}
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ switch (msr & (MSR_DR|MSR_IR)) {
case 0:
- *vsid = VSID_REAL | esid;
+ gvsid = VSID_REAL | esid;
break;
case MSR_IR:
- *vsid = VSID_REAL_IR | gvsid;
+ gvsid |= VSID_REAL_IR;
break;
case MSR_DR:
- *vsid = VSID_REAL_DR | gvsid;
+ gvsid |= VSID_REAL_DR;
break;
case MSR_DR|MSR_IR:
if (!slb)
goto no_slb;
- *vsid = gvsid;
break;
default:
BUG();
break;
}
- if (vcpu->arch.shared->msr & MSR_PR)
- *vsid |= VSID_PR;
+#ifdef CONFIG_PPC_64K_PAGES
+ /*
+ * Mark this as a 64k segment if the host is using
+ * 64k pages, the host MMU supports 64k pages and
+ * the guest segment page size is >= 64k,
+ * but not if this segment contains the magic page.
+ */
+ if (pagesize >= MMU_PAGE_64K &&
+ mmu_psize_defs[MMU_PAGE_64K].shift &&
+ !segment_contains_magic_page(vcpu, esid))
+ gvsid |= VSID_64K;
+#endif
+
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
+ gvsid |= VSID_PR;
+ *vsid = gvsid;
return 0;
no_slb:
/* Catch magic page case */
if (unlikely(mp_ea) &&
unlikely(esid == (mp_ea >> SID_SHIFT)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
*vsid = VSID_REAL | esid;
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e5240524bf6..0ac98392f36 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -27,14 +27,14 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
-#include "trace.h"
+#include "trace_pr.h"
#define PTE_SIZE 12
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
- MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M,
+ pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
false);
}
@@ -58,7 +58,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
return NULL;
}
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+ bool iswrite)
{
unsigned long vpn;
pfn_t hpaddr;
@@ -90,16 +91,26 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
int attempt = 0;
struct kvmppc_sid_map *map;
int r = 0;
+ int hpsize = MMU_PAGE_4K;
+ bool writable;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu->kvm;
+ struct hpte_cache *cpte;
+ unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
+ unsigned long pfn;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
/* Get host physical address for gpa */
- hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
- if (is_error_noslot_pfn(hpaddr)) {
- printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
+ pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);
+ if (is_error_noslot_pfn(pfn)) {
+ printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);
r = -EINVAL;
goto out;
}
- hpaddr <<= PAGE_SHIFT;
- hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+ hpaddr = pfn << PAGE_SHIFT;
/* and write the mapping ea -> hpa into the pt */
vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -117,20 +128,39 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
goto out;
}
- vsid = map->host_vsid;
- vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+ vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
- if (!orig_pte->may_write)
- rflags |= HPTE_R_PP;
- else
- mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+ kvm_set_pfn_accessed(pfn);
+ if (!orig_pte->may_write || !writable)
+ rflags |= PP_RXRX;
+ else {
+ mark_page_dirty(vcpu->kvm, gfn);
+ kvm_set_pfn_dirty(pfn);
+ }
if (!orig_pte->may_execute)
rflags |= HPTE_R_N;
else
- kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+ kvmppc_mmu_flush_icache(pfn);
+
+ /*
+ * Use 64K pages if possible; otherwise, on 64K page kernels,
+ * we need to transfer 4 more bits from guest real to host real addr.
+ */
+ if (vsid & VSID_64K)
+ hpsize = MMU_PAGE_64K;
+ else
+ hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+
+ hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
- hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
+ cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+ spin_lock(&kvm->mmu_lock);
+ if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+ r = -EAGAIN;
+ goto out_unlock;
+ }
map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -139,11 +169,11 @@ map_again:
if (attempt > 1)
if (ppc_md.hpte_remove(hpteg) < 0) {
r = -1;
- goto out;
+ goto out_unlock;
}
ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
- MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+ hpsize, hpsize, MMU_SEGSIZE_256M);
if (ret < 0) {
/* If we couldn't map a primary PTE, try a secondary */
@@ -152,8 +182,6 @@ map_again:
attempt++;
goto map_again;
} else {
- struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
@@ -164,19 +192,37 @@ map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
}
- pte->slot = hpteg + (ret & 7);
- pte->host_vpn = vpn;
- pte->pte = *orig_pte;
- pte->pfn = hpaddr >> PAGE_SHIFT;
+ cpte->slot = hpteg + (ret & 7);
+ cpte->host_vpn = vpn;
+ cpte->pte = *orig_pte;
+ cpte->pfn = pfn;
+ cpte->pagesize = hpsize;
- kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+ cpte = NULL;
}
- kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ if (cpte)
+ kvmppc_mmu_hpte_cache_free(cpte);
out:
return r;
}
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ u64 mask = 0xfffffffffULL;
+ u64 vsid;
+
+ vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
+ if (vsid & VSID_64K)
+ mask = 0xffffffff0ULL;
+ kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask);
+}
+
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
{
struct kvmppc_sid_map *map;
@@ -184,7 +230,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -225,11 +271,8 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
int found_inval = -1;
int r;
- if (!svcpu->slb_max)
- svcpu->slb_max = 1;
-
/* Are we overwriting? */
- for (i = 1; i < svcpu->slb_max; i++) {
+ for (i = 0; i < svcpu->slb_max; i++) {
if (!(svcpu->slb[i].esid & SLB_ESID_V))
found_inval = i;
else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
@@ -239,7 +282,7 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
}
/* Found a spare entry that was invalidated before */
- if (found_inval > 0) {
+ if (found_inval >= 0) {
r = found_inval;
goto out;
}
@@ -291,6 +334,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
slb_vsid &= ~SLB_VSID_KP;
slb_esid |= slb_index;
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Set host segment base page size to 64K if possible */
+ if (gvsid & VSID_64K)
+ slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp;
+#endif
+
svcpu->slb[slb_index].esid = slb_esid;
svcpu->slb[slb_index].vsid = slb_vsid;
@@ -307,7 +356,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
ulong seg_mask = -seg_size;
int i;
- for (i = 1; i < svcpu->slb_max; i++) {
+ for (i = 0; i < svcpu->slb_max; i++) {
if ((svcpu->slb[i].esid & SLB_ESID_V) &&
(svcpu->slb[i].esid & seg_mask) == ea) {
/* Invalidate this entry */
@@ -321,12 +370,12 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- svcpu->slb_max = 1;
+ svcpu->slb_max = 0;
svcpu->slb[0].esid = 0;
svcpu_put(svcpu);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_hpte_destroy(vcpu);
__destroy_context(to_book3s(vcpu)->context_id[0]);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 043eec8461e..68468d695f1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -52,7 +52,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm);
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
- unsigned long hpt;
+ unsigned long hpt = 0;
struct revmap_entry *rev;
struct page *page = NULL;
long order = KVM_DEFAULT_HPT_ORDER;
@@ -64,22 +64,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
kvm->arch.hpt_cma_alloc = 0;
- /*
- * try first to allocate it from the kernel page allocator.
- * We keep the CMA reserved for failed allocation.
- */
- hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
- __GFP_NOWARN, order - PAGE_SHIFT);
-
- /* Next try to allocate from the preallocated pool */
- if (!hpt) {
- VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
- page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
- if (page) {
- hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
- kvm->arch.hpt_cma_alloc = 1;
- } else
- --order;
+ VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+ page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+ if (page) {
+ hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ kvm->arch.hpt_cma_alloc = 1;
}
/* Lastly try successively smaller sizes from the page allocator */
@@ -260,13 +249,16 @@ int kvmppc_mmu_hv_init(void)
return 0;
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
{
- kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
+ unsigned long msr = vcpu->arch.intr_msr;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ msr |= MSR_TS_S;
+ else
+ msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
+ kvmppc_set_msr(vcpu, msr);
}
/*
@@ -451,7 +443,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
}
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *gpte, bool data)
+ struct kvmppc_pte *gpte, bool data, bool iswrite)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_slb *slbe;
@@ -473,11 +465,14 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
slb_v = vcpu->kvm->arch.vrma_slb_v;
}
+ preempt_disable();
/* Find the HPTE in the hash table */
index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
HPTE_V_VALID | HPTE_V_ABSENT);
- if (index < 0)
+ if (index < 0) {
+ preempt_enable();
return -ENOENT;
+ }
hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
v = hptep[0] & ~HPTE_V_HVLOCK;
gr = kvm->arch.revmap[index].guest_rpte;
@@ -485,6 +480,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Unlock the HPTE */
asm volatile("lwsync" : : : "memory");
hptep[0] = v;
+ preempt_enable();
gpte->eaddr = eaddr;
gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
@@ -562,7 +558,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
* we just return and retry the instruction.
*/
- if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
+ if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store)
return RESUME_GUEST;
/*
@@ -589,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
unsigned long *hptep, hpte[3], r;
unsigned long mmu_seq, psize, pte_size;
+ unsigned long gpa_base, gfn_base;
unsigned long gpa, gfn, hva, pfn;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
@@ -627,7 +624,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Translate the logical address and get the page */
psize = hpte_page_size(hpte[0], r);
- gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+ gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+ gfn_base = gpa_base >> PAGE_SHIFT;
+ gpa = gpa_base | (ea & (psize - 1));
gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
@@ -639,6 +638,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!kvm->arch.using_mmu_notifiers)
return -EFAULT; /* should never get here */
+ /*
+ * This should never happen, because of the slot_is_aligned()
+ * check in kvmppc_do_h_enter().
+ */
+ if (gfn_base < memslot->base_gfn)
+ return -EFAULT;
+
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
@@ -669,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return -EFAULT;
} else {
page = pages[0];
+ pfn = page_to_pfn(page);
if (PageHuge(page)) {
page = compound_head(page);
pte_size <<= compound_order(page);
@@ -693,7 +700,6 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
rcu_read_unlock_sched();
}
- pfn = page_to_pfn(page);
}
ret = -EFAULT;
@@ -711,8 +717,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
}
- /* Set the HPTE to point to pfn */
- r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+ /*
+ * Set the HPTE to point to pfn.
+ * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * don't mask out lower-order bits if psize < PAGE_SIZE.
+ */
+ if (psize < PAGE_SIZE)
+ psize = PAGE_SIZE;
+ r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
@@ -725,7 +737,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
- rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ /* Always put the HPTE in the rmap chain for the page base address */
+ rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
lock_rmap(rmap);
/* Check if we might have been invalidated; let the guest retry if so */
@@ -906,21 +919,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
return 0;
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
{
if (kvm->arch.using_mmu_notifiers)
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
if (kvm->arch.using_mmu_notifiers)
kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
return 0;
}
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
{
unsigned long *rmapp;
unsigned long gfn;
@@ -994,7 +1008,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
return ret;
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
{
if (!kvm->arch.using_mmu_notifiers)
return 0;
@@ -1032,36 +1046,47 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
return ret;
}
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
{
if (!kvm->arch.using_mmu_notifiers)
return 0;
return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
}
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
{
if (!kvm->arch.using_mmu_notifiers)
return;
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
}
-static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+static int vcpus_running(struct kvm *kvm)
+{
+ return atomic_read(&kvm->arch.vcpus_running) != 0;
+}
+
+/*
+ * Returns the number of system pages that are dirty.
+ * This can be more than 1 if we find a huge-page HPTE.
+ */
+static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
{
struct revmap_entry *rev = kvm->arch.revmap;
unsigned long head, i, j;
+ unsigned long n;
+ unsigned long v, r;
unsigned long *hptep;
- int ret = 0;
+ int npages_dirty = 0;
retry:
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_CHANGED) {
*rmapp &= ~KVMPPC_RMAP_CHANGED;
- ret = 1;
+ npages_dirty = 1;
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
- return ret;
+ return npages_dirty;
}
i = head = *rmapp & KVMPPC_RMAP_INDEX;
@@ -1069,7 +1094,22 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
j = rev[i].forw;
- if (!(hptep[1] & HPTE_R_C))
+ /*
+ * Checking the C (changed) bit here is racy since there
+ * is no guarantee about when the hardware writes it back.
+ * If the HPTE is not writable then it is stable since the
+ * page can't be written to, and we would have done a tlbie
+ * (which forces the hardware to complete any writeback)
+ * when making the HPTE read-only.
+ * If vcpus are running then this call is racy anyway
+ * since the page could get dirtied subsequently, so we
+ * expect there to be a further call which would pick up
+ * any delayed C bit writeback.
+ * Otherwise we need to do the tlbie even if C==0 in
+ * order to pick up any delayed writeback of C.
+ */
+ if (!(hptep[1] & HPTE_R_C) &&
+ (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))
continue;
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
@@ -1081,24 +1121,33 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
}
/* Now check and modify the HPTE */
- if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
- /* need to make it temporarily absent to clear C */
- hptep[0] |= HPTE_V_ABSENT;
- kvmppc_invalidate_hpte(kvm, hptep, i);
- hptep[1] &= ~HPTE_R_C;
- eieio();
- hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+ if (!(hptep[0] & HPTE_V_VALID))
+ continue;
+
+ /* need to make it temporarily absent so C is stable */
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ v = hptep[0];
+ r = hptep[1];
+ if (r & HPTE_R_C) {
+ hptep[1] = r & ~HPTE_R_C;
if (!(rev[i].guest_rpte & HPTE_R_C)) {
rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]);
}
- ret = 1;
+ n = hpte_page_size(v, r);
+ n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (n > npages_dirty)
+ npages_dirty = n;
+ eieio();
}
- hptep[0] &= ~HPTE_V_HVLOCK;
+ v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+ v |= HPTE_V_VALID;
+ hptep[0] = v;
} while ((i = j) != head);
unlock_rmap(rmapp);
- return ret;
+ return npages_dirty;
}
static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
@@ -1122,15 +1171,22 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long *map)
{
- unsigned long i;
+ unsigned long i, j;
unsigned long *rmapp;
struct kvm_vcpu *vcpu;
preempt_disable();
rmapp = memslot->arch.rmap;
for (i = 0; i < memslot->npages; ++i) {
- if (kvm_test_clear_dirty(kvm, rmapp) && map)
- __set_bit_le(i, map);
+ int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
+ /*
+ * Note that if npages > 0 then i must be a multiple of npages,
+ * since we always put huge-page HPTEs in the rmap chain
+ * corresponding to their page base address.
+ */
+ if (npages && map)
+ for (j = i; npages; ++j, --npages)
+ __set_bit_le(j, map);
++rmapp;
}
@@ -1506,15 +1562,14 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
goto out;
}
if (!rma_setup && is_vrma_hpte(v)) {
- unsigned long psize = hpte_page_size(v, r);
+ unsigned long psize = hpte_base_page_size(v, r);
unsigned long senc = slb_pgsize_encoding(psize);
unsigned long lpcr;
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
- lpcr |= senc << (LPCR_VRMASD_SH - 4);
- kvm->arch.lpcr = lpcr;
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
rma_setup = 1;
}
++i;
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 4f12e8f0c71..3589c4e3d49 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -17,30 +17,9 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
-#ifdef __LITTLE_ENDIAN__
-#error Need to fix SLB shadow accesses in little endian mode
-#endif
-
-#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10))
-#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
-#define UNBOLT_SLB_ENTRY(num) \
- ld r9, SHADOW_SLB_ESID(num)(r12); \
- /* Invalid? Skip. */; \
- rldicl. r0, r9, 37, 63; \
- beq slb_entry_skip_ ## num; \
- xoris r9, r9, SLB_ESID_V@h; \
- std r9, SHADOW_SLB_ESID(num)(r12); \
- slb_entry_skip_ ## num:
-
-#define REBOLT_SLB_ENTRY(num) \
- ld r10, SHADOW_SLB_ESID(num)(r11); \
- cmpdi r10, 0; \
- beq slb_exit_skip_ ## num; \
- oris r10, r10, SLB_ESID_V@h; \
- ld r9, SHADOW_SLB_VSID(num)(r11); \
- slbmte r9, r10; \
- std r10, SHADOW_SLB_ESID(num)(r11); \
-slb_exit_skip_ ## num:
+#define SHADOW_SLB_ENTRY_LEN 0x10
+#define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x)
+#define OFFSET_VSID(x) ((SHADOW_SLB_ENTRY_LEN * x) + 8)
/******************************************************************************
* *
@@ -64,20 +43,15 @@ slb_exit_skip_ ## num:
* SVCPU[LR] = guest LR
*/
- /* Remove LPAR shadow entries */
+BEGIN_FW_FTR_SECTION
-#if SLB_NUM_BOLTED == 3
+ /* Declare SLB shadow as 0 entries big */
- ld r12, PACA_SLBSHADOWPTR(r13)
+ ld r11, PACA_SLBSHADOWPTR(r13)
+ li r8, 0
+ stb r8, 3(r11)
- /* Remove bolted entries */
- UNBOLT_SLB_ENTRY(0)
- UNBOLT_SLB_ENTRY(1)
- UNBOLT_SLB_ENTRY(2)
-
-#else
-#error unknown number of bolted entries
-#endif
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
/* Flush SLB */
@@ -100,7 +74,7 @@ slb_loop_enter:
ld r10, 0(r11)
- rldicl. r0, r10, 37, 63
+ andis. r9, r10, SLB_ESID_V@h
beq slb_loop_enter_skip
ld r9, 8(r11)
@@ -137,23 +111,42 @@ slb_do_enter:
*
*/
- /* Restore bolted entries from the shadow and fix it along the way */
+ /* Remove all SLB entries that are in use. */
- /* We don't store anything in entry 0, so we don't need to take care of it */
+ li r0, r0
+ slbmte r0, r0
slbia
- isync
-#if SLB_NUM_BOLTED == 3
+ /* Restore bolted entries from the shadow */
ld r11, PACA_SLBSHADOWPTR(r13)
- REBOLT_SLB_ENTRY(0)
- REBOLT_SLB_ENTRY(1)
- REBOLT_SLB_ENTRY(2)
-
-#else
-#error unknown number of bolted entries
-#endif
+BEGIN_FW_FTR_SECTION
+
+ /* Declare SLB shadow as SLB_NUM_BOLTED entries big */
+
+ li r8, SLB_NUM_BOLTED
+ stb r8, 3(r11)
+
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
+
+ /* Manually load all entries from shadow SLB */
+
+ li r8, SLBSHADOW_SAVEAREA
+ li r7, SLBSHADOW_SAVEAREA + 8
+
+ .rept SLB_NUM_BOLTED
+ LDX_BE r10, r11, r8
+ cmpdi r10, 0
+ beq 1f
+ LDX_BE r9, r11, r7
+ slbmte r9, r10
+1: addi r7, r7, SHADOW_SLB_ENTRY_LEN
+ addi r8, r8, SHADOW_SLB_ENTRY_LEN
+ .endr
+
+ isync
+ sync
slb_do_exit:
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 30c2f3b134c..89e96b3e003 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -74,3 +74,32 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* Didn't find the liobn, punt it to userspace */
return H_TOO_HARD;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_spapr_tce_table *stt;
+
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == liobn) {
+ unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
+ struct page *page;
+ u64 *tbl;
+
+ if (ioba >= stt->window_size)
+ return H_PARAMETER;
+
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+ return H_SUCCESS;
+ }
+ }
+
+ /* Didn't find the liobn, punt it to userspace */
+ return H_TOO_HARD;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 360ce68c980..3f295269af3 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -80,28 +80,45 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
return false;
/* Limit user space to its own small SPR set */
- if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)
return false;
return true;
}
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
int rt = get_rt(inst);
int rs = get_rs(inst);
int ra = get_ra(inst);
int rb = get_rb(inst);
+ u32 inst_sc = 0x44000002;
switch (get_op(inst)) {
+ case 0:
+ emulated = EMULATE_FAIL;
+ if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
+ (inst == swab32(inst_sc))) {
+ /*
+ * This is the byte reversed syscall instruction of our
+ * hypercall handler. Early versions of LE Linux didn't
+ * swap the instructions correctly and ended up in
+ * illegal instructions.
+ * Just always fail hypercalls on these broken systems.
+ */
+ kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ emulated = EMULATE_DONE;
+ }
+ break;
case 19:
switch (get_xop(inst)) {
case OP_19_XOP_RFID:
case OP_19_XOP_RFI:
- kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
- kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+ kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
+ kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
*advance = 0;
break;
@@ -113,16 +130,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
+ kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));
break;
case OP_31_XOP_MTMSRD:
{
ulong rs_val = kvmppc_get_gpr(vcpu, rs);
if (inst & 0x10000) {
- ulong new_msr = vcpu->arch.shared->msr;
+ ulong new_msr = kvmppc_get_msr(vcpu);
new_msr &= ~(MSR_RI | MSR_EE);
new_msr |= rs_val & (MSR_RI | MSR_EE);
- vcpu->arch.shared->msr = new_msr;
+ kvmppc_set_msr_fast(vcpu, new_msr);
} else
kvmppc_set_msr(vcpu, rs_val);
break;
@@ -172,14 +189,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
case OP_31_XOP_FAKE_SC1:
{
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
int i;
- if ((vcpu->arch.shared->msr & MSR_PR) ||
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) ||
!vcpu->arch.papr_enabled) {
emulated = EMULATE_FAIL;
break;
@@ -261,18 +278,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
ra_val = kvmppc_get_gpr(vcpu, ra);
addr = (ra_val + rb_val) & ~31ULL;
- if (!(vcpu->arch.shared->msr & MSR_SF))
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
addr &= 0xffffffff;
vaddr = addr;
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
if ((r == -ENOENT) || (r == -EPERM)) {
- struct kvmppc_book3s_shadow_vcpu *svcpu;
-
- svcpu = svcpu_get(vcpu);
*advance = 0;
- vcpu->arch.shared->dar = vaddr;
- svcpu->fault_dar = vaddr;
+ kvmppc_set_dar(vcpu, vaddr);
+ vcpu->arch.fault_dar = vaddr;
dsisr = DSISR_ISSTORE;
if (r == -ENOENT)
@@ -280,9 +294,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
else if (r == -EPERM)
dsisr |= DSISR_PROTFAULT;
- vcpu->arch.shared->dsisr = dsisr;
- svcpu->fault_dsisr = dsisr;
- svcpu_put(svcpu);
+ kvmppc_set_dsisr(vcpu, dsisr);
+ vcpu->arch.fault_dsisr = dsisr;
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -349,7 +362,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
return bat;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
@@ -360,10 +373,10 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
- vcpu->arch.shared->dsisr = spr_val;
+ kvmppc_set_dsisr(vcpu, spr_val);
break;
case SPRN_DAR:
- vcpu->arch.shared->dar = spr_val;
+ kvmppc_set_dar(vcpu, spr_val);
break;
case SPRN_HIOR:
to_book3s(vcpu)->hior = spr_val;
@@ -442,6 +455,31 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_GQR7:
to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
break;
+ case SPRN_FSCR:
+ vcpu->arch.fscr = spr_val;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_BESCR:
+ vcpu->arch.bescr = spr_val;
+ break;
+ case SPRN_EBBHR:
+ vcpu->arch.ebbhr = spr_val;
+ break;
+ case SPRN_EBBRR:
+ vcpu->arch.ebbrr = spr_val;
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case SPRN_TFHAR:
+ vcpu->arch.tfhar = spr_val;
+ break;
+ case SPRN_TEXASR:
+ vcpu->arch.texasr = spr_val;
+ break;
+ case SPRN_TFIAR:
+ vcpu->arch.tfiar = spr_val;
+ break;
+#endif
+#endif
case SPRN_ICTC:
case SPRN_THRM1:
case SPRN_THRM2:
@@ -459,6 +497,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_MMCRS:
+ case SPRN_MMCRA:
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
+#endif
break;
unprivileged:
default:
@@ -472,7 +517,7 @@ unprivileged:
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
@@ -497,10 +542,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = to_book3s(vcpu)->sdr1;
break;
case SPRN_DSISR:
- *spr_val = vcpu->arch.shared->dsisr;
+ *spr_val = kvmppc_get_dsisr(vcpu);
break;
case SPRN_DAR:
- *spr_val = vcpu->arch.shared->dar;
+ *spr_val = kvmppc_get_dar(vcpu);
break;
case SPRN_HIOR:
*spr_val = to_book3s(vcpu)->hior;
@@ -542,6 +587,31 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_GQR7:
*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
break;
+ case SPRN_FSCR:
+ *spr_val = vcpu->arch.fscr;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_BESCR:
+ *spr_val = vcpu->arch.bescr;
+ break;
+ case SPRN_EBBHR:
+ *spr_val = vcpu->arch.ebbhr;
+ break;
+ case SPRN_EBBRR:
+ *spr_val = vcpu->arch.ebbrr;
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case SPRN_TFHAR:
+ *spr_val = vcpu->arch.tfhar;
+ break;
+ case SPRN_TEXASR:
+ *spr_val = vcpu->arch.texasr;
+ break;
+ case SPRN_TFIAR:
+ *spr_val = vcpu->arch.tfiar;
+ break;
+#endif
+#endif
case SPRN_THRM1:
case SPRN_THRM2:
case SPRN_THRM3:
@@ -557,6 +627,14 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_MMCRS:
+ case SPRN_MMCRA:
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
+ case SPRN_TIR:
+#endif
*spr_val = 0;
break;
default:
@@ -573,48 +651,17 @@ unprivileged:
u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
{
- u32 dsisr = 0;
-
- /*
- * This is what the spec says about DSISR bits (not mentioned = 0):
- *
- * 12:13 [DS] Set to bits 30:31
- * 15:16 [X] Set to bits 29:30
- * 17 [X] Set to bit 25
- * [D/DS] Set to bit 5
- * 18:21 [X] Set to bits 21:24
- * [D/DS] Set to bits 1:4
- * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS)
- * 27:31 Set to bits 11:15 (RA)
- */
-
- switch (get_op(inst)) {
- /* D-form */
- case OP_LFS:
- case OP_LFD:
- case OP_STFD:
- case OP_STFS:
- dsisr |= (inst >> 12) & 0x4000; /* bit 17 */
- dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */
- break;
- /* X-form */
- case 31:
- dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */
- dsisr |= (inst << 8) & 0x04000; /* bit 17 */
- dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */
- break;
- default:
- printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
- break;
- }
-
- dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */
-
- return dsisr;
+ return make_dsisr(inst);
}
ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Linux's fix_alignment() assumes that DAR is valid, so can we
+ */
+ return vcpu->arch.fault_dar;
+#else
ulong dar = 0;
ulong ra = get_ra(inst);
ulong rb = get_rb(inst);
@@ -639,4 +686,5 @@ ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
}
return dar;
+#endif
}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 7057a02f090..0d013fbc2e1 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -18,15 +18,13 @@
*/
#include <linux/export.h>
+#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
-#else
-EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
-EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#endif
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 62a2b5ab08e..7a12edbb61e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -31,6 +31,7 @@
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -52,6 +53,9 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
+#include <linux/module.h>
+
+#include "book3s.h"
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
@@ -66,7 +70,7 @@
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
-void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int me;
int cpu = vcpu->cpu;
@@ -82,10 +86,13 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
/* CPU points to the first thread of the core */
if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+#ifdef CONFIG_PPC_ICP_NATIVE
int real_cpu = cpu + vcpu->arch.ptid;
if (paca[real_cpu].kvm_hstate.xics_phys)
xics_wake_cpu(real_cpu);
- else if (cpu_online(cpu))
+ else
+#endif
+ if (cpu_online(cpu))
smp_send_reschedule(cpu);
}
put_cpu();
@@ -125,11 +132,12 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
* purely defensive; they should never fail.)
*/
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long flags;
- spin_lock(&vcpu->arch.tbacct_lock);
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
vc->preempt_tb != TB_NIL) {
vc->stolen_tb += mftb() - vc->preempt_tb;
@@ -140,32 +148,76 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
vcpu->arch.busy_preempt = TB_NIL;
}
- spin_unlock(&vcpu->arch.tbacct_lock);
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long flags;
- spin_lock(&vcpu->arch.tbacct_lock);
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
vc->preempt_tb = mftb();
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb();
- spin_unlock(&vcpu->arch.tbacct_lock);
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
vcpu->arch.shregs.msr = msr;
kvmppc_end_cede(vcpu);
}
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
}
+int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+{
+ unsigned long pcr = 0;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (arch_compat) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return -EINVAL; /* 970 has no compat mode support */
+
+ switch (arch_compat) {
+ case PVR_ARCH_205:
+ /*
+ * If an arch bit is set in PCR, all the defined
+ * higher-order arch bits also have to be set.
+ */
+ pcr = PCR_ARCH_206 | PCR_ARCH_205;
+ break;
+ case PVR_ARCH_206:
+ case PVR_ARCH_206p:
+ pcr = PCR_ARCH_206;
+ break;
+ case PVR_ARCH_207:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ /* POWER7 can't emulate POWER8 */
+ if (!(pcr & PCR_ARCH_206))
+ return -EINVAL;
+ pcr &= ~PCR_ARCH_206;
+ }
+ }
+
+ spin_lock(&vc->lock);
+ vc->arch_compat = arch_compat;
+ vc->pcr = pcr;
+ spin_unlock(&vc->lock);
+
+ return 0;
+}
+
void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
int r;
@@ -195,7 +247,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
pr_err(" ESID = %.16llx VSID = %.16llx\n",
vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
- vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
+ vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
vcpu->arch.last_inst);
}
@@ -454,11 +506,11 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
*/
if (vc->vcore_state != VCORE_INACTIVE &&
vc->runner->arch.run_task != current) {
- spin_lock(&vc->runner->arch.tbacct_lock);
+ spin_lock_irq(&vc->runner->arch.tbacct_lock);
p = vc->stolen_tb;
if (vc->preempt_tb != TB_NIL)
p += now - vc->preempt_tb;
- spin_unlock(&vc->runner->arch.tbacct_lock);
+ spin_unlock_irq(&vc->runner->arch.tbacct_lock);
} else {
p = vc->stolen_tb;
}
@@ -480,16 +532,16 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
- spin_lock(&vcpu->arch.tbacct_lock);
+ spin_lock_irq(&vcpu->arch.tbacct_lock);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
- spin_unlock(&vcpu->arch.tbacct_lock);
+ spin_unlock_irq(&vcpu->arch.tbacct_lock);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
dt->dispatch_reason = 7;
dt->processor_id = vc->pcpu + vcpu->arch.ptid;
- dt->timebase = now;
+ dt->timebase = now + vc->tb_offset;
dt->enqueue_to_dispatch_time = stolen;
dt->srr0 = kvmppc_get_pc(vcpu);
dt->srr1 = vcpu->arch.shregs.msr;
@@ -538,6 +590,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
}
break;
case H_CONFER:
+ target = kvmppc_get_gpr(vcpu, 4);
+ if (target == -1)
+ break;
+ tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ if (!tvcpu) {
+ ret = H_PARAMETER;
+ break;
+ }
+ kvm_vcpu_yield_to(tvcpu);
break;
case H_REGISTER_VPA:
ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -548,7 +609,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -576,8 +639,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
-static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- struct task_struct *tsk)
+static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ struct task_struct *tsk)
{
int r = RESUME_HOST;
@@ -592,6 +655,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
+ case BOOK3S_INTERRUPT_H_DOORBELL:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -628,12 +692,10 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* hcall - punt to userspace */
int i;
- if (vcpu->arch.shregs.msr & MSR_PR) {
- /* sc 1 from userspace - reflect to guest syscall */
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
- r = RESUME_GUEST;
- break;
- }
+ /* hypercall with MSR_PR has already been handled in rmode,
+ * and never reaches here.
+ */
+
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -663,7 +725,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* we don't emulate any guest instructions at this stage.
*/
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- kvmppc_core_queue_program(vcpu, 0x80000);
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ break;
+ /*
+ * This occurs if the guest (kernel or userspace), does something that
+ * is prohibited by HFSCR. We just generate a program interrupt to
+ * the guest.
+ */
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
break;
default:
@@ -671,16 +742,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
vcpu->arch.shregs.msr);
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
r = RESUME_HOST;
- BUG();
break;
}
return r;
}
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
int i;
@@ -694,12 +765,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
int i, j;
- kvmppc_set_pvr(vcpu, sregs->pvr);
+ kvmppc_set_pvr_hv(vcpu, sregs->pvr);
j = 0;
for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -714,7 +785,47 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 mask;
+
+ spin_lock(&vc->lock);
+ /*
+ * If ILE (interrupt little-endian) has changed, update the
+ * MSR_LE bit in the intr_msr for each vcpu in this vcore.
+ */
+ if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.vcore != vc)
+ continue;
+ if (new_lpcr & LPCR_ILE)
+ vcpu->arch.intr_msr |= MSR_LE;
+ else
+ vcpu->arch.intr_msr &= ~MSR_LE;
+ }
+ mutex_unlock(&kvm->lock);
+ }
+
+ /*
+ * Userspace can only modify DPFD (default prefetch depth),
+ * ILE (interrupt little-endian) and TC (translation control).
+ * On POWER8 userspace can also modify AIL (alt. interrupt loc.)
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mask |= LPCR_AIL;
+ vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
+ spin_unlock(&vc->lock);
+}
+
+static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
long int i;
@@ -726,6 +837,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
case KVM_REG_PPC_DABR:
*val = get_reg_val(id, vcpu->arch.dabr);
break;
+ case KVM_REG_PPC_DABRX:
+ *val = get_reg_val(id, vcpu->arch.dabrx);
+ break;
case KVM_REG_PPC_DSCR:
*val = get_reg_val(id, vcpu->arch.dscr);
break;
@@ -741,7 +855,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
case KVM_REG_PPC_UAMOR:
*val = get_reg_val(id, vcpu->arch.uamor);
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
i = id - KVM_REG_PPC_MMCR0;
*val = get_reg_val(id, vcpu->arch.mmcr[i]);
break;
@@ -749,27 +863,61 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
i = id - KVM_REG_PPC_PMC1;
*val = get_reg_val(id, vcpu->arch.pmc[i]);
break;
-#ifdef CONFIG_VSX
- case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- if (cpu_has_feature(CPU_FTR_VSX)) {
- /* VSX => FP reg i is stored in arch.vsr[2*i] */
- long int i = id - KVM_REG_PPC_FPR0;
- *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
- } else {
- /* let generic code handle it */
- r = -EINVAL;
- }
+ case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+ i = id - KVM_REG_PPC_SPMC1;
+ *val = get_reg_val(id, vcpu->arch.spmc[i]);
break;
- case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
- if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = id - KVM_REG_PPC_VSR0;
- val->vsxval[0] = vcpu->arch.vsr[2 * i];
- val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
- } else {
- r = -ENXIO;
- }
+ case KVM_REG_PPC_SIAR:
+ *val = get_reg_val(id, vcpu->arch.siar);
+ break;
+ case KVM_REG_PPC_SDAR:
+ *val = get_reg_val(id, vcpu->arch.sdar);
+ break;
+ case KVM_REG_PPC_SIER:
+ *val = get_reg_val(id, vcpu->arch.sier);
+ break;
+ case KVM_REG_PPC_IAMR:
+ *val = get_reg_val(id, vcpu->arch.iamr);
+ break;
+ case KVM_REG_PPC_PSPB:
+ *val = get_reg_val(id, vcpu->arch.pspb);
+ break;
+ case KVM_REG_PPC_DPDES:
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ break;
+ case KVM_REG_PPC_DAWR:
+ *val = get_reg_val(id, vcpu->arch.dawr);
+ break;
+ case KVM_REG_PPC_DAWRX:
+ *val = get_reg_val(id, vcpu->arch.dawrx);
+ break;
+ case KVM_REG_PPC_CIABR:
+ *val = get_reg_val(id, vcpu->arch.ciabr);
+ break;
+ case KVM_REG_PPC_IC:
+ *val = get_reg_val(id, vcpu->arch.ic);
+ break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, vcpu->arch.vtb);
+ break;
+ case KVM_REG_PPC_CSIGR:
+ *val = get_reg_val(id, vcpu->arch.csigr);
+ break;
+ case KVM_REG_PPC_TACR:
+ *val = get_reg_val(id, vcpu->arch.tacr);
+ break;
+ case KVM_REG_PPC_TCSCR:
+ *val = get_reg_val(id, vcpu->arch.tcscr);
+ break;
+ case KVM_REG_PPC_PID:
+ *val = get_reg_val(id, vcpu->arch.pid);
+ break;
+ case KVM_REG_PPC_ACOP:
+ *val = get_reg_val(id, vcpu->arch.acop);
+ break;
+ case KVM_REG_PPC_WORT:
+ *val = get_reg_val(id, vcpu->arch.wort);
break;
-#endif /* CONFIG_VSX */
case KVM_REG_PPC_VPA_ADDR:
spin_lock(&vcpu->arch.vpa_update_lock);
*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
@@ -787,6 +935,81 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
val->vpaval.length = vcpu->arch.dtl.len;
spin_unlock(&vcpu->arch.vpa_update_lock);
break;
+ case KVM_REG_PPC_TB_OFFSET:
+ *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+ break;
+ case KVM_REG_PPC_LPCR:
+ *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+ break;
+ case KVM_REG_PPC_PPR:
+ *val = get_reg_val(id, vcpu->arch.ppr);
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ *val = get_reg_val(id, vcpu->arch.tfhar);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ *val = get_reg_val(id, vcpu->arch.tfiar);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ *val = get_reg_val(id, vcpu->arch.texasr);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ i = id - KVM_REG_PPC_TM_GPR0;
+ *val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int j;
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+ else {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ val->vval = vcpu->arch.vr_tm.vr[i-32];
+ else
+ r = -ENXIO;
+ }
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ *val = get_reg_val(id, vcpu->arch.cr_tm);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ *val = get_reg_val(id, vcpu->arch.lr_tm);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ *val = get_reg_val(id, vcpu->arch.ctr_tm);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr_tm);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ *val = get_reg_val(id, vcpu->arch.ppr_tm);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ *val = get_reg_val(id, vcpu->arch.vrsave_tm);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+ else
+ r = -ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr_tm);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ *val = get_reg_val(id, vcpu->arch.tar_tm);
+ break;
+#endif
+ case KVM_REG_PPC_ARCH_COMPAT:
+ *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+ break;
default:
r = -EINVAL;
break;
@@ -795,7 +1018,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
return r;
}
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
long int i;
@@ -810,6 +1034,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
case KVM_REG_PPC_DABR:
vcpu->arch.dabr = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_DABRX:
+ vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
+ break;
case KVM_REG_PPC_DSCR:
vcpu->arch.dscr = set_reg_val(id, *val);
break;
@@ -825,7 +1052,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
case KVM_REG_PPC_UAMOR:
vcpu->arch.uamor = set_reg_val(id, *val);
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
i = id - KVM_REG_PPC_MMCR0;
vcpu->arch.mmcr[i] = set_reg_val(id, *val);
break;
@@ -833,27 +1060,64 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
i = id - KVM_REG_PPC_PMC1;
vcpu->arch.pmc[i] = set_reg_val(id, *val);
break;
-#ifdef CONFIG_VSX
- case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- if (cpu_has_feature(CPU_FTR_VSX)) {
- /* VSX => FP reg i is stored in arch.vsr[2*i] */
- long int i = id - KVM_REG_PPC_FPR0;
- vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
- } else {
- /* let generic code handle it */
- r = -EINVAL;
- }
+ case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+ i = id - KVM_REG_PPC_SPMC1;
+ vcpu->arch.spmc[i] = set_reg_val(id, *val);
break;
- case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
- if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = id - KVM_REG_PPC_VSR0;
- vcpu->arch.vsr[2 * i] = val->vsxval[0];
- vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
- } else {
- r = -ENXIO;
- }
+ case KVM_REG_PPC_SIAR:
+ vcpu->arch.siar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SDAR:
+ vcpu->arch.sdar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SIER:
+ vcpu->arch.sier = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_IAMR:
+ vcpu->arch.iamr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PSPB:
+ vcpu->arch.pspb = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DPDES:
+ vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DAWR:
+ vcpu->arch.dawr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DAWRX:
+ vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+ break;
+ case KVM_REG_PPC_CIABR:
+ vcpu->arch.ciabr = set_reg_val(id, *val);
+ /* Don't allow setting breakpoints in hypervisor code */
+ if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */
+ break;
+ case KVM_REG_PPC_IC:
+ vcpu->arch.ic = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_VTB:
+ vcpu->arch.vtb = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_CSIGR:
+ vcpu->arch.csigr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TACR:
+ vcpu->arch.tacr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TCSCR:
+ vcpu->arch.tcscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PID:
+ vcpu->arch.pid = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_ACOP:
+ vcpu->arch.acop = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_WORT:
+ vcpu->arch.wort = set_reg_val(id, *val);
break;
-#endif /* CONFIG_VSX */
case KVM_REG_PPC_VPA_ADDR:
addr = set_reg_val(id, *val);
r = -EINVAL;
@@ -880,6 +1144,82 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
len -= len % sizeof(struct dtl_entry);
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
+ case KVM_REG_PPC_TB_OFFSET:
+ /* round up to multiple of 2^24 */
+ vcpu->arch.vcore->tb_offset =
+ ALIGN(set_reg_val(id, *val), 1UL << 24);
+ break;
+ case KVM_REG_PPC_LPCR:
+ kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_PPR:
+ vcpu->arch.ppr = set_reg_val(id, *val);
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ vcpu->arch.tfhar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ vcpu->arch.tfiar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ vcpu->arch.texasr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ i = id - KVM_REG_PPC_TM_GPR0;
+ vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int j;
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+ else
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr_tm.vr[i-32] = val->vval;
+ else
+ r = -ENXIO;
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ vcpu->arch.cr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ vcpu->arch.lr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ vcpu->arch.ctr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ vcpu->arch.amr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ vcpu->arch.ppr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
+ else
+ r = - ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ vcpu->arch.dscr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ vcpu->arch.tar_tm = set_reg_val(id, *val);
+ break;
+#endif
+ case KVM_REG_PPC_ARCH_COMPAT:
+ r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+ break;
default:
r = -EINVAL;
break;
@@ -888,21 +1228,15 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
return r;
}
-int kvmppc_core_check_processor_compat(void)
-{
- if (cpu_has_feature(CPU_FTR_HVMODE))
- return 0;
- return -EIO;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ unsigned int id)
{
struct kvm_vcpu *vcpu;
int err = -EINVAL;
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_core;
+ core = id / threads_per_subcore;
if (core >= KVM_MAX_VCORES)
goto out;
@@ -916,14 +1250,25 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vcpu->arch.shared = &vcpu->arch.shregs;
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ /*
+ * The shared struct is never shared on HV,
+ * so we can always use host endianness
+ */
+#ifdef __BIG_ENDIAN__
+ vcpu->arch.shared_big_endian = true;
+#else
+ vcpu->arch.shared_big_endian = false;
+#endif
+#endif
vcpu->arch.mmcr[0] = MMCR0_FC;
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
- vcpu->arch.pvr = mfspr(SPRN_PVR);
- kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
spin_lock_init(&vcpu->arch.vpa_update_lock);
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
+ vcpu->arch.intr_msr = MSR_SF | MSR_ME;
kvmppc_mmu_book3s_hv_init(vcpu);
@@ -940,6 +1285,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
spin_lock_init(&vcore->lock);
init_waitqueue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
+ vcore->lpcr = kvm->arch.lpcr;
+ vcore->first_vcpuid = core * threads_per_subcore;
+ vcore->kvm = kvm;
}
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
@@ -953,6 +1301,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
+ vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -972,7 +1321,7 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
vpa->dirty);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->arch.vpa_update_lock);
unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
@@ -983,6 +1332,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
+static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
+{
+ /* Indicate we want to get back into the guest */
+ return 1;
+}
+
static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
unsigned long dec_nsec, now;
@@ -1010,7 +1365,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
-extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1019,13 +1374,13 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
- spin_lock(&vcpu->arch.tbacct_lock);
+ spin_lock_irq(&vcpu->arch.tbacct_lock);
now = mftb();
vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
vcpu->arch.stolen_logged;
vcpu->arch.busy_preempt = now;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
- spin_unlock(&vcpu->arch.tbacct_lock);
+ spin_unlock_irq(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
list_del(&vcpu->arch.run_list);
}
@@ -1084,13 +1439,14 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
tpaca->kvm_hstate.kvm_vcore = vc;
- tpaca->kvm_hstate.napping = 0;
+ tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
vcpu->cpu = vc->pcpu;
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
- if (vcpu->arch.ptid) {
+ if (cpu != smp_processor_id()) {
xics_wake_cpu(cpu);
- ++vc->n_woken;
+ if (vcpu->arch.ptid)
+ ++vc->n_woken;
}
#endif
}
@@ -1120,16 +1476,19 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
static int on_primary_thread(void)
{
int cpu = smp_processor_id();
- int thr = cpu_thread_in_core(cpu);
+ int thr;
- if (thr)
+ /* Are we on a primary subcore? */
+ if (cpu_thread_in_subcore(cpu))
return 0;
- while (++thr < threads_per_core)
+
+ thr = 0;
+ while (++thr < threads_per_subcore)
if (cpu_online(cpu + thr))
return 0;
/* Grab all hw threads so they can't go into the kernel */
- for (thr = 1; thr < threads_per_core; ++thr) {
+ for (thr = 1; thr < threads_per_subcore; ++thr) {
if (kvmppc_grab_hwthread(cpu + thr)) {
/* Couldn't grab one; let the others go */
do {
@@ -1147,10 +1506,10 @@ static int on_primary_thread(void)
*/
static void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vcpu0, *vnext;
+ struct kvm_vcpu *vcpu, *vnext;
long ret;
u64 now;
- int ptid, i, need_vpa_update;
+ int i, need_vpa_update;
int srcu_idx;
struct kvm_vcpu *vcpus_to_update[threads_per_core];
@@ -1188,49 +1547,37 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
}
/*
- * Assign physical thread IDs, first to non-ceded vcpus
- * and then to ceded ones.
- */
- ptid = 0;
- vcpu0 = NULL;
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- if (!vcpu->arch.ceded) {
- if (!ptid)
- vcpu0 = vcpu;
- vcpu->arch.ptid = ptid++;
- }
- }
- if (!vcpu0)
- goto out; /* nothing to run; should never happen */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- if (vcpu->arch.ceded)
- vcpu->arch.ptid = ptid++;
-
- /*
- * Make sure we are running on thread 0, and that
- * secondary threads are offline.
+ * Make sure we are running on primary threads, and that secondary
+ * threads are offline. Also check if the number of threads in this
+ * guest are greater than the current system threads per guest.
*/
- if (threads_per_core > 1 && !on_primary_thread()) {
+ if ((threads_per_core > 1) &&
+ ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
vcpu->arch.ret = -EBUSY;
goto out;
}
+
vc->pcpu = smp_processor_id();
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
kvmppc_create_dtl_entry(vcpu, vc);
}
+ /* Set this explicitly in case thread 0 doesn't have a vcpu */
+ get_paca()->kvm_hstate.kvm_vcore = vc;
+ get_paca()->kvm_hstate.ptid = 0;
+
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
- srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+ srcu_idx = srcu_read_lock(&vc->kvm->srcu);
- __kvmppc_vcore_entry(NULL, vcpu0);
+ __kvmppc_vcore_entry();
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
@@ -1239,20 +1586,20 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
/* wait for secondary threads to finish writing their state to memory */
if (vc->nap_count < vc->n_woken)
kvmppc_wait_for_nap(vc);
- for (i = 0; i < threads_per_core; ++i)
+ for (i = 0; i < threads_per_subcore; ++i)
kvmppc_release_hwthread(vc->pcpu + i);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
spin_unlock(&vc->lock);
- srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
kvm_guest_exit();
preempt_enable();
- kvm_resched(vcpu);
+ cond_resched();
spin_lock(&vc->lock);
now = get_tb();
@@ -1264,14 +1611,14 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
ret = RESUME_GUEST;
if (vcpu->arch.trap)
- ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
- vcpu->arch.run_task);
+ ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+ vcpu->arch.run_task);
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
if (vcpu->arch.ceded) {
- if (ret != RESUME_GUEST)
+ if (!is_kvmppc_resume_guest(ret))
kvmppc_end_cede(vcpu);
else
kvmppc_set_timer(vcpu);
@@ -1282,7 +1629,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_INACTIVE;
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
- if (vcpu->arch.ret != RESUME_GUEST) {
+ if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
}
@@ -1353,7 +1700,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_RUNNING &&
VCORE_EXIT_COUNT(vc) == 0) {
- vcpu->arch.ptid = vc->n_runnable - 1;
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -1424,7 +1770,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return vcpu->arch.ret;
}
-int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
@@ -1473,7 +1819,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
}
- } while (r == RESUME_GUEST);
+ } while (is_kvmppc_resume_guest(r));
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1546,7 +1892,8 @@ static const struct file_operations kvm_rma_fops = {
.release = kvm_rma_release,
};
-long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
+ struct kvm_allocate_rma *ret)
{
long fd;
struct kvm_rma_info *ri;
@@ -1589,10 +1936,18 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
* support pte_enc here
*/
(*sps)->enc[0].pte_enc = def->penc[linux_psize];
+ /*
+ * Add 16MB MPSS support if host supports it
+ */
+ if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
+ (*sps)->enc[1].page_shift = 24;
+ (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+ }
(*sps)++;
}
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
{
struct kvm_ppc_one_seg_page_size *sps;
@@ -1613,7 +1968,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
+ struct kvm_dirty_log *log)
{
struct kvm_memory_slot *memslot;
int r;
@@ -1667,8 +2023,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot)
}
}
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
{
if (!dont || free->arch.rmap != dont->arch.rmap) {
vfree(free->arch.rmap);
@@ -1681,8 +2037,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
}
}
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
- unsigned long npages)
+static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
+ unsigned long npages)
{
slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
if (!slot->arch.rmap)
@@ -1692,9 +2048,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
return 0;
}
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
{
unsigned long *phys;
@@ -1710,9 +2066,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
return 0;
}
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
@@ -1729,6 +2085,37 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
}
}
+/*
+ * Update LPCR values in kvm->arch and in vcores.
+ * Caller must hold kvm->lock.
+ */
+void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
+{
+ long int i;
+ u32 cores_done = 0;
+
+ if ((kvm->arch.lpcr & mask) == lpcr)
+ return;
+
+ kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
+
+ for (i = 0; i < KVM_MAX_VCORES; ++i) {
+ struct kvmppc_vcore *vc = kvm->arch.vcores[i];
+ if (!vc)
+ continue;
+ spin_lock(&vc->lock);
+ vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+ spin_unlock(&vc->lock);
+ if (++cores_done >= kvm->arch.online_vcores)
+ break;
+ }
+}
+
+static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
+{
+ return;
+}
+
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
@@ -1737,7 +2124,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
- unsigned long lpcr, senc;
+ unsigned long lpcr = 0, senc;
+ unsigned long lpcr_mask = 0;
unsigned long psize, porder;
unsigned long rma_size;
unsigned long rmls;
@@ -1802,9 +2190,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
senc = slb_pgsize_encoding(psize);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
- lpcr |= senc << (LPCR_VRMASD_SH - 4);
- kvm->arch.lpcr = lpcr;
+ lpcr_mask = LPCR_VRMASD;
+ /* the -4 is to account for senc values starting at 0x10 */
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
/* Create HPTEs in the hash page table for the VRMA */
kvmppc_map_vrma(vcpu, memslot, porder);
@@ -1825,23 +2213,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
kvm->arch.rma = ri;
/* Update LPCR and RMOR */
- lpcr = kvm->arch.lpcr;
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970; insert RMLS value (split field) in HID4 */
- lpcr &= ~((1ul << HID4_RMLS0_SH) |
- (3ul << HID4_RMLS2_SH));
- lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
+ lpcr_mask = (1ul << HID4_RMLS0_SH) |
+ (3ul << HID4_RMLS2_SH) | HID4_RMOR;
+ lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
((rmls & 3) << HID4_RMLS2_SH);
/* RMOR is also in HID4 */
lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
<< HID4_RMOR_SH;
} else {
/* POWER7 */
- lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
- lpcr |= rmls << LPCR_RMLS_SH;
+ lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
+ lpcr = rmls << LPCR_RMLS_SH;
kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
}
- kvm->arch.lpcr = lpcr;
pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
@@ -1860,6 +2246,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
}
}
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
smp_wmb();
kvm->arch.rma_setup_done = 1;
@@ -1875,7 +2263,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
@@ -1893,9 +2281,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
*/
cpumask_setall(&kvm->arch.need_tlb_flush);
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-
kvm->arch.rma = NULL;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -1916,6 +2301,9 @@ int kvmppc_core_init_vm(struct kvm *kvm)
LPCR_VPM0 | LPCR_VPM1;
kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* On POWER8 turn on online bit to enable PURR/SPURR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr |= LPCR_ONL;
}
kvm->arch.lpcr = lpcr;
@@ -1923,69 +2311,172 @@ int kvmppc_core_init_vm(struct kvm *kvm)
spin_lock_init(&kvm->arch.slot_phys_lock);
/*
- * Don't allow secondary CPU threads to come online
- * while any KVM VMs exist.
+ * Track that we now have a HV mode VM active. This blocks secondary
+ * CPU threads from coming online.
*/
- inhibit_secondary_onlining();
+ kvm_hv_vm_activated();
return 0;
}
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_free_vcores(struct kvm *kvm)
+{
+ long int i;
+
+ for (i = 0; i < KVM_MAX_VCORES; ++i)
+ kfree(kvm->arch.vcores[i]);
+ kvm->arch.online_vcores = 0;
+}
+
+static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
- uninhibit_secondary_onlining();
+ kvm_hv_vm_deactivated();
+ kvmppc_free_vcores(kvm);
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
kvm->arch.rma = NULL;
}
- kvmppc_rtas_tokens_free(kvm);
-
kvmppc_free_hpt(kvm);
- WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}
-/* These are stubs for now */
-void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+/* We don't need to emulate any privileged instructions or dcbz */
+static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
+ return EMULATE_FAIL;
}
-/* We don't need to emulate any privileged instructions or dcbz */
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val)
{
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val)
{
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+static int kvmppc_core_check_processor_compat_hv(void)
{
- return EMULATE_FAIL;
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EIO;
+ return 0;
}
-static int kvmppc_book3s_hv_init(void)
+static long kvm_arch_vm_ioctl_hv(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
- int r;
+ struct kvm *kvm __maybe_unused = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
- r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ switch (ioctl) {
- if (r)
- return r;
+ case KVM_ALLOCATE_RMA: {
+ struct kvm_allocate_rma rma;
+ struct kvm *kvm = filp->private_data;
- r = kvmppc_mmu_hv_init();
+ r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+ if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+ r = -EFAULT;
+ break;
+ }
+
+ case KVM_PPC_ALLOCATE_HTAB: {
+ u32 htab_order;
+
+ r = -EFAULT;
+ if (get_user(htab_order, (u32 __user *)argp))
+ break;
+ r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ if (r)
+ break;
+ r = -EFAULT;
+ if (put_user(htab_order, (u32 __user *)argp))
+ break;
+ r = 0;
+ break;
+ }
+
+ case KVM_PPC_GET_HTAB_FD: {
+ struct kvm_get_htab_fd ghf;
+
+ r = -EFAULT;
+ if (copy_from_user(&ghf, argp, sizeof(ghf)))
+ break;
+ r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+
+ return r;
+}
+
+static struct kvmppc_ops kvm_ops_hv = {
+ .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
+ .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
+ .get_one_reg = kvmppc_get_one_reg_hv,
+ .set_one_reg = kvmppc_set_one_reg_hv,
+ .vcpu_load = kvmppc_core_vcpu_load_hv,
+ .vcpu_put = kvmppc_core_vcpu_put_hv,
+ .set_msr = kvmppc_set_msr_hv,
+ .vcpu_run = kvmppc_vcpu_run_hv,
+ .vcpu_create = kvmppc_core_vcpu_create_hv,
+ .vcpu_free = kvmppc_core_vcpu_free_hv,
+ .check_requests = kvmppc_core_check_requests_hv,
+ .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
+ .flush_memslot = kvmppc_core_flush_memslot_hv,
+ .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
+ .commit_memory_region = kvmppc_core_commit_memory_region_hv,
+ .unmap_hva = kvm_unmap_hva_hv,
+ .unmap_hva_range = kvm_unmap_hva_range_hv,
+ .age_hva = kvm_age_hva_hv,
+ .test_age_hva = kvm_test_age_hva_hv,
+ .set_spte_hva = kvm_set_spte_hva_hv,
+ .mmu_destroy = kvmppc_mmu_destroy_hv,
+ .free_memslot = kvmppc_core_free_memslot_hv,
+ .create_memslot = kvmppc_core_create_memslot_hv,
+ .init_vm = kvmppc_core_init_vm_hv,
+ .destroy_vm = kvmppc_core_destroy_vm_hv,
+ .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
+ .emulate_op = kvmppc_core_emulate_op_hv,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
+ .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
+ .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
+};
+
+static int kvmppc_book3s_init_hv(void)
+{
+ int r;
+ /*
+ * FIXME!! Do we need to check on all cpus ?
+ */
+ r = kvmppc_core_check_processor_compat_hv();
+ if (r < 0)
+ return -ENODEV;
+
+ kvm_ops_hv.owner = THIS_MODULE;
+ kvmppc_hv_ops = &kvm_ops_hv;
+
+ r = kvmppc_mmu_hv_init();
return r;
}
-static void kvmppc_book3s_hv_exit(void)
+static void kvmppc_book3s_exit_hv(void)
{
- kvm_exit();
+ kvmppc_hv_ops = NULL;
}
-module_init(kvmppc_book3s_hv_init);
-module_exit(kvmppc_book3s_hv_exit);
+module_init(kvmppc_book3s_init_hv);
+module_exit(kvmppc_book3s_exit_hv);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 8cd0daebb82..7cde8a66520 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -6,6 +6,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/preempt.h>
#include <linux/export.h>
@@ -181,3 +182,33 @@ void __init kvm_cma_reserve(void)
kvm_cma_declare_contiguous(selected_size, align_size);
}
}
+
+/*
+ * When running HV mode KVM we need to block certain operations while KVM VMs
+ * exist in the system. We use a counter of VMs to track this.
+ *
+ * One of the operations we need to block is onlining of secondaries, so we
+ * protect hv_vm_count with get/put_online_cpus().
+ */
+static atomic_t hv_vm_count;
+
+void kvm_hv_vm_activated(void)
+{
+ get_online_cpus();
+ atomic_inc(&hv_vm_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
+
+void kvm_hv_vm_deactivated(void)
+{
+ get_online_cpus();
+ atomic_dec(&hv_vm_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
+
+bool kvm_hv_mode_active(void)
+{
+ return atomic_read(&hv_vm_count) != 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 37f1cc417ca..731be7478b2 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -35,7 +35,7 @@
****************************************************************************/
/* Registers:
- * r4: vcpu pointer
+ * none
*/
_GLOBAL(__kvmppc_vcore_entry)
@@ -57,9 +57,11 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DSCR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+BEGIN_FTR_SECTION
/* Save host DABR */
mfspr r3, SPRN_DABR
std r3, HSTATE_DABR(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Hard-disable interrupts */
mfmsr r10
@@ -69,7 +71,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
mtmsrd r10,1
/* Save host PMU registers */
- /* R4 is live here (vcpu pointer) but not r3 or r5 */
+BEGIN_FTR_SECTION
+ /* Work around P8 PMAE bug */
+ li r3, -1
+ clrrdi r3, r3, 10
+ mfspr r8, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
@@ -86,9 +95,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
+ mfspr r9, SPRN_SIAR
+ mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR(r13)
std r5, HSTATE_MMCR + 8(r13)
std r6, HSTATE_MMCR + 16(r13)
+ std r9, HSTATE_MMCR + 24(r13)
+ std r10, HSTATE_MMCR + 32(r13)
+BEGIN_FTR_SECTION
+ mfspr r9, SPRN_SIER
+ std r8, HSTATE_MMCR + 40(r13)
+ std r9, HSTATE_MMCR + 48(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
@@ -134,22 +152,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* enters the guest with interrupts enabled.
*/
BEGIN_FTR_SECTION
+ ld r4, HSTATE_KVM_VCPU(r13)
ld r0, VCPU_PENDING_EXC(r4)
li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and. r0, r0, r7
beq 32f
- mr r31, r4
lhz r3, PACAPACAINDEX(r13)
bl smp_send_reschedule
nop
- mr r4, r31
32:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
#endif /* CONFIG_SMP */
/* Jump to partition switch code */
- bl .kvmppc_hv_entry_trampoline
+ bl kvmppc_hv_entry_trampoline
nop
/*
@@ -158,9 +175,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* Interrupts are enabled again at this point.
*/
-.global kvmppc_handler_highmem
-kvmppc_handler_highmem:
-
/*
* Register usage at this point:
*
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index a353c485808..3a5c568b1e8 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <asm/opal.h>
+#include <asm/mce.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
@@ -58,18 +59,6 @@ static void reload_slb(struct kvm_vcpu *vcpu)
}
}
-/* POWER7 TLB flush */
-static void flush_tlb_power7(struct kvm_vcpu *vcpu)
-{
- unsigned long i, rb;
-
- rb = TLBIEL_INVAL_SET_LPID;
- for (i = 0; i < POWER7_TLB_SETS; ++i) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
-}
-
/*
* On POWER7, see if we can handle a machine check that occurred inside
* the guest in real mode, without switching to the host partition.
@@ -79,9 +68,7 @@ static void flush_tlb_power7(struct kvm_vcpu *vcpu)
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
unsigned long srr1 = vcpu->arch.shregs.msr;
-#ifdef CONFIG_PPC_POWERNV
- struct opal_machine_check_event *opal_evt;
-#endif
+ struct machine_check_event mce_evt;
long handled = 1;
if (srr1 & SRR1_MC_LDSTERR) {
@@ -96,7 +83,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -113,28 +101,37 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
break;
default:
handled = 0;
}
-#ifdef CONFIG_PPC_POWERNV
/*
- * See if OPAL has already handled the condition.
- * We assume that if the condition is recovered then OPAL
+ * See if we have already handled the condition in the linux host.
+ * We assume that if the condition is recovered then linux host
* will have generated an error log event that we will pick
* up and log later.
+ * Don't release mce event now. We will queue up the event so that
+ * we can log the MCE event info on host console.
*/
- opal_evt = local_paca->opal_mc_evt;
- if (opal_evt->version == OpalMCE_V1 &&
- (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
- opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
+ goto out;
+
+ if (mce_evt.version == MCE_V1 &&
+ (mce_evt.severity == MCE_SEV_NO_ERROR ||
+ mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
handled = 1;
- if (handled)
- opal_evt->in_use = 0;
-#endif
+out:
+ /*
+ * We are now going enter guest either through machine check
+ * interrupt (for unhandled errors) or will continue from
+ * current HSRR0 (for handled errors) in guest. Hence
+ * queue up the event so that we can log it from host console later.
+ */
+ machine_check_queue_event();
return handled;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9c515440ad1..5a24d3c2b6b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -42,13 +42,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
/*
* If there is only one vcore, and it's currently running,
+ * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
* we can use tlbiel as long as we mark all other physical
* cores as potentially having stale TLB entries for this lpid.
* If we're not using MMU notifiers, we never take pages away
* from the guest, so we can use tlbiel if requested.
* Otherwise, don't use tlbiel.
*/
- if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
global = 0;
else if (kvm->arch.using_mmu_notifiers)
global = 1;
@@ -111,7 +112,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
- memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (!memslot)
return;
@@ -134,7 +135,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unlock_rmap(rmap);
}
-static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
@@ -192,7 +193,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Find the memslot (if any) for this address */
gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
gfn = gpa >> PAGE_SHIFT;
- memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
pa = 0;
is_io = ~0ul;
rmap = NULL;
@@ -225,26 +226,28 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
is_io = pa & (HPTE_R_I | HPTE_R_W);
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
+ pa |= gpa & ~PAGE_MASK;
} else {
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
- if (pte_present(pte)) {
+ pte = lookup_linux_pte_and_update(pgdir, hva, writing,
+ &pte_size);
+ if (pte_present(pte) && !pte_numa(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
ptel = hpte_make_readonly(ptel);
is_io = hpte_cache_bits(pte_val(pte));
pa = pte_pfn(pte) << PAGE_SHIFT;
+ pa |= hva & (pte_size - 1);
+ pa |= gpa & ~PAGE_MASK;
}
}
if (pte_size < psize)
return H_PARAMETER;
- if (pa && pte_size > psize)
- pa |= gpa & (pte_size - 1);
ptel &= ~(HPTE_R_PP0 - psize);
ptel |= pa;
@@ -668,10 +671,11 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
psize = hpte_page_size(v, r);
gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
- memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (memslot) {
hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+ pte = lookup_linux_pte_and_update(pgdir, hva,
+ 1, &psize);
if (pte_present(pte) && !pte_write(pte))
r = hpte_make_readonly(r);
}
@@ -749,6 +753,10 @@ static int slb_base_page_shift[4] = {
20, /* 1M, unsupported */
};
+/* When called from virtmode, this func should be protected by
+ * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
+ * can trigger deadlock issue.
+ */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
unsigned long valid)
{
@@ -806,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
r = hpte[i+1];
/*
- * Check the HPTE again, including large page size
- * Since we don't currently allow any MPSS (mixed
- * page-size segment) page sizes, it is sufficient
- * to check against the actual page size.
+ * Check the HPTE again, including base page size
*/
if ((v & valid) && (v & mask) == val &&
- hpte_page_size(v, r) == (1ul << pshift))
+ hpte_base_page_size(v, r) == (1ul << pshift))
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 294b7af28cd..558a67df812 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -28,34 +28,17 @@
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu-hash64.h>
+#include <asm/tm.h>
+
+#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
#ifdef __LITTLE_ENDIAN__
#error Need to fix lppaca and SLB shadow accesses in little endian mode
#endif
-/*****************************************************************************
- * *
- * Real Mode handlers that need to be in the linear mapping *
- * *
- ****************************************************************************/
-
- .globl kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
- mfspr r13,SPRN_SRR0
- addi r13,r13,4
- mtspr SPRN_SRR0,r13
- GET_SCRATCH0(r13)
- rfid
- b .
-
- .globl kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
- mfspr r13,SPRN_HSRR0
- addi r13,r13,4
- mtspr SPRN_HSRR0,r13
- GET_SCRATCH0(r13)
- hrfid
- b .
+/* Values in HSTATE_NAPPING(r13) */
+#define NAPPING_CEDE 1
+#define NAPPING_NOVCPU 2
/*
* Call kvmppc_hv_entry in real mode.
@@ -65,9 +48,12 @@ kvmppc_skip_Hinterrupt:
*
* LR = return address to continue at after eventually re-enabling MMU
*/
-_GLOBAL(kvmppc_hv_entry_trampoline)
+_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
mfmsr r10
- LOAD_REG_ADDR(r5, kvmppc_hv_entry)
+ LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
li r6,MSR_IR | MSR_DR
@@ -77,21 +63,197 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
mtsrr1 r6
RFI
-/******************************************************************************
- * *
- * Entry code *
- * *
- *****************************************************************************/
+kvmppc_call_hv_entry:
+ ld r4, HSTATE_KVM_VCPU(r13)
+ bl kvmppc_hv_entry
+
+ /* Back from guest - restore host state and return to caller */
+
+BEGIN_FTR_SECTION
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+ /* Restore SPRG3 */
+ ld r3,PACA_SPRG_VDSO(r13)
+ mtspr SPRN_SPRG_VDSO_WRITE,r3
+
+ /* Reload the host's PMU registers */
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r4, LPPACA_PMCINUSE(r3)
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+BEGIN_FTR_SECTION
+ ld r3, HSTATE_MMCR(r13)
+ andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r4, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, HSTATE_PMC(r13)
+ lwz r4, HSTATE_PMC + 4(r13)
+ lwz r5, HSTATE_PMC + 8(r13)
+ lwz r6, HSTATE_PMC + 12(r13)
+ lwz r8, HSTATE_PMC + 16(r13)
+ lwz r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ lwz r10, HSTATE_PMC + 24(r13)
+ lwz r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, HSTATE_MMCR(r13)
+ ld r4, HSTATE_MMCR + 8(r13)
+ ld r5, HSTATE_MMCR + 16(r13)
+ ld r6, HSTATE_MMCR + 24(r13)
+ ld r7, HSTATE_MMCR + 32(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_SIAR, r6
+ mtspr SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+ ld r8, HSTATE_MMCR + 40(r13)
+ ld r9, HSTATE_MMCR + 48(r13)
+ mtspr SPRN_MMCR2, r8
+ mtspr SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+23:
+
+ /*
+ * Reload DEC. HDEC interrupts were disabled when
+ * we reloaded the host's LPCR value.
+ */
+ ld r3, HSTATE_DECEXP(r13)
+ mftb r4
+ subf r4, r4, r3
+ mtspr SPRN_DEC, r4
+
+ /*
+ * For external and machine check interrupts, we need
+ * to call the Linux handler to process the interrupt.
+ * We do that by jumping to absolute address 0x500 for
+ * external interrupts, or the machine_check_fwnmi label
+ * for machine checks (since firmware might have patched
+ * the vector area at 0x200). The [h]rfid at the end of the
+ * handler will return to the book3s_hv_interrupts.S code.
+ * For other interrupts we do the rfid to get back
+ * to the book3s_hv_interrupts.S code here.
+ */
+ ld r8, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ ld r7, HSTATE_HOST_MSR(r13)
+
+ cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
+ beq 11f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* RFI into the highmem handler, or branch to interrupt handler */
+ mfmsr r6
+ li r0, MSR_RI
+ andc r6, r6, r0
+ mtmsrd r6, 1 /* Clear RI in MSR */
+ mtsrr0 r8
+ mtsrr1 r7
+ beqa 0x500 /* external interrupt (PPC970) */
+ beq cr1, 13f /* machine check */
+ RFI
+
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+11: mtspr SPRN_HSRR0, r8
+ mtspr SPRN_HSRR1, r7
+ ba 0x500
+
+13: b machine_check_fwnmi
+
+kvmppc_primary_no_guest:
+ /* We handle this much like a ceded vcpu */
+ /* set our bit in napping_threads */
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lbz r7, HSTATE_PTID(r13)
+ li r0, 1
+ sld r0, r0, r7
+ addi r6, r5, VCORE_NAPPING_THREADS
+1: lwarx r3, 0, r6
+ or r3, r3, r0
+ stwcx. r3, 0, r6
+ bne 1b
+ /* order napping_threads update vs testing entry_exit_count */
+ isync
+ li r12, 0
+ lwz r7, VCORE_ENTRY_EXIT(r5)
+ cmpwi r7, 0x100
+ bge kvm_novcpu_exit /* another thread already exiting */
+ li r3, NAPPING_NOVCPU
+ stb r3, HSTATE_NAPPING(r13)
+ li r3, 1
+ stb r3, HSTATE_HWTHREAD_REQ(r13)
+
+ b kvm_do_nap
+
+kvm_novcpu_wakeup:
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r5, HSTATE_KVM_VCORE(r13)
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+ stb r0, HSTATE_HWTHREAD_REQ(r13)
+
+ /* check the wake reason */
+ bl kvmppc_check_wake_reason
+
+ /* see if any other thread is already exiting */
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ bge kvm_novcpu_exit
+
+ /* clear our bit in napping_threads */
+ lbz r7, HSTATE_PTID(r13)
+ li r0, 1
+ sld r0, r0, r7
+ addi r6, r5, VCORE_NAPPING_THREADS
+4: lwarx r7, 0, r6
+ andc r7, r7, r0
+ stwcx. r7, 0, r6
+ bne 4b
+
+ /* See if the wake reason means we need to exit */
+ cmpdi r3, 0
+ bge kvm_novcpu_exit
+
+ /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ bne kvmppc_got_guest
+
+kvm_novcpu_exit:
+ b hdec_soon
/*
- * We come in here when wakened from nap mode on a secondary hw thread.
+ * We come in here when wakened from nap mode.
* Relocation is off and most register values are lost.
* r13 points to the PACA.
*/
.globl kvm_start_guest
kvm_start_guest:
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+
+ /* Set runlatch bit the minute you wake up from nap */
+ mfspr r1, SPRN_CTRLF
+ ori r1, r1, 1
+ mtspr SPRN_CTRLT, r1
+
ld r2,PACATOC(r13)
li r0,KVM_HWTHREAD_IN_KVM
@@ -103,8 +265,13 @@ kvm_start_guest:
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,0
- bne kvm_end_cede
+ cmpwi r0,NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0,NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
/*
* We weren't napping due to cede, so this must be a secondary
@@ -114,171 +281,89 @@ kvm_start_guest:
*/
/* Check the wake reason in SRR1 to see why we got here */
- mfspr r3,SPRN_SRR1
- rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
- cmpwi r3,4 /* was it an external interrupt? */
- bne 27f /* if not */
- ld r5,HSTATE_XICS_PHYS(r13)
- li r7,XICS_XIRR /* if it was an external interrupt, */
- lwzcix r8,r5,r7 /* get and ack the interrupt */
- sync
- clrldi. r9,r8,40 /* get interrupt source ID. */
- beq 28f /* none there? */
- cmpwi r9,XICS_IPI /* was it an IPI? */
- bne 29f
- li r0,0xff
- li r6,XICS_MFRR
- stbcix r0,r5,r6 /* clear IPI */
- stwcix r8,r5,r7 /* EOI the interrupt */
- sync /* order loading of vcpu after that */
+ bl kvmppc_check_wake_reason
+ cmpdi r3, 0
+ bge kvm_no_guest
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
beq kvm_no_guest
- b kvmppc_hv_entry
-27: /* XXX should handle hypervisor maintenance interrupts etc. here */
- b kvm_no_guest
-28: /* SRR1 said external but ICP said nope?? */
- b kvm_no_guest
-29: /* External non-IPI interrupt to offline secondary thread? help?? */
- stw r8,HSTATE_SAVED_XIRR(r13)
- b kvm_no_guest
+ /* Set HSTATE_DSCR(r13) to something sensible */
+ ld r6, PACA_DSCR(r13)
+ std r6, HSTATE_DSCR(r13)
+
+ bl kvmppc_hv_entry
+
+ /* Back from the guest, go back to nap */
+ /* Clear our vcpu pointer so we don't come back in early */
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+ /*
+ * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
+ * the nap_count, because once the increment to nap_count is
+ * visible we could be given another vcpu.
+ */
+ lwsync
+
+ /* increment the nap count and then go to nap mode */
+ ld r4, HSTATE_KVM_VCORE(r13)
+ addi r4, r4, VCORE_NAP_COUNT
+51: lwarx r3, 0, r4
+ addi r3, r3, 1
+ stwcx. r3, 0, r4
+ bne 51b
+
+kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+kvm_do_nap:
+ /* Clear the runlatch bit before napping */
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
+ li r3, LPCR_PECE0
+ mfspr r4, SPRN_LPCR
+ rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
+ mtspr SPRN_LPCR, r4
+ isync
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
.global kvmppc_hv_entry
kvmppc_hv_entry:
/* Required state:
*
- * R4 = vcpu pointer
+ * R4 = vcpu pointer (or NULL)
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
* all other volatile GPRS = free
*/
mflr r0
- std r0, HSTATE_VMHANDLER(r13)
-
- /* Set partition DABR */
- /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
- li r5,3
- ld r6,VCPU_DABR(r4)
- mtspr SPRN_DABRX,r5
- mtspr SPRN_DABR,r6
-BEGIN_FTR_SECTION
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* Load guest PMU registers */
- /* R4 is live here (vcpu pointer) */
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- isync
- lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
- lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
- lwz r6, VCPU_PMC + 8(r4)
- lwz r7, VCPU_PMC + 12(r4)
- lwz r8, VCPU_PMC + 16(r4)
- lwz r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
- lwz r10, VCPU_PMC + 24(r4)
- lwz r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r5
- mtspr SPRN_PMC3, r6
- mtspr SPRN_PMC4, r7
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- ld r3, VCPU_MMCR(r4)
- ld r5, VCPU_MMCR + 8(r4)
- ld r6, VCPU_MMCR + 16(r4)
- mtspr SPRN_MMCR1, r5
- mtspr SPRN_MMCRA, r6
- mtspr SPRN_MMCR0, r3
- isync
-
- /* Load up FP, VMX and VSX registers */
- bl kvmppc_load_fp
-
- ld r14, VCPU_GPR(R14)(r4)
- ld r15, VCPU_GPR(R15)(r4)
- ld r16, VCPU_GPR(R16)(r4)
- ld r17, VCPU_GPR(R17)(r4)
- ld r18, VCPU_GPR(R18)(r4)
- ld r19, VCPU_GPR(R19)(r4)
- ld r20, VCPU_GPR(R20)(r4)
- ld r21, VCPU_GPR(R21)(r4)
- ld r22, VCPU_GPR(R22)(r4)
- ld r23, VCPU_GPR(R23)(r4)
- ld r24, VCPU_GPR(R24)(r4)
- ld r25, VCPU_GPR(R25)(r4)
- ld r26, VCPU_GPR(R26)(r4)
- ld r27, VCPU_GPR(R27)(r4)
- ld r28, VCPU_GPR(R28)(r4)
- ld r29, VCPU_GPR(R29)(r4)
- ld r30, VCPU_GPR(R30)(r4)
- ld r31, VCPU_GPR(R31)(r4)
-
-BEGIN_FTR_SECTION
- /* Switch DSCR to guest value */
- ld r5, VCPU_DSCR(r4)
- mtspr SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /*
- * Set the decrementer to the guest decrementer.
- */
- ld r8,VCPU_DEC_EXPIRES(r4)
- mftb r7
- subf r3,r7,r8
- mtspr SPRN_DEC,r3
- stw r3,VCPU_DEC(r4)
-
- ld r5, VCPU_SPRG0(r4)
- ld r6, VCPU_SPRG1(r4)
- ld r7, VCPU_SPRG2(r4)
- ld r8, VCPU_SPRG3(r4)
- mtspr SPRN_SPRG0, r5
- mtspr SPRN_SPRG1, r6
- mtspr SPRN_SPRG2, r7
- mtspr SPRN_SPRG3, r8
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
- /* Increment yield count if they have a VPA */
- ld r3, VCPU_VPA(r4)
- cmpdi r3, 0
- beq 25f
- lwz r5, LPPACA_YIELDCOUNT(r3)
- addi r5, r5, 1
- stw r5, LPPACA_YIELDCOUNT(r3)
- li r6, 1
- stb r6, VCPU_VPA_DIRTY(r4)
-25:
- /* Load up DAR and DSISR */
- ld r5, VCPU_DAR(r4)
- lwz r6, VCPU_DSISR(r4)
- mtspr SPRN_DAR, r5
- mtspr SPRN_DSISR, r6
-
-BEGIN_FTR_SECTION
- /* Restore AMR and UAMOR, set AMOR to all 1s */
- ld r5,VCPU_AMR(r4)
- ld r6,VCPU_UAMOR(r4)
- li r7,-1
- mtspr SPRN_AMR,r5
- mtspr SPRN_UAMOR,r6
- mtspr SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ li r6, KVM_GUEST_MODE_HOST_HV
+ stb r6, HSTATE_IN_GUEST(r13)
/* Clear out SLB */
li r6,0
@@ -305,8 +390,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
bne 21b
/* Primary thread switches to guest partition. */
- ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
- lwz r6,VCPU_PTID(r4)
+ ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
+ lbz r6,HSTATE_PTID(r13)
cmpwi r6,0
bne 20f
ld r6,KVM_SDR1(r9)
@@ -334,7 +419,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
andc r7,r7,r0
stdcx. r7,0,r6
bne 23b
- li r6,128 /* and flush the TLB */
+ /* Flush the TLB of any entries for this LPID */
+ /* use arch 2.07S as a proxy for POWER8 */
+BEGIN_FTR_SECTION
+ li r6,512 /* POWER8 has 512 sets */
+FTR_SECTION_ELSE
+ li r6,128 /* POWER7 has 128 sets */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
mtctr r6
li r7,0x800 /* IS field = 0b10 */
ptesync
@@ -343,7 +434,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
bdnz 28b
ptesync
-22: li r0,1
+ /* Add timebase offset onto timebase */
+22: ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 37f
+ mftb r6 /* current host timebase */
+ add r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 37f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Load guest PCR value to select appropriate compat mode */
+37: ld r7, VCORE_PCR(r5)
+ cmpdi r7, 0
+ beq 38f
+ mtspr SPRN_PCR, r7
+38:
+
+BEGIN_FTR_SECTION
+ /* DPDES is shared between threads */
+ ld r8, VCORE_DPDES(r5)
+ mtspr SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ li r0,1
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
b 10f
@@ -353,7 +472,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
beq 20b
/* Set LPCR and RMOR. */
-10: ld r8,KVM_LPCR(r9)
+10: ld r8,VCORE_LPCR(r5)
mtspr SPRN_LPCR,r8
ld r8,KVM_RMOR(r9)
mtspr SPRN_RMOR,r8
@@ -361,20 +480,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/* Check if HDEC expires soon */
mfspr r3,SPRN_HDEC
- cmpwi r3,10
+ cmpwi r3,512 /* 1 microsecond */
li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- mr r9,r4
blt hdec_soon
-
- /* Save purr/spurr */
- mfspr r5,SPRN_PURR
- mfspr r6,SPRN_SPURR
- std r5,HSTATE_PURR(r13)
- std r6,HSTATE_SPURR(r13)
- ld r7,VCPU_PURR(r4)
- ld r8,VCPU_SPURR(r4)
- mtspr SPRN_PURR,r7
- mtspr SPRN_SPURR,r8
b 31f
/*
@@ -385,7 +493,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* We also have to invalidate the TLB since its
* entries aren't tagged with the LPID.
*/
-30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
+30: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
/* first take native_tlbie_lock */
.section ".toc","aw"
@@ -405,7 +514,8 @@ toc_tlbie_lock:
bne 24b
isync
- ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
li r0,0x18f
rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
or r0,r7,r0
@@ -449,7 +559,6 @@ toc_tlbie_lock:
mfspr r3,SPRN_HDEC
cmpwi r3,10
li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- mr r9,r4
blt hdec_soon
/* Enable HDEC interrupts */
@@ -464,9 +573,14 @@ toc_tlbie_lock:
mfspr r0,SPRN_HID0
mfspr r0,SPRN_HID0
mfspr r0,SPRN_HID0
+31:
+ /* Do we have a guest vcpu to run? */
+ cmpdi r4, 0
+ beq kvmppc_primary_no_guest
+kvmppc_got_guest:
/* Load up guest SLB entries */
-31: lwz r5,VCPU_SLB_MAX(r4)
+ lwz r5,VCPU_SLB_MAX(r4)
cmpwi r5,0
beq 9f
mtctr r5
@@ -477,6 +591,321 @@ toc_tlbie_lock:
addi r6,r6,VCPU_SLB_SIZE
bdnz 1b
9:
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+ beq 25f
+ lwz r5, LPPACA_YIELDCOUNT(r3)
+ addi r5, r5, 1
+ stw r5, LPPACA_YIELDCOUNT(r3)
+ li r6, 1
+ stb r6, VCPU_VPA_DIRTY(r4)
+25:
+
+BEGIN_FTR_SECTION
+ /* Save purr/spurr */
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ std r5,HSTATE_PURR(r13)
+ std r6,HSTATE_SPURR(r13)
+ ld r7,VCPU_PURR(r4)
+ ld r8,VCPU_SPURR(r4)
+ mtspr SPRN_PURR,r7
+ mtspr SPRN_SPURR,r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ /* Set partition DABR */
+ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+ lwz r5,VCPU_DABRX(r4)
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+ BEGIN_FTR_SECTION_NESTED(89)
+ isync
+ END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b skip_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+
+ /* Turn on TM/FP/VSX/VMX so we can restore them. */
+ mfmsr r5
+ li r6, MSR_TM >> 32
+ sldi r6, r6, 32
+ or r5, r5, r6
+ ori r5, r5, MSR_FP
+ oris r5, r5, (MSR_VEC | MSR_VSX)@h
+ mtmsrd r5
+
+ /*
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+ ld r5, VCPU_TFHAR(r4)
+ ld r6, VCPU_TFIAR(r4)
+ ld r7, VCPU_TEXASR(r4)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+ ld r5, VCPU_MSR(r4)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq skip_tm /* TM not active in guest */
+
+ /* Make sure the failure summary is set, otherwise we'll program check
+ * when we trechkpt. It's possible that this might have been not set
+ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+ * host.
+ */
+ oris r7, r7, (TEXASR_FS)@h
+ mtspr SPRN_TEXASR, r7
+
+ /*
+ * We need to load up the checkpointed state for the guest.
+ * We need to do this early as it will blow away any GPRs, VSRs and
+ * some SPRs.
+ */
+
+ mr r31, r4
+ addi r3, r31, VCPU_FPRS_TM
+ bl .load_fp_state
+ addi r3, r31, VCPU_VRS_TM
+ bl .load_vr_state
+ mr r4, r31
+ lwz r7, VCPU_VRSAVE_TM(r4)
+ mtspr SPRN_VRSAVE, r7
+
+ ld r5, VCPU_LR_TM(r4)
+ lwz r6, VCPU_CR_TM(r4)
+ ld r7, VCPU_CTR_TM(r4)
+ ld r8, VCPU_AMR_TM(r4)
+ ld r9, VCPU_TAR_TM(r4)
+ mtlr r5
+ mtcr r6
+ mtctr r7
+ mtspr SPRN_AMR, r8
+ mtspr SPRN_TAR, r9
+
+ /*
+ * Load up PPR and DSCR values but don't put them in the actual SPRs
+ * till the last moment to avoid running with userspace PPR and DSCR for
+ * too long.
+ */
+ ld r29, VCPU_DSCR_TM(r4)
+ ld r30, VCPU_PPR_TM(r4)
+
+ std r2, PACATMSCRATCH(r13) /* Save TOC */
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* Load GPRs r0-r28 */
+ reg = 0
+ .rept 29
+ ld reg, VCPU_GPRS_TM(reg)(r31)
+ reg = reg + 1
+ .endr
+
+ mtspr SPRN_DSCR, r29
+ mtspr SPRN_PPR, r30
+
+ /* Load final GPRs */
+ ld 29, VCPU_GPRS_TM(29)(r31)
+ ld 30, VCPU_GPRS_TM(30)(r31)
+ ld 31, VCPU_GPRS_TM(31)(r31)
+
+ /* TM checkpointed state is now setup. All GPRs are now volatile. */
+ TRECHKPT
+
+ /* Now let's get back the state we need. */
+ HMT_MEDIUM
+ GET_PACA(r13)
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+ ld r4, HSTATE_KVM_VCPU(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATMSCRATCH(r13)
+
+ /* Set the MSR RI since we have our registers back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+skip_tm:
+#endif
+
+ /* Load guest PMU registers */
+ /* R4 is live here (vcpu pointer) */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+BEGIN_FTR_SECTION
+ ld r3, VCPU_MMCR(r4)
+ andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r5, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+BEGIN_FTR_SECTION
+ lwz r10, VCPU_PMC + 24(r4)
+ lwz r11, VCPU_PMC + 28(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCR + 16(r4)
+ ld r7, VCPU_SIAR(r4)
+ ld r8, VCPU_SDAR(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_SIAR, r7
+ mtspr SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+ ld r5, VCPU_MMCR + 24(r4)
+ ld r6, VCPU_SIER(r4)
+ lwz r7, VCPU_PMC + 24(r4)
+ lwz r8, VCPU_PMC + 28(r4)
+ ld r9, VCPU_MMCR + 32(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+ mtspr SPRN_SPMC1, r7
+ mtspr SPRN_SPMC2, r8
+ mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+
+ /* Load up FP, VMX and VSX registers */
+ bl kvmppc_load_fp
+
+ ld r14, VCPU_GPR(R14)(r4)
+ ld r15, VCPU_GPR(R15)(r4)
+ ld r16, VCPU_GPR(R16)(r4)
+ ld r17, VCPU_GPR(R17)(r4)
+ ld r18, VCPU_GPR(R18)(r4)
+ ld r19, VCPU_GPR(R19)(r4)
+ ld r20, VCPU_GPR(R20)(r4)
+ ld r21, VCPU_GPR(R21)(r4)
+ ld r22, VCPU_GPR(R22)(r4)
+ ld r23, VCPU_GPR(R23)(r4)
+ ld r24, VCPU_GPR(R24)(r4)
+ ld r25, VCPU_GPR(R25)(r4)
+ ld r26, VCPU_GPR(R26)(r4)
+ ld r27, VCPU_GPR(R27)(r4)
+ ld r28, VCPU_GPR(R28)(r4)
+ ld r29, VCPU_GPR(R29)(r4)
+ ld r30, VCPU_GPR(R30)(r4)
+ ld r31, VCPU_GPR(R31)(r4)
+
+BEGIN_FTR_SECTION
+ /* Switch DSCR to guest value */
+ ld r5, VCPU_DSCR(r4)
+ mtspr SPRN_DSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ /* Skip next section on POWER7 or PPC970 */
+ b 8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+ mfmsr r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+ /* Load up POWER8-specific registers */
+ ld r5, VCPU_IAMR(r4)
+ lwz r6, VCPU_PSPB(r4)
+ ld r7, VCPU_FSCR(r4)
+ mtspr SPRN_IAMR, r5
+ mtspr SPRN_PSPB, r6
+ mtspr SPRN_FSCR, r7
+ ld r5, VCPU_DAWR(r4)
+ ld r6, VCPU_DAWRX(r4)
+ ld r7, VCPU_CIABR(r4)
+ ld r8, VCPU_TAR(r4)
+ mtspr SPRN_DAWR, r5
+ mtspr SPRN_DAWRX, r6
+ mtspr SPRN_CIABR, r7
+ mtspr SPRN_TAR, r8
+ ld r5, VCPU_IC(r4)
+ ld r6, VCPU_VTB(r4)
+ mtspr SPRN_IC, r5
+ mtspr SPRN_VTB, r6
+ ld r8, VCPU_EBBHR(r4)
+ mtspr SPRN_EBBHR, r8
+ ld r5, VCPU_EBBRR(r4)
+ ld r6, VCPU_BESCR(r4)
+ ld r7, VCPU_CSIGR(r4)
+ ld r8, VCPU_TACR(r4)
+ mtspr SPRN_EBBRR, r5
+ mtspr SPRN_BESCR, r6
+ mtspr SPRN_CSIGR, r7
+ mtspr SPRN_TACR, r8
+ ld r5, VCPU_TCSCR(r4)
+ ld r6, VCPU_ACOP(r4)
+ lwz r7, VCPU_GUEST_PID(r4)
+ ld r8, VCPU_WORT(r4)
+ mtspr SPRN_TCSCR, r5
+ mtspr SPRN_ACOP, r6
+ mtspr SPRN_PID, r7
+ mtspr SPRN_WORT, r8
+8:
+
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ /* r8 is a host timebase value here, convert to guest TB */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r6,VCORE_TB_OFFSET(r5)
+ add r8,r8,r6
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+ stw r3,VCPU_DEC(r4)
+
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+ ld r7, VCPU_SPRG2(r4)
+ ld r8, VCPU_SPRG3(r4)
+ mtspr SPRN_SPRG0, r5
+ mtspr SPRN_SPRG1, r6
+ mtspr SPRN_SPRG2, r7
+ mtspr SPRN_SPRG3, r8
+
+ /* Load up DAR and DSISR */
+ ld r5, VCPU_DAR(r4)
+ lwz r6, VCPU_DSISR(r4)
+ mtspr SPRN_DAR, r5
+ mtspr SPRN_DSISR, r6
+
+BEGIN_FTR_SECTION
+ /* Restore AMR and UAMOR, set AMOR to all 1s */
+ ld r5,VCPU_AMR(r4)
+ ld r6,VCPU_UAMOR(r4)
+ li r7,-1
+ mtspr SPRN_AMR,r5
+ mtspr SPRN_UAMOR,r6
+ mtspr SPRN_AMOR,r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Restore state of CTRL run bit; assume 1 on entry */
lwz r5,VCPU_CTRL(r4)
@@ -492,48 +921,54 @@ toc_tlbie_lock:
mtctr r6
mtxer r7
+kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
-kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r7
+deliver_guest_interrupt:
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
/* Check if we can deliver an external or decrementer interrupt now */
- ld r0,VCPU_PENDING_EXC(r4)
- lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
- and r0,r0,r8
- cmpdi cr1,r0,0
- andi. r0,r11,MSR_EE
- beq cr1,11f
+ ld r0, VCPU_PENDING_EXC(r4)
+ rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+ cmpdi cr1, r0, 0
+ andi. r8, r11, MSR_EE
BEGIN_FTR_SECTION
- mfspr r8,SPRN_LPCR
- ori r8,r8,LPCR_MER
- mtspr SPRN_LPCR,r8
+ mfspr r8, SPRN_LPCR
+ /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+ mtspr SPRN_LPCR, r8
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
beq 5f
- li r0,BOOK3S_INTERRUPT_EXTERNAL
-12: mr r6,r10
- mr r10,r0
- mr r7,r11
- li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11,r11,63
- b 5f
-11: beq 5f
- mfspr r0,SPRN_DEC
- cmpwi r0,0
- li r0,BOOK3S_INTERRUPT_DECREMENTER
- blt 12b
+ li r0, BOOK3S_INTERRUPT_EXTERNAL
+ bne cr1, 12f
+ mfspr r0, SPRN_DEC
+ cmpwi r0, 0
+ li r0, BOOK3S_INTERRUPT_DECREMENTER
+ bge 5f
- /* Move SRR0 and SRR1 into the respective regs */
-5: mtspr SPRN_SRR0, r6
- mtspr SPRN_SRR1, r7
+12: mtspr SPRN_SRR0, r10
+ mr r10,r0
+ mtspr SPRN_SRR1, r11
+ mr r9, r4
+ bl kvmppc_msr_interrupt
+5:
+/*
+ * Required state:
+ * R4 = vcpu
+ * R10: value for HSRR0
+ * R11: value for HSRR1
+ * R13 = PACA
+ */
fast_guest_return:
li r0,0
stb r0,VCPU_CEDED(r4) /* cancel cede */
@@ -541,7 +976,7 @@ fast_guest_return:
mtspr SPRN_HSRR1,r11
/* Activate guest mode, so faults get handled by KVM */
- li r9, KVM_GUEST_MODE_GUEST
+ li r9, KVM_GUEST_MODE_GUEST_HV
stb r9, HSTATE_IN_GUEST(r13)
/* Enter guest */
@@ -550,13 +985,15 @@ BEGIN_FTR_SECTION
ld r5, VCPU_CFAR(r4)
mtspr SPRN_CFAR, r5
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r0, VCPU_PPR(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
lwz r6, VCPU_CR(r4)
mtlr r5
mtcr r6
- ld r0, VCPU_GPR(R0)(r4)
ld r1, VCPU_GPR(R1)(r4)
ld r2, VCPU_GPR(R2)(r4)
ld r3, VCPU_GPR(R3)(r4)
@@ -570,6 +1007,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r12, VCPU_GPR(R12)(r4)
ld r13, VCPU_GPR(R13)(r4)
+BEGIN_FTR_SECTION
+ mtspr SPRN_PPR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
hrfid
@@ -584,8 +1025,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
/*
* We come here from the first-level interrupt handlers.
*/
- .globl kvmppc_interrupt
-kvmppc_interrupt:
+ .globl kvmppc_interrupt_hv
+kvmppc_interrupt_hv:
/*
* Register contents:
* R12 = interrupt vector
@@ -593,8 +1034,20 @@ kvmppc_interrupt:
* guest CR, R12 saved in shadow VCPU SCRATCH1/0
* guest R13 saved in SPRN_SCRATCH0
*/
- /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
- std r9, HSTATE_HOST_R2(r13)
+ std r9, HSTATE_SCRATCH2(r13)
+
+ lbz r9, HSTATE_IN_GUEST(r13)
+ cmpwi r9, KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9, KVM_GUEST_MODE_GUEST
+ ld r9, HSTATE_SCRATCH2(r13)
+ beq kvmppc_interrupt_pr
+#endif
+ /* We're now back in the host but in guest MMU context */
+ li r9, KVM_GUEST_MODE_HOST_HV
+ stb r9, HSTATE_IN_GUEST(r13)
+
ld r9, HSTATE_KVM_VCPU(r13)
/* Save registers */
@@ -608,7 +1061,7 @@ kvmppc_interrupt:
std r6, VCPU_GPR(R6)(r9)
std r7, VCPU_GPR(R7)(r9)
std r8, VCPU_GPR(R8)(r9)
- ld r0, HSTATE_HOST_R2(r13)
+ ld r0, HSTATE_SCRATCH2(r13)
std r0, VCPU_GPR(R9)(r9)
std r10, VCPU_GPR(R10)(r9)
std r11, VCPU_GPR(R11)(r9)
@@ -620,6 +1073,10 @@ BEGIN_FTR_SECTION
ld r3, HSTATE_CFAR(r13)
std r3, VCPU_CFAR(r9)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4, HSTATE_PPR(r13)
+ std r4, VCPU_PPR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Restore R1/R2 so we can handle faults */
ld r1, HSTATE_HOST_R1(r13)
@@ -642,10 +1099,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
std r3, VCPU_GPR(R13)(r9)
std r4, VCPU_LR(r9)
- /* Unset guest mode */
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
stw r12,VCPU_TRAP(r9)
/* Save HEIR (HV emulation assist reg) in last_inst
@@ -695,96 +1148,23 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/* External interrupt, first check for host_ipi. If this is
* set, we know the host wants us out so let's do it now
*/
-do_ext_interrupt:
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne ext_interrupt_to_host
-
- /* Now read the interrupt from the ICP */
- ld r5, HSTATE_XICS_PHYS(r13)
- li r7, XICS_XIRR
- cmpdi r5, 0
- beq- ext_interrupt_to_host
- lwzcix r3, r5, r7
- rlwinm. r0, r3, 0, 0xffffff
- sync
- beq 3f /* if nothing pending in the ICP */
-
- /* We found something in the ICP...
- *
- * If it's not an IPI, stash it in the PACA and return to
- * the host, we don't (yet) handle directing real external
- * interrupts directly to the guest
- */
- cmpwi r0, XICS_IPI
- bne ext_stash_for_host
-
- /* It's an IPI, clear the MFRR and EOI it */
- li r0, 0xff
- li r6, XICS_MFRR
- stbcix r0, r5, r6 /* clear the IPI */
- stwcix r3, r5, r7 /* EOI it */
- sync
-
- /* We need to re-check host IPI now in case it got set in the
- * meantime. If it's clear, we bounce the interrupt to the
- * guest
- */
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne- 1f
+ bl kvmppc_read_intr
+ cmpdi r3, 0
+ bgt ext_interrupt_to_host
- /* Allright, looks like an IPI for the guest, we need to set MER */
-3:
/* Check if any CPU is heading out to the host, if so head out too */
ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
bge ext_interrupt_to_host
- /* See if there is a pending interrupt for the guest */
- mfspr r8, SPRN_LPCR
- ld r0, VCPU_PENDING_EXC(r9)
- /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
- rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
- rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
- beq 2f
-
- /* And if the guest EE is set, we can deliver immediately, else
- * we return to the guest with MER set
- */
- andi. r0, r11, MSR_EE
- beq 2f
- mtspr SPRN_SRR0, r10
- mtspr SPRN_SRR1, r11
- li r10, BOOK3S_INTERRUPT_EXTERNAL
- li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11, r11, 63
-2: mr r4, r9
- mtspr SPRN_LPCR, r8
- b fast_guest_return
-
- /* We raced with the host, we need to resend that IPI, bummer */
-1: li r0, IPI_PRIORITY
- stbcix r0, r5, r6 /* set the IPI */
- sync
- b ext_interrupt_to_host
+ /* Return to guest after delivering any pending interrupt */
+ mr r4, r9
+ b deliver_guest_interrupt
-ext_stash_for_host:
- /* It's not an IPI and it's for the host, stash it in the PACA
- * before exit, it will be picked up by the host ICP driver
- */
- stw r3, HSTATE_SAVED_XIRR(r13)
ext_interrupt_to_host:
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
- /* Save DEC */
- mfspr r5,SPRN_DEC
- mftb r6
- extsw r5,r5
- add r5,r5,r6
- std r5,VCPU_DEC_EXPIRES(r9)
-
/* Save more register state */
mfdar r6
mfdsisr r7
@@ -855,13 +1235,313 @@ BEGIN_FTR_SECTION
mtspr SPRN_SPURR,r4
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+ /* Save DEC */
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+ add r5,r5,r6
+ /* r5 is a guest timebase value here, convert to host TB */
+ ld r3,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_TB_OFFSET(r3)
+ subf r5,r4,r5
+ std r5,VCPU_DEC_EXPIRES(r9)
+
+BEGIN_FTR_SECTION
+ b 8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ /* Save POWER8-specific registers */
+ mfspr r5, SPRN_IAMR
+ mfspr r6, SPRN_PSPB
+ mfspr r7, SPRN_FSCR
+ std r5, VCPU_IAMR(r9)
+ stw r6, VCPU_PSPB(r9)
+ std r7, VCPU_FSCR(r9)
+ mfspr r5, SPRN_IC
+ mfspr r6, SPRN_VTB
+ mfspr r7, SPRN_TAR
+ std r5, VCPU_IC(r9)
+ std r6, VCPU_VTB(r9)
+ std r7, VCPU_TAR(r9)
+ mfspr r8, SPRN_EBBHR
+ std r8, VCPU_EBBHR(r9)
+ mfspr r5, SPRN_EBBRR
+ mfspr r6, SPRN_BESCR
+ mfspr r7, SPRN_CSIGR
+ mfspr r8, SPRN_TACR
+ std r5, VCPU_EBBRR(r9)
+ std r6, VCPU_BESCR(r9)
+ std r7, VCPU_CSIGR(r9)
+ std r8, VCPU_TACR(r9)
+ mfspr r5, SPRN_TCSCR
+ mfspr r6, SPRN_ACOP
+ mfspr r7, SPRN_PID
+ mfspr r8, SPRN_WORT
+ std r5, VCPU_TCSCR(r9)
+ std r6, VCPU_ACOP(r9)
+ stw r7, VCPU_GUEST_PID(r9)
+ std r8, VCPU_WORT(r9)
+8:
+
+ /* Save and reset AMR and UAMOR before turning on the MMU */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_AMR
+ mfspr r6,SPRN_UAMOR
+ std r5,VCPU_AMR(r9)
+ std r6,VCPU_UAMOR(r9)
+ li r6,0
+ mtspr SPRN_AMR,r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Switch DSCR back to host value */
+BEGIN_FTR_SECTION
+ mfspr r8, SPRN_DSCR
+ ld r7, HSTATE_DSCR(r13)
+ std r8, VCPU_DSCR(r9)
+ mtspr SPRN_DSCR, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(R14)(r9)
+ std r15, VCPU_GPR(R15)(r9)
+ std r16, VCPU_GPR(R16)(r9)
+ std r17, VCPU_GPR(R17)(r9)
+ std r18, VCPU_GPR(R18)(r9)
+ std r19, VCPU_GPR(R19)(r9)
+ std r20, VCPU_GPR(R20)(r9)
+ std r21, VCPU_GPR(R21)(r9)
+ std r22, VCPU_GPR(R22)(r9)
+ std r23, VCPU_GPR(R23)(r9)
+ std r24, VCPU_GPR(R24)(r9)
+ std r25, VCPU_GPR(R25)(r9)
+ std r26, VCPU_GPR(R26)(r9)
+ std r27, VCPU_GPR(R27)(r9)
+ std r28, VCPU_GPR(R28)(r9)
+ std r29, VCPU_GPR(R29)(r9)
+ std r30, VCPU_GPR(R30)(r9)
+ std r31, VCPU_GPR(R31)(r9)
+
+ /* Save SPRGs */
+ mfspr r3, SPRN_SPRG0
+ mfspr r4, SPRN_SPRG1
+ mfspr r5, SPRN_SPRG2
+ mfspr r6, SPRN_SPRG3
+ std r3, VCPU_SPRG0(r9)
+ std r4, VCPU_SPRG1(r9)
+ std r5, VCPU_SPRG2(r9)
+ std r6, VCPU_SPRG3(r9)
+
+ /* save FP state */
+ mr r3, r9
+ bl kvmppc_save_fp
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b 2f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+ /* Turn on TM. */
+ mfmsr r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+ ld r5, VCPU_MSR(r9)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq 1f /* TM not active in guest. */
+
+ li r3, TM_CAUSE_KVM_RESCHED
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* All GPRs are volatile at this point. */
+ TRECLAIM(R3)
+
+ /* Temporarily store r13 and r9 so we have some regs to play with */
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9, PACATMSCRATCH(r13)
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Get a few more GPRs free. */
+ std r29, VCPU_GPRS_TM(29)(r9)
+ std r30, VCPU_GPRS_TM(30)(r9)
+ std r31, VCPU_GPRS_TM(31)(r9)
+
+ /* Save away PPR and DSCR soon so don't run with user values. */
+ mfspr r31, SPRN_PPR
+ HMT_MEDIUM
+ mfspr r30, SPRN_DSCR
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+
+ /* Save all but r9, r13 & r29-r31 */
+ reg = 0
+ .rept 29
+ .if (reg != 9) && (reg != 13)
+ std reg, VCPU_GPRS_TM(reg)(r9)
+ .endif
+ reg = reg + 1
+ .endr
+ /* ... now save r13 */
+ GET_SCRATCH0(r4)
+ std r4, VCPU_GPRS_TM(13)(r9)
+ /* ... and save r9 */
+ ld r4, PACATMSCRATCH(r13)
+ std r4, VCPU_GPRS_TM(9)(r9)
+
+ /* Reload stack pointer and TOC. */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ /* Set MSR RI now we have r1 and r13 back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+
+ /* Save away checkpinted SPRs. */
+ std r31, VCPU_PPR_TM(r9)
+ std r30, VCPU_DSCR_TM(r9)
+ mflr r5
+ mfcr r6
+ mfctr r7
+ mfspr r8, SPRN_AMR
+ mfspr r10, SPRN_TAR
+ std r5, VCPU_LR_TM(r9)
+ stw r6, VCPU_CR_TM(r9)
+ std r7, VCPU_CTR_TM(r9)
+ std r8, VCPU_AMR_TM(r9)
+ std r10, VCPU_TAR_TM(r9)
+
+ /* Restore r12 as trap number. */
+ lwz r12, VCPU_TRAP(r9)
+
+ /* Save FP/VSX. */
+ addi r3, r9, VCPU_FPRS_TM
+ bl .store_fp_state
+ addi r3, r9, VCPU_VRS_TM
+ bl .store_vr_state
+ mfspr r6, SPRN_VRSAVE
+ stw r6, VCPU_VRSAVE_TM(r9)
+1:
+ /*
+ * We need to save these SPRs after the treclaim so that the software
+ * error code is recorded correctly in the TEXASR. Also the user may
+ * change these outside of a transaction, so they must always be
+ * context switched.
+ */
+ mfspr r5, SPRN_TFHAR
+ mfspr r6, SPRN_TFIAR
+ mfspr r7, SPRN_TEXASR
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+ std r7, VCPU_TEXASR(r9)
+2:
+#endif
+
+ /* Increment yield count if they have a VPA */
+ ld r8, VCPU_VPA(r9) /* do they have a VPA? */
+ cmpdi r8, 0
+ beq 25f
+ lwz r3, LPPACA_YIELDCOUNT(r8)
+ addi r3, r3, 1
+ stw r3, LPPACA_YIELDCOUNT(r8)
+ li r3, 1
+ stb r3, VCPU_VPA_DIRTY(r9)
+25:
+ /* Save PMU registers if requested */
+ /* r8 and cr0.eq are live here */
+BEGIN_FTR_SECTION
+ /*
+ * POWER8 seems to have a hardware bug where setting
+ * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+ * when some counters are already negative doesn't seem
+ * to cause a performance monitor alert (and hence interrupt).
+ * The effect of this is that when saving the PMU state,
+ * if there is no PMU alert pending when we read MMCR0
+ * before freezing the counters, but one becomes pending
+ * before we read the counters, we lose it.
+ * To work around this, we need a way to freeze the counters
+ * before reading MMCR0. Normally, freezing the counters
+ * is done by writing MMCR0 (to set MMCR0[FC]) which
+ * unavoidably writes MMCR0[PMA0] as well. On POWER8,
+ * we can also freeze the counters using MMCR2, by writing
+ * 1s to all the counter freeze condition bits (there are
+ * 9 bits each for 6 counters).
+ */
+ li r3, -1 /* set all freeze bits */
+ clrrdi r3, r3, 10
+ mfspr r10, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ isync
+ beq 21f /* if no VPA, save PMU stuff anyway */
+ lbz r7, LPPACA_PMCINUSE(r8)
+ cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r7, SPRN_SIAR
+ mfspr r8, SPRN_SDAR
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCR + 16(r9)
+BEGIN_FTR_SECTION
+ std r10, VCPU_MMCR + 24(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ std r7, VCPU_SIAR(r9)
+ std r8, VCPU_SDAR(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ stw r10, VCPU_PMC + 24(r9)
+ stw r11, VCPU_PMC + 28(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_SIER
+ mfspr r6, SPRN_SPMC1
+ mfspr r7, SPRN_SPMC2
+ mfspr r8, SPRN_MMCRS
+ std r5, VCPU_SIER(r9)
+ stw r6, VCPU_PMC + 24(r9)
+ stw r7, VCPU_PMC + 28(r9)
+ std r8, VCPU_MMCR + 32(r9)
+ lis r4, 0x8000
+ mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22:
/* Clear out SLB */
li r5,0
slbmte r5,r5
slbia
ptesync
-hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */
+hdec_soon: /* r12 = trap, r13 = paca */
BEGIN_FTR_SECTION
b 32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -872,14 +1552,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
*/
/* Increment the threads-exiting-guest count in the 0xff00
bits of vcore->entry_exit_count */
- lwsync
ld r5,HSTATE_KVM_VCORE(r13)
addi r6,r5,VCORE_ENTRY_EXIT
41: lwarx r3,0,r6
addi r0,r3,0x100
stwcx. r0,0,r6
bne 41b
- lwsync
+ isync /* order stwcx. vs. reading napping_threads */
/*
* At this point we have an interrupt that we have to pass
@@ -895,8 +1574,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
*/
cmpwi r3,0x100 /* Are we the first here? */
bge 43f
- cmpwi r3,1 /* Are any other threads in the guest? */
- ble 43f
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
beq 40f
li r0,0
@@ -907,11 +1584,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* doesn't wake CPUs up from nap.
*/
lwz r3,VCORE_NAPPING_THREADS(r5)
- lwz r4,VCPU_PTID(r9)
+ lbz r4,HSTATE_PTID(r13)
li r0,1
sld r0,r0,r4
andc. r3,r3,r0 /* no sense IPI'ing ourselves */
beq 43f
+ /* Order entry/exit update vs. IPIs */
+ sync
mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
subf r6,r4,r13
42: andi. r0,r3,1
@@ -924,10 +1603,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
addi r6,r6,PACA_SIZE
bne 42b
+secondary_too_late:
/* Secondary threads wait for primary to do partition switch */
-43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
- ld r5,HSTATE_KVM_VCORE(r13)
- lwz r3,VCPU_PTID(r9)
+43: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
+ lbz r3,HSTATE_PTID(r13)
cmpwi r3,0
beq 15f
HMT_LOW
@@ -954,7 +1634,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_SDR1,r6 /* switch to partition page table */
mtspr SPRN_LPID,r7
isync
- li r0,0
+
+BEGIN_FTR_SECTION
+ /* DPDES is shared between threads */
+ mfspr r7, SPRN_DPDES
+ std r7, VCORE_DPDES(r5)
+ /* clear DPDES so we don't get guest doorbells in the host */
+ li r8, 0
+ mtspr SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ /* Subtract timebase offset from timebase */
+ ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 17f
+ mftb r6 /* current guest timebase */
+ subf r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 17f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Reset PCR */
+17: ld r0, VCORE_PCR(r5)
+ cmpdi r0, 0
+ beq 18f
+ li r0, 0
+ mtspr SPRN_PCR, r0
+18:
+ /* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
@@ -969,7 +1681,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
* We have to lock against concurrent tlbies, and
* we have to flush the whole TLB.
*/
-32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
+32: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
/* Take the guest's tlbie_lock */
#ifdef __BIG_ENDIAN__
@@ -1052,209 +1765,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1: addi r8,r8,16
.endr
- /* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
- mfspr r5,SPRN_AMR
- mfspr r6,SPRN_UAMOR
- std r5,VCPU_AMR(r9)
- std r6,VCPU_UAMOR(r9)
- li r6,0
- mtspr SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
- mfspr r8, SPRN_DSCR
- ld r7, HSTATE_DSCR(r13)
- std r8, VCPU_DSCR(r7)
- mtspr SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* Save non-volatile GPRs */
- std r14, VCPU_GPR(R14)(r9)
- std r15, VCPU_GPR(R15)(r9)
- std r16, VCPU_GPR(R16)(r9)
- std r17, VCPU_GPR(R17)(r9)
- std r18, VCPU_GPR(R18)(r9)
- std r19, VCPU_GPR(R19)(r9)
- std r20, VCPU_GPR(R20)(r9)
- std r21, VCPU_GPR(R21)(r9)
- std r22, VCPU_GPR(R22)(r9)
- std r23, VCPU_GPR(R23)(r9)
- std r24, VCPU_GPR(R24)(r9)
- std r25, VCPU_GPR(R25)(r9)
- std r26, VCPU_GPR(R26)(r9)
- std r27, VCPU_GPR(R27)(r9)
- std r28, VCPU_GPR(R28)(r9)
- std r29, VCPU_GPR(R29)(r9)
- std r30, VCPU_GPR(R30)(r9)
- std r31, VCPU_GPR(R31)(r9)
-
- /* Save SPRGs */
- mfspr r3, SPRN_SPRG0
- mfspr r4, SPRN_SPRG1
- mfspr r5, SPRN_SPRG2
- mfspr r6, SPRN_SPRG3
- std r3, VCPU_SPRG0(r9)
- std r4, VCPU_SPRG1(r9)
- std r5, VCPU_SPRG2(r9)
- std r6, VCPU_SPRG3(r9)
-
- /* save FP state */
- mr r3, r9
- bl .kvmppc_save_fp
-
- /* Increment yield count if they have a VPA */
- ld r8, VCPU_VPA(r9) /* do they have a VPA? */
- cmpdi r8, 0
- beq 25f
- lwz r3, LPPACA_YIELDCOUNT(r8)
- addi r3, r3, 1
- stw r3, LPPACA_YIELDCOUNT(r8)
- li r3, 1
- stb r3, VCPU_VPA_DIRTY(r9)
-25:
- /* Save PMU registers if requested */
- /* r8 and cr0.eq are live here */
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mfspr r4, SPRN_MMCR0 /* save MMCR0 */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- mfspr r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
- /* On P7, clear MMCRA in order to disable SDAR updates */
- li r7, 0
- mtspr SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- isync
- beq 21f /* if no VPA, save PMU stuff anyway */
- lbz r7, LPPACA_PMCINUSE(r8)
- cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
- bne 21f
- std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
- b 22f
-21: mfspr r5, SPRN_MMCR1
- std r4, VCPU_MMCR(r9)
- std r5, VCPU_MMCR + 8(r9)
- std r6, VCPU_MMCR + 16(r9)
- mfspr r3, SPRN_PMC1
- mfspr r4, SPRN_PMC2
- mfspr r5, SPRN_PMC3
- mfspr r6, SPRN_PMC4
- mfspr r7, SPRN_PMC5
- mfspr r8, SPRN_PMC6
-BEGIN_FTR_SECTION
- mfspr r10, SPRN_PMC7
- mfspr r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- stw r3, VCPU_PMC(r9)
- stw r4, VCPU_PMC + 4(r9)
- stw r5, VCPU_PMC + 8(r9)
- stw r6, VCPU_PMC + 12(r9)
- stw r7, VCPU_PMC + 16(r9)
- stw r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
- stw r10, VCPU_PMC + 24(r9)
- stw r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-22:
-
- /* Secondary threads go off to take a nap on POWER7 */
-BEGIN_FTR_SECTION
- lwz r0,VCPU_PTID(r9)
- cmpwi r0,0
- bne secondary_nap
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* Restore host DABR and DABRX */
- ld r5,HSTATE_DABR(r13)
- li r6,7
- mtspr SPRN_DABR,r5
- mtspr SPRN_DABRX,r6
-
- /* Restore SPRG3 */
- ld r3,PACA_SPRG3(r13)
- mtspr SPRN_SPRG3,r3
-
- /*
- * Reload DEC. HDEC interrupts were disabled when
- * we reloaded the host's LPCR value.
- */
- ld r3, HSTATE_DECEXP(r13)
- mftb r4
- subf r4, r4, r3
- mtspr SPRN_DEC, r4
-
- /* Reload the host's PMU registers */
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r4, LPPACA_PMCINUSE(r3)
- cmpwi r4, 0
- beq 23f /* skip if not */
- lwz r3, HSTATE_PMC(r13)
- lwz r4, HSTATE_PMC + 4(r13)
- lwz r5, HSTATE_PMC + 8(r13)
- lwz r6, HSTATE_PMC + 12(r13)
- lwz r8, HSTATE_PMC + 16(r13)
- lwz r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
- lwz r10, HSTATE_PMC + 24(r13)
- lwz r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r4
- mtspr SPRN_PMC3, r5
- mtspr SPRN_PMC4, r6
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- ld r3, HSTATE_MMCR(r13)
- ld r4, HSTATE_MMCR + 8(r13)
- ld r5, HSTATE_MMCR + 16(r13)
- mtspr SPRN_MMCR1, r4
- mtspr SPRN_MMCRA, r5
- mtspr SPRN_MMCR0, r3
- isync
-23:
- /*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
- */
- ld r8, HSTATE_VMHANDLER(r13)
- ld r7, HSTATE_HOST_MSR(r13)
-
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
- beq 11f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
- /* RFI into the highmem handler, or branch to interrupt handler */
- mfmsr r6
- li r0, MSR_RI
- andc r6, r6, r0
- mtmsrd r6, 1 /* Clear RI in MSR */
- mtsrr0 r8
- mtsrr1 r7
- beqa 0x500 /* external interrupt (PPC970) */
- beq cr1, 13f /* machine check */
- RFI
-
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
-13: b machine_check_fwnmi
+ ld r0, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ mtlr r0
+ blr
/*
* Check whether an HDSI is an HPTE not found fault or something else.
@@ -1280,7 +1798,7 @@ kvmppc_hdsi:
/* Search the hash table. */
mr r3, r9 /* vcpu pointer */
li r7, 1 /* data fault */
- bl .kvmppc_hpte_hv_fault
+ bl kvmppc_hpte_hv_fault
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
@@ -1300,8 +1818,7 @@ kvmppc_hdsi:
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
- li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11, r11, 63
+ bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
@@ -1333,7 +1850,7 @@ fast_interrupt_c_return:
stw r8, VCPU_LAST_INST(r9)
/* Unset guest mode. */
- li r0, KVM_GUEST_MODE_NONE
+ li r0, KVM_GUEST_MODE_HOST_HV
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
@@ -1355,7 +1872,7 @@ kvmppc_hisi:
mr r4, r10
mr r6, r11
li r7, 0 /* instruction fault */
- bl .kvmppc_hpte_hv_fault
+ bl kvmppc_hpte_hv_fault
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
@@ -1370,8 +1887,7 @@ kvmppc_hisi:
1: mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_INST_STORAGE
- li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11, r11, 63
+ bl kvmppc_msr_interrupt
b fast_interrupt_c_return
3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
@@ -1388,7 +1904,8 @@ kvmppc_hisi:
hcall_try_real_mode:
ld r3,VCPU_GPR(R3)(r9)
andi. r0,r11,MSR_PR
- bne guest_exit_cont
+ /* sc 1 from userspace - reflect to guest syscall */
+ bne sc_1_fast_return
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -1409,6 +1926,14 @@ hcall_try_real_mode:
ld r11,VCPU_MSR(r4)
b fast_guest_return
+sc_1_fast_return:
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ li r10, BOOK3S_INTERRUPT_SYSCALL
+ bl kvmppc_msr_interrupt
+ mr r4,r9
+ b fast_guest_return
+
/* We've attempted a real mode hcall, but it's punted it back
* to userspace. We need to restore some clobbered volatiles
* before resuming the pass-it-to-qemu path */
@@ -1421,16 +1946,16 @@ hcall_real_fallback:
.globl hcall_real_table
hcall_real_table:
.long 0 /* 0 - unused */
- .long .kvmppc_h_remove - hcall_real_table
- .long .kvmppc_h_enter - hcall_real_table
- .long .kvmppc_h_read - hcall_real_table
+ .long DOTSYM(kvmppc_h_remove) - hcall_real_table
+ .long DOTSYM(kvmppc_h_enter) - hcall_real_table
+ .long DOTSYM(kvmppc_h_read) - hcall_real_table
.long 0 /* 0x10 - H_CLEAR_MOD */
.long 0 /* 0x14 - H_CLEAR_REF */
- .long .kvmppc_h_protect - hcall_real_table
- .long 0 /* 0x1c - H_GET_TCE */
- .long .kvmppc_h_put_tce - hcall_real_table
+ .long DOTSYM(kvmppc_h_protect) - hcall_real_table
+ .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
+ .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
.long 0 /* 0x24 - H_SET_SPRG0 */
- .long .kvmppc_h_set_dabr - hcall_real_table
+ .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
.long 0 /* 0x2c */
.long 0 /* 0x30 */
.long 0 /* 0x34 */
@@ -1446,11 +1971,11 @@ hcall_real_table:
.long 0 /* 0x5c */
.long 0 /* 0x60 */
#ifdef CONFIG_KVM_XICS
- .long .kvmppc_rm_h_eoi - hcall_real_table
- .long .kvmppc_rm_h_cppr - hcall_real_table
- .long .kvmppc_rm_h_ipi - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
.long 0 /* 0x70 - H_IPOLL */
- .long .kvmppc_rm_h_xirr - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
#else
.long 0 /* 0x64 - H_EOI */
.long 0 /* 0x68 - H_CPPR */
@@ -1484,7 +2009,7 @@ hcall_real_table:
.long 0 /* 0xd4 */
.long 0 /* 0xd8 */
.long 0 /* 0xdc */
- .long .kvmppc_h_cede - hcall_real_table
+ .long DOTSYM(kvmppc_h_cede) - hcall_real_table
.long 0 /* 0xe4 */
.long 0 /* 0xe8 */
.long 0 /* 0xec */
@@ -1501,15 +2026,35 @@ hcall_real_table:
.long 0 /* 0x118 */
.long 0 /* 0x11c */
.long 0 /* 0x120 */
- .long .kvmppc_h_bulk_remove - hcall_real_table
+ .long DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
+ .long 0 /* 0x128 */
+ .long 0 /* 0x12c */
+ .long 0 /* 0x130 */
+ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
hcall_real_table_end:
ignore_hdec:
mr r4,r9
b fast_guest_return
+_GLOBAL(kvmppc_h_set_xdabr)
+ andi. r0, r5, DABRX_USER | DABRX_KERNEL
+ beq 6f
+ li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
+ andc. r0, r5, r0
+ beq 3f
+6: li r3, H_PARAMETER
+ blr
+
_GLOBAL(kvmppc_h_set_dabr)
+ li r5, DABRX_USER | DABRX_KERNEL
+3:
+BEGIN_FTR_SECTION
+ b 2f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
std r4,VCPU_DABR(r3)
+ stw r5, VCPU_DABRX(r3)
+ mtspr SPRN_DABRX, r5
/* Work around P7 bug where DABR can get corrupted on mtspr */
1: mtspr SPRN_DABR,r4
mfspr r5, SPRN_DABR
@@ -1519,6 +2064,17 @@ _GLOBAL(kvmppc_h_set_dabr)
li r3,0
blr
+ /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
+2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+ rlwimi r5, r4, 1, DAWRX_WT
+ clrrdi r4, r4, 3
+ std r4, VCPU_DAWR(r3)
+ std r5, VCPU_DAWRX(r3)
+ mtspr SPRN_DAWR, r4
+ mtspr SPRN_DAWRX, r5
+ li r3, 0
+ blr
+
_GLOBAL(kvmppc_h_cede)
ori r11,r11,MSR_EE
std r11,VCPU_MSR(r3)
@@ -1542,7 +2098,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
* up to the host.
*/
ld r5,HSTATE_KVM_VCORE(r13)
- lwz r6,VCPU_PTID(r3)
+ lbz r6,HSTATE_PTID(r13)
lwz r8,VCORE_ENTRY_EXIT(r5)
clrldi r8,r8,56
li r0,1
@@ -1555,11 +2111,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
bge kvm_cede_exit
stwcx. r4,0,r6
bne 31b
- li r0,1
- stb r0,HSTATE_NAPPING(r13)
/* order napping_threads update vs testing entry_exit_count */
- lwsync
- mr r4,r3
+ isync
+ li r0,NAPPING_CEDE
+ stb r0,HSTATE_NAPPING(r13)
lwz r7,VCORE_ENTRY_EXIT(r5)
cmpwi r7,0x100
bge 33f /* another thread already exiting */
@@ -1591,16 +2146,24 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
std r31, VCPU_GPR(R31)(r3)
/* save FP state */
- bl .kvmppc_save_fp
+ bl kvmppc_save_fp
/*
- * Take a nap until a decrementer or external interrupt occurs,
- * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+ * Take a nap until a decrementer or external or doobell interrupt
+ * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
+ * runlatch bit before napping.
*/
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
+BEGIN_FTR_SECTION
+ oris r5,r5,LPCR_PECEDP@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_LPCR,r5
isync
li r0, 0
@@ -1612,6 +2175,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
nap
b .
+33: mr r4, r3
+ li r3, 0
+ li r12, 0
+ b 34f
+
kvm_end_cede:
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
@@ -1641,12 +2209,15 @@ kvm_end_cede:
ld r29, VCPU_GPR(R29)(r4)
ld r30, VCPU_GPR(R30)(r4)
ld r31, VCPU_GPR(R31)(r4)
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ bl kvmppc_check_wake_reason
/* clear our bit in vcore->napping_threads */
-33: ld r5,HSTATE_KVM_VCORE(r13)
- lwz r3,VCPU_PTID(r4)
+34: ld r5,HSTATE_KVM_VCORE(r13)
+ lbz r7,HSTATE_PTID(r13)
li r0,1
- sld r0,r0,r3
+ sld r0,r0,r7
addi r6,r5,VCORE_NAPPING_THREADS
32: lwarx r7,0,r6
andc r7,r7,r0
@@ -1655,23 +2226,18 @@ kvm_end_cede:
li r0,0
stb r0,HSTATE_NAPPING(r13)
- /* Check the wake reason in SRR1 to see why we got here */
- mfspr r3, SPRN_SRR1
- rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
- cmpwi r3, 4 /* was it an external interrupt? */
- li r12, BOOK3S_INTERRUPT_EXTERNAL
+ /* See if the wake reason means we need to exit */
+ stw r12, VCPU_TRAP(r4)
mr r9, r4
- ld r10, VCPU_PC(r9)
- ld r11, VCPU_MSR(r9)
- beq do_ext_interrupt /* if so */
+ cmpdi r3, 0
+ bgt guest_exit_cont
/* see if any other thread is already exiting */
lwz r0,VCORE_ENTRY_EXIT(r5)
cmpwi r0,0x100
- blt kvmppc_cede_reentry /* if not go back to guest */
+ bge guest_exit_cont
- /* some threads are exiting, so go to the guest exit path */
- b hcall_real_fallback
+ b kvmppc_cede_reentry /* if not go back to guest */
/* cede when already previously prodded case */
kvm_cede_prodded:
@@ -1689,85 +2255,144 @@ kvm_cede_exit:
/* Try to handle a machine check in real mode */
machine_check_realmode:
mr r3, r9 /* get vcpu pointer */
- bl .kvmppc_realmode_machine_check
+ bl kvmppc_realmode_machine_check
nop
- cmpdi r3, 0 /* continue exiting from guest? */
+ cmpdi r3, 0 /* Did we handle MCE ? */
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- beq mc_cont
+ /*
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
+ * machine check interrupt (set HSRR0 to 0x200). And for handled
+ * errors (no-fatal), just go back to guest execution with current
+ * HSRR0 instead of exiting guest. This new approach will inject
+ * machine check to guest for fatal error causing guest to crash.
+ *
+ * The old code used to return to host for unhandled errors which
+ * was causing guest to hang with soft lockups inside guest and
+ * makes it difficult to recover guest instance.
+ */
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ bne 2f /* Continue guest execution. */
/* If not, deliver a machine check. SRR0/1 are already set */
li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
- li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
- rotldi r11, r11, 63
- b fast_interrupt_c_return
+ ld r11, VCPU_MSR(r9)
+ bl kvmppc_msr_interrupt
+2: b fast_interrupt_c_return
-secondary_too_late:
- ld r5,HSTATE_KVM_VCORE(r13)
- HMT_LOW
-13: lbz r3,VCORE_IN_GUEST(r5)
- cmpwi r3,0
- bne 13b
- HMT_MEDIUM
- ld r11,PACA_SLBSHADOWPTR(r13)
+/*
+ * Check the reason we woke from nap, and take appropriate action.
+ * Returns:
+ * 0 if nothing needs to be done
+ * 1 if something happened that needs to be handled by the host
+ * -1 if there was a guest wakeup (IPI)
+ *
+ * Also sets r12 to the interrupt vector for any interrupt that needs
+ * to be handled now by the host (0x500 for external interrupt), or zero.
+ */
+kvmppc_check_wake_reason:
+ mfspr r6, SPRN_SRR1
+BEGIN_FTR_SECTION
+ rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */
+FTR_SECTION_ELSE
+ rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+ cmpwi r6, 8 /* was it an external interrupt? */
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+ beq kvmppc_read_intr /* if so, see what it was */
+ li r3, 0
+ li r12, 0
+ cmpwi r6, 6 /* was it the decrementer? */
+ beq 0f
+BEGIN_FTR_SECTION
+ cmpwi r6, 5 /* privileged doorbell? */
+ beq 0f
+ cmpwi r6, 3 /* hypervisor doorbell? */
+ beq 3f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1 /* anything else, return 1 */
+0: blr
- .rept SLB_NUM_BOLTED
- ld r5,SLBSHADOW_SAVEAREA(r11)
- ld r6,SLBSHADOW_SAVEAREA+8(r11)
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
-1: addi r11,r11,16
- .endr
+ /* hypervisor doorbell */
+3: li r12, BOOK3S_INTERRUPT_H_DOORBELL
+ li r3, 1
+ blr
-secondary_nap:
- /* Clear our vcpu pointer so we don't come back in early */
- li r0, 0
- std r0, HSTATE_KVM_VCPU(r13)
- lwsync
- /* Clear any pending IPI - assume we're a secondary thread */
- ld r5, HSTATE_XICS_PHYS(r13)
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ */
+kvmppc_read_intr:
+ /* see if a host IPI is pending */
+ li r3, 1
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne 1f
+
+ /* Now read the interrupt from the ICP */
+ ld r6, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
- lwzcix r3, r5, r7 /* ack any pending interrupt */
- rlwinm. r0, r3, 0, 0xffffff /* any pending? */
- beq 37f
+ cmpdi r6, 0
+ beq- 1f
+ lwzcix r0, r6, r7
+ rlwinm. r3, r0, 0, 0xffffff
sync
- li r0, 0xff
- li r6, XICS_MFRR
- stbcix r0, r5, r6 /* clear the IPI */
- stwcix r3, r5, r7 /* EOI it */
-37: sync
+ beq 1f /* if nothing pending in the ICP */
- /* increment the nap count and then go to nap mode */
- ld r4, HSTATE_KVM_VCORE(r13)
- addi r4, r4, VCORE_NAP_COUNT
- lwsync /* make previous updates visible */
-51: lwarx r3, 0, r4
- addi r3, r3, 1
- stwcx. r3, 0, r4
- bne 51b
+ /* We found something in the ICP...
+ *
+ * If it's not an IPI, stash it in the PACA and return to
+ * the host, we don't (yet) handle directing real external
+ * interrupts directly to the guest
+ */
+ cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
+ bne 42f
-kvm_no_guest:
- li r0, KVM_HWTHREAD_IN_NAP
- stb r0, HSTATE_HWTHREAD_STATE(r13)
+ /* It's an IPI, clear the MFRR and EOI it */
+ li r3, 0xff
+ li r8, XICS_MFRR
+ stbcix r3, r6, r8 /* clear the IPI */
+ stwcix r0, r6, r7 /* EOI it */
+ sync
- li r3, LPCR_PECE0
- mfspr r4, SPRN_LPCR
- rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
- mtspr SPRN_LPCR, r4
- isync
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
- nap
- b .
+ /* We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne- 43f
+
+ /* OK, it's an IPI for us */
+ li r3, -1
+1: blr
+
+42: /* It's not an IPI and it's for the host, stash it in the PACA
+ * before exit, it will be picked up by the host ICP driver
+ */
+ stw r0, HSTATE_SAVED_XIRR(r13)
+ li r3, 1
+ b 1b
+
+43: /* We raced with the host, we need to resend that IPI, bummer */
+ li r0, IPI_PRIORITY
+ stbcix r0, r6, r8 /* set the IPI */
+ sync
+ li r3, 1
+ b 1b
/*
* Save away FP, VMX and VSX registers.
* r3 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
*/
-_GLOBAL(kvmppc_save_fp)
+kvmppc_save_fp:
+ mflr r30
+ mr r31,r3
mfmsr r5
ori r8,r5,MSR_FP
#ifdef CONFIG_ALTIVEC
@@ -1782,52 +2407,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
isync
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- reg = 0
- .rept 32
- li r6,reg*16+VCPU_VSRS
- STXVD2X(reg,R6,R3)
- reg = reg + 1
- .endr
-FTR_SECTION_ELSE
-#endif
- reg = 0
- .rept 32
- stfd reg,reg*8+VCPU_FPRS(r3)
- reg = reg + 1
- .endr
-#ifdef CONFIG_VSX
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#endif
- mffs fr0
- stfd fr0,VCPU_FPSCR(r3)
-
+ addi r3,r3,VCPU_FPRS
+ bl .store_fp_state
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
- reg = 0
- .rept 32
- li r6,reg*16+VCPU_VRS
- stvx reg,r6,r3
- reg = reg + 1
- .endr
- mfvscr vr0
- li r6,VCPU_VSCR
- stvx vr0,r6,r3
+ addi r3,r31,VCPU_VRS
+ bl .store_vr_state
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
mfspr r6,SPRN_VRSAVE
- stw r6,VCPU_VRSAVE(r3)
- mtmsrd r5
- isync
+ stw r6,VCPU_VRSAVE(r31)
+ mtlr r30
blr
/*
* Load up FP, VMX and VSX registers
* r4 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
*/
- .globl kvmppc_load_fp
kvmppc_load_fp:
+ mflr r30
+ mr r31,r4
mfmsr r9
ori r8,r9,MSR_FP
#ifdef CONFIG_ALTIVEC
@@ -1842,40 +2443,59 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
isync
- lfd fr0,VCPU_FPSCR(r4)
- MTFSF_L(fr0)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- reg = 0
- .rept 32
- li r7,reg*16+VCPU_VSRS
- LXVD2X(reg,R7,R4)
- reg = reg + 1
- .endr
-FTR_SECTION_ELSE
-#endif
- reg = 0
- .rept 32
- lfd reg,reg*8+VCPU_FPRS(r4)
- reg = reg + 1
- .endr
-#ifdef CONFIG_VSX
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#endif
-
+ addi r3,r4,VCPU_FPRS
+ bl .load_fp_state
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
- li r7,VCPU_VSCR
- lvx vr0,r7,r4
- mtvscr vr0
- reg = 0
- .rept 32
- li r7,reg*16+VCPU_VRS
- lvx reg,r7,r4
- reg = reg + 1
- .endr
+ addi r3,r31,VCPU_VRS
+ bl .load_vr_state
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- lwz r7,VCPU_VRSAVE(r4)
+ lwz r7,VCPU_VRSAVE(r31)
mtspr SPRN_VRSAVE,r7
+ mtlr r30
+ mr r4,r31
+ blr
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+kvmppc_bad_host_intr:
+ b .
+
+/*
+ * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
+ * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+ * r11 has the guest MSR value (in/out)
+ * r9 has a vcpu pointer (in)
+ * r0 is used as a scratch register
+ */
+kvmppc_msr_interrupt:
+ rldicl r0, r11, 64 - MSR_TS_S_LG, 62
+ cmpwi r0, 2 /* Check if we are in transactional state.. */
+ ld r11, VCPU_INTR_MSR(r9)
+ bne 1f
+ /* ... if transactional, change to suspended */
+ li r0, 1
+1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ blr
+
+/*
+ * This works around a hardware bug on POWER8E processors, where
+ * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
+ * performance monitor interrupt. Instead, when we need to have
+ * an interrupt pending, we have to arrange for a counter to overflow.
+ */
+kvmppc_fix_pmao:
+ li r3, 0
+ mtspr SPRN_MMCR2, r3
+ lis r3, (MMCR0_PMXE | MMCR0_FCECE)@h
+ ori r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
+ mtspr SPRN_MMCR0, r3
+ lis r3, 0x7fff
+ ori r3, r3, 0xffff
+ mtspr SPRN_PMC6, r3
+ isync
blr
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 17cfae5497a..d044b8b7c69 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -25,9 +25,17 @@
#include <asm/exception-64s.h>
#if defined(CONFIG_PPC_BOOK3S_64)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define FUNC(name) name
+#else
#define FUNC(name) GLUE(.,name)
+#endif
+#define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU
+
#elif defined(CONFIG_PPC_BOOK3S_32)
#define FUNC(name) name
+#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+
#endif /* CONFIG_PPC_BOOK3S_XX */
#define VCPU_LOAD_NVGPRS(vcpu) \
@@ -87,15 +95,40 @@ kvm_start_entry:
VCPU_LOAD_NVGPRS(r4)
kvm_start_lightweight:
+ /* Copy registers into shadow vcpu so we can access them in real mode */
+ GET_SHADOW_VCPU(r3)
+ bl FUNC(kvmppc_copy_to_svcpu)
+ nop
+ REST_GPR(4, r1)
#ifdef CONFIG_PPC_BOOK3S_64
+ /* Get the dcbz32 flag */
PPC_LL r3, VCPU_HFLAGS(r4)
rldicl r3, r3, 0, 63 /* r3 &= 1 */
stb r3, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
- ld r3, VCPU_SHARED(r4)
- ld r3, VCPU_SHARED_SPRG3(r3)
+ lwz r3, VCPU_SHAREDBE(r4)
+ cmpwi r3, 0
+ ld r5, VCPU_SHARED(r4)
+ beq sprg3_little_endian
+sprg3_big_endian:
+#ifdef __BIG_ENDIAN__
+ ld r3, VCPU_SHARED_SPRG3(r5)
+#else
+ addi r5, r5, VCPU_SHARED_SPRG3
+ ldbrx r3, 0, r5
+#endif
+ b after_sprg3_load
+sprg3_little_endian:
+#ifdef __LITTLE_ENDIAN__
+ ld r3, VCPU_SHARED_SPRG3(r5)
+#else
+ addi r5, r5, VCPU_SHARED_SPRG3
+ ldbrx r3, 0, r5
+#endif
+
+after_sprg3_load:
mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -111,9 +144,6 @@ kvm_start_lightweight:
*
*/
-.global kvmppc_handler_highmem
-kvmppc_handler_highmem:
-
/*
* Register usage at this point:
*
@@ -122,21 +152,37 @@ kvmppc_handler_highmem:
* R12 = exit handler id
* R13 = PACA
* SVCPU.* = guest *
+ * MSR.EE = 1
*
*/
- /* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
+ PPC_LL r3, GPR4(r1) /* vcpu pointer */
+
+ /*
+ * kvmppc_copy_from_svcpu can clobber volatile registers, save
+ * the exit handler id to the vcpu and restore it from there later.
+ */
+ stw r12, VCPU_TRAP(r3)
+
+ /* Transfer reg values from shadow vcpu back to vcpu struct */
+ /* On 64-bit, interrupts are still off at this point */
+
+ GET_SHADOW_VCPU(r4)
+ bl FUNC(kvmppc_copy_from_svcpu)
+ nop
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Reload kernel SPRG3 value.
* No need to save guest value as usermode can't modify SPRG3.
*/
- ld r3, PACA_SPRG3(r13)
- mtspr SPRN_SPRG3, r3
+ ld r3, PACA_SPRG_VDSO(r13)
+ mtspr SPRN_SPRG_VDSO_WRITE, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
+ /* R7 = vcpu */
+ PPC_LL r7, GPR4(r1)
+
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
PPC_STL r16, VCPU_GPR(R16)(r7)
@@ -157,11 +203,11 @@ kvmppc_handler_highmem:
PPC_STL r31, VCPU_GPR(R31)(r7)
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- mr r5, r12
+ lwz r5, VCPU_TRAP(r7)
/* Restore r3 (kvm_run) and r4 (vcpu) */
REST_2GPRS(3, r1)
- bl FUNC(kvmppc_handle_exit)
+ bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
cmpwi r3, RESUME_GUEST
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index da8b13c4b77..5a1ab1250a0 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -28,7 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
-#include "trace.h"
+#include "trace_pr.h"
#define PTE_SIZE 12
@@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
HPTEG_HASH_BITS_VPTE_LONG);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+ return hash_64((vpage & 0xffffffff0ULL) >> 4,
+ HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
u64 index;
@@ -83,6 +91,15 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_add_head_rcu(&pte->list_vpte_long,
&vcpu3s->hpte_hash_vpte_long[index]);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Add to vPTE_64k list */
+ index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+ hlist_add_head_rcu(&pte->list_vpte_64k,
+ &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
+ vcpu3s->hpte_cache_count++;
+
spin_unlock(&vcpu3s->mmu_lock);
}
@@ -113,10 +130,13 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_del_init_rcu(&pte->list_pte_long);
hlist_del_init_rcu(&pte->list_vpte);
hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+ hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
+ vcpu3s->hpte_cache_count--;
spin_unlock(&vcpu3s->mmu_lock);
- vcpu3s->hpte_cache_count--;
call_rcu(&pte->rcu_head, free_pte_rcu);
}
@@ -219,6 +239,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
rcu_read_unlock();
}
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ struct hlist_head *list;
+ struct hpte_cache *pte;
+ u64 vp_mask = 0xffffffff0ULL;
+
+ list = &vcpu3s->hpte_hash_vpte_64k[
+ kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+ rcu_read_lock();
+
+ /* Check the list for matching entries and invalidate */
+ hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
+ if ((pte->pte.vpage & vp_mask) == guest_vp)
+ invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
+}
+#endif
+
/* Flush with mask 0xffffff000 */
static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
{
@@ -249,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
case 0xfffffffffULL:
kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case 0xffffffff0ULL:
+ kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+ break;
+#endif
case 0xffffff000ULL:
kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
break;
@@ -285,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
- pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
- vcpu3s->hpte_cache_count++;
-
if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
kvmppc_mmu_pte_flush_all(vcpu);
+ pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+
return pte;
}
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+ kmem_cache_free(hpte_cache, pte);
+}
+
void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_pte_flush(vcpu, 0, 0);
@@ -320,6 +372,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
spin_lock_init(&vcpu3s->mmu_lock);
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index a59a25a1321..6c8011fd57e 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -160,21 +160,23 @@
static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
{
- kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]);
}
static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
{
- u64 dsisr;
- struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+ u32 dsisr;
+ u64 msr = kvmppc_get_msr(vcpu);
- shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
- shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
- shared->dar = eaddr;
+ msr = kvmppc_set_field(msr, 33, 36, 0);
+ msr = kvmppc_set_field(msr, 42, 47, 0);
+ kvmppc_set_msr(vcpu, msr);
+ kvmppc_set_dar(vcpu, eaddr);
/* Page Fault */
dsisr = kvmppc_set_field(0, 33, 33, 1);
if (is_store)
- shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+ dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+ kvmppc_set_dsisr(vcpu, dsisr);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
@@ -207,11 +209,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* put in registers */
switch (ls_type) {
case FPU_LS_SINGLE:
- kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
+ kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs));
vcpu->arch.qpr[rs] = *((u32*)tmp);
break;
case FPU_LS_DOUBLE:
- vcpu->arch.fpr[rs] = *((u64*)tmp);
+ VCPU_FPR(vcpu, rs) = *((u64*)tmp);
break;
}
@@ -233,18 +235,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (ls_type) {
case FPU_LS_SINGLE:
- kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp);
val = *((u32*)tmp);
len = sizeof(u32);
break;
case FPU_LS_SINGLE_LOW:
- *((u32*)tmp) = vcpu->arch.fpr[rs];
- val = vcpu->arch.fpr[rs] & 0xffffffff;
+ *((u32*)tmp) = VCPU_FPR(vcpu, rs);
+ val = VCPU_FPR(vcpu, rs) & 0xffffffff;
len = sizeof(u32);
break;
case FPU_LS_DOUBLE:
- *((u64*)tmp) = vcpu->arch.fpr[rs];
- val = vcpu->arch.fpr[rs];
+ *((u64*)tmp) = VCPU_FPR(vcpu, rs);
+ val = VCPU_FPR(vcpu, rs);
len = sizeof(u64);
break;
default:
@@ -301,7 +303,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
emulated = EMULATE_DONE;
/* put in registers */
- kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
+ kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs));
vcpu->arch.qpr[rs] = tmp[1];
dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
@@ -319,7 +321,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u32 tmp[2];
int len = w ? sizeof(u32) : sizeof(u64);
- kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]);
tmp[1] = vcpu->arch.qpr[rs];
r = kvmppc_st(vcpu, &addr, len, tmp, true);
@@ -512,7 +514,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
u32 *src2, u32 *src3))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out;
u32 ps0_in1, ps0_in2, ps0_in3;
u32 ps1_in1, ps1_in2, ps1_in3;
@@ -521,20 +522,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
- kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
- kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3);
if (scalar & SCALAR_LOW)
ps0_in2 = qpr[reg_in2];
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
ps0_in1, ps0_in2, ps0_in3, ps0_out);
if (!(scalar & SCALAR_NO_PS0))
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
/* PS1 */
ps1_in1 = qpr[reg_in1];
@@ -545,7 +546,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
ps1_in2 = ps0_in2;
if (!(scalar & SCALAR_NO_PS1))
- func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
+ func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
@@ -561,7 +562,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
u32 *src2))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out;
u32 ps0_in1, ps0_in2;
u32 ps1_out;
@@ -571,20 +571,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
if (scalar & SCALAR_LOW)
ps0_in2 = qpr[reg_in2];
else
- kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
if (!(scalar & SCALAR_NO_PS0)) {
dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
ps0_in1, ps0_in2, ps0_out);
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
}
/* PS1 */
@@ -594,7 +594,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
if (scalar & SCALAR_HIGH)
ps1_in2 = ps0_in2;
- func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
+ func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
if (!(scalar & SCALAR_NO_PS1)) {
qpr[reg_out] = ps1_out;
@@ -612,7 +612,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
u32 *dst, u32 *src1))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out, ps0_in;
u32 ps1_in;
@@ -620,17 +619,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in], &ps0_in);
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in);
dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
ps0_in, ps0_out);
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
/* PS1 */
ps1_in = qpr[reg_in];
- func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in);
+ func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in);
dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
ps1_in, qpr[reg_out]);
@@ -649,10 +648,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int ax_rc = inst_get_field(inst, 21, 25);
short full_d = inst_get_field(inst, 16, 31);
- u64 *fpr_d = &vcpu->arch.fpr[ax_rd];
- u64 *fpr_a = &vcpu->arch.fpr[ax_ra];
- u64 *fpr_b = &vcpu->arch.fpr[ax_rb];
- u64 *fpr_c = &vcpu->arch.fpr[ax_rc];
+ u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd);
+ u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra);
+ u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb);
+ u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc);
bool rcomp = (inst & 1) ? true : false;
u32 cr = kvmppc_get_cr(vcpu);
@@ -663,7 +662,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (!kvmppc_inst_is_paired_single(vcpu, inst))
return EMULATE_FAIL;
- if (!(vcpu->arch.shared->msr & MSR_FP)) {
+ if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
return EMULATE_AGAIN;
}
@@ -674,11 +673,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Do we need to clear FE0 / FE1 here? Don't think so. */
#ifdef DEBUG
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
u32 f;
- kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n",
- i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
+ i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]);
}
#endif
@@ -764,8 +763,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
}
case OP_4X_PS_NEG:
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] ^= 0x80000000;
break;
@@ -775,7 +774,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_4X_PS_MR:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
case OP_4X_PS_CMPO1:
@@ -784,44 +783,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_4X_PS_NABS:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] |= 0x80000000;
break;
case OP_4X_PS_ABS:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] &= ~0x80000000;
break;
case OP_4X_PS_MERGE00:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
- /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
- kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
+ /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+ kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
&vcpu->arch.qpr[ax_rd]);
break;
case OP_4X_PS_MERGE01:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
case OP_4X_PS_MERGE10:
WARN_ON(rcomp);
- /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
- &vcpu->arch.fpr[ax_rd]);
- /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
- kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ &VCPU_FPR(vcpu, ax_rd));
+ /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+ kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
&vcpu->arch.qpr[ax_rd]);
break;
case OP_4X_PS_MERGE11:
WARN_ON(rcomp);
- /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
- &vcpu->arch.fpr[ax_rd]);
+ &VCPU_FPR(vcpu, ax_rd));
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
}
@@ -856,7 +855,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_4A_PS_SUM1:
emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc);
break;
case OP_4A_PS_SUM0:
emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
@@ -1106,45 +1105,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 59:
switch (inst_get_field(inst, 21, 30)) {
case OP_59_FADDS:
- fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FSUBS:
- fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FDIVS:
- fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FRES:
- fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FRSQRTES:
- fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
}
switch (inst_get_field(inst, 26, 30)) {
case OP_59_FMULS:
- fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FMSUBS:
- fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FMADDS:
- fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FNMSUBS:
- fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FNMADDS:
- fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
}
@@ -1159,12 +1158,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_63_MFFS:
/* XXX missing CR */
- *fpr_d = vcpu->arch.fpscr;
+ *fpr_d = vcpu->arch.fp.fpscr;
break;
case OP_63_MTFSF:
/* XXX missing fm bits */
/* XXX missing CR */
- vcpu->arch.fpscr = *fpr_b;
+ vcpu->arch.fp.fpscr = *fpr_b;
break;
case OP_63_FCMPU:
{
@@ -1172,7 +1171,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 cr0_mask = 0xf0000000;
u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
- fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
cr &= ~(cr0_mask >> cr_shift);
cr |= (cr & cr0_mask) >> cr_shift;
break;
@@ -1183,40 +1182,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 cr0_mask = 0xf0000000;
u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
- fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
cr &= ~(cr0_mask >> cr_shift);
cr |= (cr & cr0_mask) >> cr_shift;
break;
}
case OP_63_FNEG:
- fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FMR:
*fpr_d = *fpr_b;
break;
case OP_63_FABS:
- fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FCPSGN:
- fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FDIV:
- fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FADD:
- fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FSUB:
- fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FCTIW:
- fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FCTIWZ:
- fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FRSP:
- fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_63_FRSQRTE:
@@ -1224,39 +1223,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
double one = 1.0f;
/* fD = sqrt(fB) */
- fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
/* fD = 1.0f / fD */
- fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
+ fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
break;
}
}
switch (inst_get_field(inst, 26, 30)) {
case OP_63_FMUL:
- fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
break;
case OP_63_FSEL:
- fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FMSUB:
- fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FMADD:
- fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FNMSUB:
- fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FNMADD:
- fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
}
break;
}
#ifdef DEBUG
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
u32 f;
- kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
}
#endif
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 27db1e66595..8eef1e51907 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -40,14 +40,20 @@
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
-#include "trace.h"
+#include "book3s.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
/* #define EXIT_DEBUG */
/* #define DEBUG_EXT */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
ulong msr);
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
/* Some compatibility defines */
#ifdef CONFIG_PPC_BOOK3S_32
@@ -56,38 +62,117 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#define HW_PAGE_SIZE PAGE_SIZE
#endif
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
- memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu->in_use = 0;
svcpu_put(svcpu);
#endif
vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
- current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+ current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
#endif
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ if (svcpu->in_use) {
+ kvmppc_copy_from_svcpu(vcpu, svcpu);
+ }
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
- memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
svcpu_put(svcpu);
#endif
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
vcpu->cpu = -1;
}
-int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+/* Copy data needed by real-mode code from vcpu to shadow vcpu */
+void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+ struct kvm_vcpu *vcpu)
+{
+ svcpu->gpr[0] = vcpu->arch.gpr[0];
+ svcpu->gpr[1] = vcpu->arch.gpr[1];
+ svcpu->gpr[2] = vcpu->arch.gpr[2];
+ svcpu->gpr[3] = vcpu->arch.gpr[3];
+ svcpu->gpr[4] = vcpu->arch.gpr[4];
+ svcpu->gpr[5] = vcpu->arch.gpr[5];
+ svcpu->gpr[6] = vcpu->arch.gpr[6];
+ svcpu->gpr[7] = vcpu->arch.gpr[7];
+ svcpu->gpr[8] = vcpu->arch.gpr[8];
+ svcpu->gpr[9] = vcpu->arch.gpr[9];
+ svcpu->gpr[10] = vcpu->arch.gpr[10];
+ svcpu->gpr[11] = vcpu->arch.gpr[11];
+ svcpu->gpr[12] = vcpu->arch.gpr[12];
+ svcpu->gpr[13] = vcpu->arch.gpr[13];
+ svcpu->cr = vcpu->arch.cr;
+ svcpu->xer = vcpu->arch.xer;
+ svcpu->ctr = vcpu->arch.ctr;
+ svcpu->lr = vcpu->arch.lr;
+ svcpu->pc = vcpu->arch.pc;
+#ifdef CONFIG_PPC_BOOK3S_64
+ svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
+#endif
+ svcpu->in_use = true;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+ struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+ /*
+ * vcpu_put would just call us again because in_use hasn't
+ * been updated yet.
+ */
+ preempt_disable();
+
+ /*
+ * Maybe we were already preempted and synced the svcpu from
+ * our preempt notifiers. Don't bother touching this svcpu then.
+ */
+ if (!svcpu->in_use)
+ goto out;
+
+ vcpu->arch.gpr[0] = svcpu->gpr[0];
+ vcpu->arch.gpr[1] = svcpu->gpr[1];
+ vcpu->arch.gpr[2] = svcpu->gpr[2];
+ vcpu->arch.gpr[3] = svcpu->gpr[3];
+ vcpu->arch.gpr[4] = svcpu->gpr[4];
+ vcpu->arch.gpr[5] = svcpu->gpr[5];
+ vcpu->arch.gpr[6] = svcpu->gpr[6];
+ vcpu->arch.gpr[7] = svcpu->gpr[7];
+ vcpu->arch.gpr[8] = svcpu->gpr[8];
+ vcpu->arch.gpr[9] = svcpu->gpr[9];
+ vcpu->arch.gpr[10] = svcpu->gpr[10];
+ vcpu->arch.gpr[11] = svcpu->gpr[11];
+ vcpu->arch.gpr[12] = svcpu->gpr[12];
+ vcpu->arch.gpr[13] = svcpu->gpr[13];
+ vcpu->arch.cr = svcpu->cr;
+ vcpu->arch.xer = svcpu->xer;
+ vcpu->arch.ctr = svcpu->ctr;
+ vcpu->arch.lr = svcpu->lr;
+ vcpu->arch.pc = svcpu->pc;
+ vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+ vcpu->arch.fault_dar = svcpu->fault_dar;
+ vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+ vcpu->arch.last_inst = svcpu->last_inst;
+#ifdef CONFIG_PPC_BOOK3S_64
+ vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
+#endif
+ svcpu->in_use = false;
+
+out:
+ preempt_enable();
+}
+
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
{
int r = 1; /* Indicate we want to get back into the guest */
@@ -100,58 +185,84 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
}
/************* MMU Notifiers *************/
+static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
+{
+ long i;
+ struct kvm_vcpu *vcpu;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long hva_start, hva_end;
+ gfn_t gfn, gfn_end;
+
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn, gfn+1, ..., gfn_end-1}.
+ */
+ gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+ gfn_end << PAGE_SHIFT);
+ }
+}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
{
trace_kvm_unmap_hva(hva);
- /*
- * Flush all shadow tlb entries everywhere. This is slow, but
- * we are 100% sure that we catch the to be unmapped page
- */
- kvm_flush_remote_tlbs(kvm);
+ do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
- /* kvm_unmap_hva flushes everything anyways */
- kvm_unmap_hva(kvm, start);
+ do_kvm_unmap_hva(kvm, start, end);
return 0;
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
/* XXX could be more clever ;) */
return 0;
}
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
/* XXX could be more clever ;) */
return 0;
}
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
{
/* The page will get remapped properly on its next fault */
- kvm_unmap_hva(kvm, hva);
+ do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
}
/*****************************************/
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
- ulong smsr = vcpu->arch.shared->msr;
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ ulong smsr = guest_msr;
/* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
/* Process MSR values */
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
- smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
+ smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
/* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
smsr |= MSR_ISF | MSR_HV;
@@ -159,16 +270,16 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
vcpu->arch.shadow_msr = smsr;
}
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
{
- ulong old_msr = vcpu->arch.shared->msr;
+ ulong old_msr = kvmppc_get_msr(vcpu);
#ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif
msr &= to_book3s(vcpu)->msr_mask;
- vcpu->arch.shared->msr = msr;
+ kvmppc_set_msr_fast(vcpu, msr);
kvmppc_recalc_shadow_msr(vcpu);
if (msr & MSR_POW) {
@@ -179,11 +290,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
/* Unset POW bit after we woke up */
msr &= ~MSR_POW;
- vcpu->arch.shared->msr = msr;
+ kvmppc_set_msr_fast(vcpu, msr);
}
}
- if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+ if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
(old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
@@ -215,11 +326,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
}
/* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
+ if (kvmppc_get_msr(vcpu) & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
}
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
{
u32 host_pvr;
@@ -256,6 +367,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+ /*
+ * If they're asking for POWER6 or later, set the flag
+ * indicating that we can do multiple large page sizes
+ * and 1TB segments.
+ * Also set the flag that indicates that tlbie has the large
+ * page bit in the RB operand instead of the instruction.
+ */
+ switch (PVR_VER(pvr)) {
+ case PVR_POWER6:
+ case PVR_POWER7:
+ case PVR_POWER7p:
+ case PVR_POWER8:
+ vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+ BOOK3S_HFLAG_NEW_TLBIE;
+ break;
+ }
+
#ifdef CONFIG_PPC_BOOK3S_32
/* 32 bit Book3S always has 32 byte dcbz */
vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@ -308,8 +436,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
/* patch dcbz into reserved instruction, so we trap */
for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
- if ((page[i] & 0xff0007ff) == INS_DCBZ)
- page[i] &= 0xfffffff7;
+ if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
+ page[i] &= cpu_to_be32(0xfffffff7);
kunmap_atomic(page);
put_page(hpage);
@@ -319,7 +447,7 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
- if (!(vcpu->arch.shared->msr & MSR_SF))
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
mp_pa = (uint32_t)mp_pa;
if (unlikely(mp_pa) &&
@@ -334,20 +462,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
ulong eaddr, int vec)
{
bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+ bool iswrite = false;
int r = RESUME_GUEST;
int relocated;
int page_found = 0;
struct kvmppc_pte pte;
bool is_mmio = false;
- bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
- bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
+ bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
+ bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
u64 vsid;
relocated = data ? dr : ir;
+ if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+ iswrite = true;
/* Resolve real address if translation turned on */
if (relocated) {
- page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+ page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
} else {
pte.may_execute = true;
pte.may_read = true;
@@ -355,9 +486,10 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte.raddr = eaddr & KVM_PAM;
pte.eaddr = eaddr;
pte.vpage = eaddr >> 12;
+ pte.page_size = MMU_PAGE_64K;
}
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
case 0:
pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
break;
@@ -365,7 +497,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
case MSR_IR:
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
- if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
+ if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
else
pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
@@ -388,35 +520,42 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (page_found == -ENOENT) {
/* Page not found in guest PTE entries */
- struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
- vcpu->arch.shared->msr |=
- (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
- svcpu_put(svcpu);
+ u64 ssrr1 = vcpu->arch.shadow_srr1;
+ u64 msr = kvmppc_get_msr(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
+ kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
- struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
- vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
- vcpu->arch.shared->msr |=
- svcpu->shadow_srr1 & 0x00000000f8000000ULL;
- svcpu_put(svcpu);
+ u32 dsisr = vcpu->arch.fault_dsisr;
+ u64 ssrr1 = vcpu->arch.shadow_srr1;
+ u64 msr = kvmppc_get_msr(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
+ kvmppc_set_dsisr(vcpu, dsisr);
+ kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
} else if (!is_mmio &&
kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+ if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+ /*
+ * There is already a host HPTE there, presumably
+ * a read-only one for a page the guest thinks
+ * is writable, so get rid of it first.
+ */
+ kvmppc_mmu_unmap_page(vcpu, &pte);
+ }
/* The guest's PTE is not mapped yet. Map on the host */
- kvmppc_mmu_map_page(vcpu, &pte);
+ kvmppc_mmu_map_page(vcpu, &pte, iswrite);
if (data)
vcpu->stat.sp_storage++;
else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
kvmppc_patch_dcbz(vcpu, &pte);
} else {
/* MMIO */
@@ -440,12 +579,6 @@ static inline int get_fpr_index(int i)
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
/*
* VSX instructions can access FP and vector registers, so if
@@ -466,28 +599,18 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
/*
* Note that on CPUs with VSX, giveup_fpu stores
* both the traditional FP registers and the added VSX
- * registers into thread.fpr[].
+ * registers into thread.fp_state.fpr[].
*/
- if (current->thread.regs->msr & MSR_FP)
+ if (t->regs->msr & MSR_FP)
giveup_fpu(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
-
- vcpu->arch.fpscr = t->fpscr.val;
-
-#ifdef CONFIG_VSX
- if (cpu_has_feature(CPU_FTR_VSX))
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
- vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
+ t->fp_save_area = NULL;
}
#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
if (current->thread.regs->msr & MSR_VEC)
giveup_altivec(current);
- memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
- vcpu->arch.vscr = t->vscr;
+ t->vr_save_area = NULL;
}
#endif
@@ -495,6 +618,25 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
kvmppc_recalc_shadow_msr(vcpu);
}
+/* Give up facility (TAR / EBB / DSCR) */
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
+ /* Facility not available to the guest, ignore giveup request*/
+ return;
+ }
+
+ switch (fac) {
+ case FSCR_TAR_LG:
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ mtspr(SPRN_TAR, current->thread.tar);
+ vcpu->arch.shadow_fscr &= ~FSCR_TAR;
+ break;
+ }
+#endif
+}
+
static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
{
ulong srr0 = kvmppc_get_pc(vcpu);
@@ -503,11 +645,12 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
if (ret == -ENOENT) {
- ulong msr = vcpu->arch.shared->msr;
+ ulong msr = kvmppc_get_msr(vcpu);
msr = kvmppc_set_field(msr, 33, 33, 1);
msr = kvmppc_set_field(msr, 34, 36, 0);
- vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
+ msr = kvmppc_set_field(msr, 42, 47, 0);
+ kvmppc_set_msr_fast(vcpu, msr);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
return EMULATE_AGAIN;
}
@@ -535,18 +678,12 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
ulong msr)
{
struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
/* When we have paired singles, we emulate in software */
if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
return RESUME_GUEST;
- if (!(vcpu->arch.shared->msr & msr)) {
+ if (!(kvmppc_get_msr(vcpu) & msr)) {
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
return RESUME_GUEST;
}
@@ -578,27 +715,24 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#endif
if (msr & MSR_FP) {
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-#ifdef CONFIG_VSX
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
- thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
-#endif
- t->fpscr.val = vcpu->arch.fpscr;
- t->fpexc_mode = 0;
- kvmppc_load_up_fpu();
+ preempt_disable();
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ t->fp_save_area = &vcpu->arch.fp;
+ preempt_enable();
}
if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
- memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
- t->vscr = vcpu->arch.vscr;
- t->vrsave = -1;
- kvmppc_load_up_altivec();
+ preempt_disable();
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ t->vr_save_area = &vcpu->arch.vr;
+ preempt_enable();
#endif
}
- current->thread.regs->msr |= msr;
+ t->regs->msr |= msr;
vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu);
@@ -617,15 +751,93 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
if (!lost_ext)
return;
- if (lost_ext & MSR_FP)
- kvmppc_load_up_fpu();
- if (lost_ext & MSR_VEC)
- kvmppc_load_up_altivec();
+ if (lost_ext & MSR_FP) {
+ preempt_disable();
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ preempt_enable();
+ }
+#ifdef CONFIG_ALTIVEC
+ if (lost_ext & MSR_VEC) {
+ preempt_disable();
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ preempt_enable();
+ }
+#endif
current->thread.regs->msr |= lost_ext;
}
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+{
+ /* Inject the Interrupt Cause field and trigger a guest interrupt */
+ vcpu->arch.fscr &= ~(0xffULL << 56);
+ vcpu->arch.fscr |= (fac << 56);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+}
+
+static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+ enum emulation_result er = EMULATE_FAIL;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_PR))
+ er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+
+ if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
+ /* Couldn't emulate, trigger interrupt in guest */
+ kvmppc_trigger_fac_interrupt(vcpu, fac);
+ }
+}
+
+/* Enable facilities (TAR, EBB, DSCR) for the guest */
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+ bool guest_fac_enabled;
+ BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
+
+ /*
+ * Not every facility is enabled by FSCR bits, check whether the
+ * guest has this facility enabled at all.
+ */
+ switch (fac) {
+ case FSCR_TAR_LG:
+ case FSCR_EBB_LG:
+ guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
+ break;
+ case FSCR_TM_LG:
+ guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
+ break;
+ default:
+ guest_fac_enabled = false;
+ break;
+ }
+
+ if (!guest_fac_enabled) {
+ /* Facility not enabled by the guest */
+ kvmppc_trigger_fac_interrupt(vcpu, fac);
+ return RESUME_GUEST;
+ }
+
+ switch (fac) {
+ case FSCR_TAR_LG:
+ /* TAR switching isn't lazy in Linux yet */
+ current->thread.tar = mfspr(SPRN_TAR);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ vcpu->arch.shadow_fscr |= FSCR_TAR;
+ break;
+ default:
+ kvmppc_emulate_fac(vcpu, fac);
+ break;
+ }
+
+ return RESUME_GUEST;
+}
+#endif
+
+int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
{
int r = RESUME_HOST;
int s;
@@ -643,25 +855,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
{
- struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- ulong shadow_srr1 = svcpu->shadow_srr1;
+ ulong shadow_srr1 = vcpu->arch.shadow_srr1;
vcpu->stat.pf_instruc++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
- r = RESUME_GUEST;
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ u32 sr;
+
+ svcpu = svcpu_get(vcpu);
+ sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
svcpu_put(svcpu);
- break;
+ if (sr == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+ r = RESUME_GUEST;
+ break;
+ }
}
#endif
- svcpu_put(svcpu);
/* only care about PTEG not found errors, but leave NX alone */
if (shadow_srr1 & 0x40000000) {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
(!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -673,7 +892,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
} else {
- vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
+ u64 msr = kvmppc_get_msr(vcpu);
+ msr |= shadow_srr1 & 0x58000000;
+ kvmppc_set_msr_fast(vcpu, msr);
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
@@ -682,28 +903,39 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_DATA_STORAGE:
{
ulong dar = kvmppc_get_fault_dar(vcpu);
- struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- u32 fault_dsisr = svcpu->fault_dsisr;
+ u32 fault_dsisr = vcpu->arch.fault_dsisr;
vcpu->stat.pf_storage++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, dar);
- r = RESUME_GUEST;
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ u32 sr;
+
+ svcpu = svcpu_get(vcpu);
+ sr = svcpu->sr[dar >> SID_SHIFT];
svcpu_put(svcpu);
- break;
+ if (sr == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, dar);
+ r = RESUME_GUEST;
+ break;
+ }
}
#endif
- svcpu_put(svcpu);
- /* The only case we need to handle is missing shadow PTEs */
- if (fault_dsisr & DSISR_NOHPTE) {
+ /*
+ * We need to handle missing shadow PTEs, and
+ * protection faults due to us mapping a page read-only
+ * when the guest thinks it is writable.
+ */
+ if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
} else {
- vcpu->arch.shared->dar = dar;
- vcpu->arch.shared->dsisr = fault_dsisr;
+ kvmppc_set_dar(vcpu, dar);
+ kvmppc_set_dsisr(vcpu, fault_dsisr);
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
@@ -711,7 +943,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_DATA_SEGMENT:
if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_DATA_SEGMENT);
}
@@ -727,6 +959,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* We're good on these - the host merely wanted to get our attention */
case BOOK3S_INTERRUPT_DECREMENTER:
case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ case BOOK3S_INTERRUPT_DOORBELL:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
break;
@@ -743,15 +976,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
{
enum emulation_result er;
- struct kvmppc_book3s_shadow_vcpu *svcpu;
ulong flags;
program_interrupt:
- svcpu = svcpu_get(vcpu);
- flags = svcpu->shadow_srr1 & 0x1f0000ull;
- svcpu_put(svcpu);
+ flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
- if (vcpu->arch.shared->msr & MSR_PR) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
#ifdef EXIT_DEBUG
printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
#endif
@@ -793,12 +1023,12 @@ program_interrupt:
case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.papr_enabled &&
(kvmppc_get_last_sc(vcpu) == 0x44000022) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
int i;
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
r = RESUME_GUEST;
break;
@@ -825,7 +1055,7 @@ program_interrupt:
gprs[i] = kvmppc_get_gpr(vcpu, i);
vcpu->arch.osi_needed = 1;
r = RESUME_HOST_NV;
- } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ } else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
/* KVM PV hypercalls */
kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
@@ -866,14 +1096,26 @@ program_interrupt:
}
case BOOK3S_INTERRUPT_ALIGNMENT:
if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
- vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
- kvmppc_get_last_inst(vcpu));
- vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
- kvmppc_get_last_inst(vcpu));
+ u32 last_inst = kvmppc_get_last_inst(vcpu);
+ u32 dsisr;
+ u64 dar;
+
+ dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
+ dar = kvmppc_alignment_dar(vcpu, last_inst);
+
+ kvmppc_set_dsisr(vcpu, dsisr);
+ kvmppc_set_dar(vcpu, dar);
+
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
}
r = RESUME_GUEST;
break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case BOOK3S_INTERRUPT_FAC_UNAVAIL:
+ kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
+ r = RESUME_GUEST;
+ break;
+#endif
case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_TRACE:
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
@@ -881,9 +1123,7 @@ program_interrupt:
break;
default:
{
- struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
- ulong shadow_srr1 = svcpu->shadow_srr1;
- svcpu_put(svcpu);
+ ulong shadow_srr1 = vcpu->arch.shadow_srr1;
/* Ugh - bork here! What did we get? */
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -904,14 +1144,14 @@ program_interrupt:
* and if we really did time things so badly, then we just exit
* again due to a host external interrupt.
*/
- local_irq_disable();
s = kvmppc_prepare_to_enter(vcpu);
- if (s <= 0) {
- local_irq_enable();
+ if (s <= 0)
r = s;
- } else {
+ else {
+ /* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
}
+
kvmppc_handle_lost_ext(vcpu);
}
@@ -920,8 +1160,8 @@ program_interrupt:
return r;
}
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int i;
@@ -936,7 +1176,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
}
} else {
for (i = 0; i < 16; i++)
- sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+ sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
for (i = 0; i < 8; i++) {
sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
@@ -947,13 +1187,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int i;
- kvmppc_set_pvr(vcpu, sregs->pvr);
+ kvmppc_set_pvr_pr(vcpu, sregs->pvr);
vcpu3s->sdr1 = sregs->u.s.sdr1;
if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
@@ -983,7 +1223,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
@@ -991,19 +1232,15 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
-#ifdef CONFIG_VSX
- case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
- long int i = id - KVM_REG_PPC_VSR0;
-
- if (!cpu_has_feature(CPU_FTR_VSX)) {
- r = -ENXIO;
- break;
- }
- val->vsxval[0] = vcpu->arch.fpr[i];
- val->vsxval[1] = vcpu->arch.vsr[i];
+ case KVM_REG_PPC_LPCR:
+ /*
+ * We are only interested in the LPCR_ILE bit
+ */
+ if (vcpu->arch.intr_msr & MSR_LE)
+ *val = get_reg_val(id, LPCR_ILE);
+ else
+ *val = get_reg_val(id, 0);
break;
- }
-#endif /* CONFIG_VSX */
default:
r = -EINVAL;
break;
@@ -1012,7 +1249,16 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
return r;
}
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+ if (new_lpcr & LPCR_ILE)
+ vcpu->arch.intr_msr |= MSR_LE;
+ else
+ vcpu->arch.intr_msr &= ~MSR_LE;
+}
+
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
@@ -1021,19 +1267,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
to_book3s(vcpu)->hior = set_reg_val(id, *val);
to_book3s(vcpu)->hior_explicit = true;
break;
-#ifdef CONFIG_VSX
- case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
- long int i = id - KVM_REG_PPC_VSR0;
-
- if (!cpu_has_feature(CPU_FTR_VSX)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.fpr[i] = val->vsxval[0];
- vcpu->arch.vsr[i] = val->vsxval[1];
+ case KVM_REG_PPC_LPCR:
+ kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
break;
- }
-#endif /* CONFIG_VSX */
default:
r = -EINVAL;
break;
@@ -1042,28 +1278,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
return r;
}
-int kvmppc_core_check_processor_compat(void)
-{
- return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_book3s *vcpu_book3s;
struct kvm_vcpu *vcpu;
int err = -ENOMEM;
unsigned long p;
- vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
- if (!vcpu_book3s)
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
goto out;
- vcpu_book3s->shadow_vcpu =
- kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
- if (!vcpu_book3s->shadow_vcpu)
+ vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+ if (!vcpu_book3s)
goto free_vcpu;
+ vcpu->arch.book3s = vcpu_book3s;
+
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ vcpu->arch.shadow_vcpu =
+ kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+ if (!vcpu->arch.shadow_vcpu)
+ goto free_vcpu3s;
+#endif
- vcpu = &vcpu_book3s->vcpu;
err = kvm_vcpu_init(vcpu, kvm, id);
if (err)
goto free_shadow_vcpu;
@@ -1074,18 +1312,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
goto uninit_vcpu;
/* the real shared page fills the last 4k of our page */
vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
-
#ifdef CONFIG_PPC_BOOK3S_64
- /* default to book3s_64 (970fx) */
+ /* Always start the shared struct in native endian mode */
+#ifdef __BIG_ENDIAN__
+ vcpu->arch.shared_big_endian = true;
+#else
+ vcpu->arch.shared_big_endian = false;
+#endif
+
+ /*
+ * Default to the same as the host if we're on sufficiently
+ * recent machine that we have 1TB segments;
+ * otherwise default to PPC970FX.
+ */
vcpu->arch.pvr = 0x3C0301;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu->arch.intr_msr = MSR_SF;
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
#endif
- kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
- vcpu->arch.shadow_msr = MSR_USER64;
+ vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
err = kvmppc_mmu_init(vcpu);
if (err < 0)
@@ -1096,39 +1347,36 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
- kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
+#endif
vfree(vcpu_book3s);
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
kvm_vcpu_uninit(vcpu);
- kfree(vcpu_book3s->shadow_vcpu);
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kfree(vcpu->arch.shadow_vcpu);
+#endif
vfree(vcpu_book3s);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
- double fpr[32][TS_FPRWIDTH];
- unsigned int fpscr;
- int fpexc_mode;
#ifdef CONFIG_ALTIVEC
- vector128 vr[32];
- vector128 vscr;
unsigned long uninitialized_var(vrsave);
- int used_vr;
#endif
-#ifdef CONFIG_VSX
- int used_vsr;
-#endif
- ulong ext_msr;
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
@@ -1143,44 +1391,29 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* really did time things so badly, then we just exit again due to
* a host external interrupt.
*/
- local_irq_disable();
ret = kvmppc_prepare_to_enter(vcpu);
- if (ret <= 0) {
- local_irq_enable();
+ if (ret <= 0)
goto out;
- }
+ /* interrupts now hard-disabled */
- /* Save FPU state in stack */
+ /* Save FPU state in thread_struct */
if (current->thread.regs->msr & MSR_FP)
giveup_fpu(current);
- memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
- fpscr = current->thread.fpscr.val;
- fpexc_mode = current->thread.fpexc_mode;
#ifdef CONFIG_ALTIVEC
- /* Save Altivec state in stack */
- used_vr = current->thread.used_vr;
- if (used_vr) {
- if (current->thread.regs->msr & MSR_VEC)
- giveup_altivec(current);
- memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
- vscr = current->thread.vscr;
- vrsave = current->thread.vrsave;
- }
+ /* Save Altivec state in thread_struct */
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
#endif
#ifdef CONFIG_VSX
- /* Save VSX state in stack */
- used_vsr = current->thread.used_vsr;
- if (used_vsr && (current->thread.regs->msr & MSR_VSX))
+ /* Save VSX state in thread_struct */
+ if (current->thread.regs->msr & MSR_VSX)
__giveup_vsx(current);
#endif
- /* Remember the MSR with disabled extensions */
- ext_msr = current->thread.regs->msr;
-
/* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
+ if (kvmppc_get_msr(vcpu) & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
kvmppc_fix_ee_before_entry();
@@ -1193,26 +1426,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* Make sure we save the guest FPU/Altivec/VSX state */
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
- current->thread.regs->msr = ext_msr;
-
- /* Restore FPU/VSX state from stack */
- memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
- current->thread.fpscr.val = fpscr;
- current->thread.fpexc_mode = fpexc_mode;
-
-#ifdef CONFIG_ALTIVEC
- /* Restore Altivec state from stack */
- if (used_vr && current->thread.used_vr) {
- memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
- current->thread.vscr = vscr;
- current->thread.vrsave = vrsave;
- }
- current->thread.used_vr = used_vr;
-#endif
-
-#ifdef CONFIG_VSX
- current->thread.used_vsr = used_vsr;
-#endif
+ /* Make sure we save the guest TAR/EBB/DSCR state */
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
out:
vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -1222,8 +1437,8 @@ out:
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+ struct kvm_dirty_log *log)
{
struct kvm_memory_slot *memslot;
struct kvm_vcpu *vcpu;
@@ -1258,67 +1473,100 @@ out:
return r;
}
-#ifdef CONFIG_PPC64
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
{
- info->flags = KVM_PPC_1T_SEGMENTS;
-
- /* SLB is always 64 entries */
- info->slb_size = 64;
-
- /* Standard 4k base page size segment */
- info->sps[0].page_shift = 12;
- info->sps[0].slb_enc = 0;
- info->sps[0].enc[0].page_shift = 12;
- info->sps[0].enc[0].pte_enc = 0;
-
- /* Standard 16M large page size segment */
- info->sps[1].page_shift = 24;
- info->sps[1].slb_enc = SLB_VSID_L;
- info->sps[1].enc[0].page_shift = 24;
- info->sps[1].enc[0].pte_enc = 0;
+ return;
+}
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
return 0;
}
-#endif /* CONFIG_PPC64 */
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
{
+ return;
}
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
- unsigned long npages)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
{
- return 0;
+ return;
}
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+
+#ifdef CONFIG_PPC64
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
{
-}
+ long int i;
+ struct kvm_vcpu *vcpu;
+
+ info->flags = 0;
+
+ /* SLB is always 64 entries */
+ info->slb_size = 64;
+
+ /* Standard 4k base page size segment */
+ info->sps[0].page_shift = 12;
+ info->sps[0].slb_enc = 0;
+ info->sps[0].enc[0].page_shift = 12;
+ info->sps[0].enc[0].pte_enc = 0;
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+ /*
+ * 64k large page size.
+ * We only want to put this in if the CPUs we're emulating
+ * support it, but unfortunately we don't have a vcpu easily
+ * to hand here to test. Just pick the first vcpu, and if
+ * that doesn't exist yet, report the minimum capability,
+ * i.e., no 64k pages.
+ * 1T segment support goes along with 64k pages.
+ */
+ i = 1;
+ vcpu = kvm_get_vcpu(kvm, 0);
+ if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+ info->flags = KVM_PPC_1T_SEGMENTS;
+ info->sps[i].page_shift = 16;
+ info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+ info->sps[i].enc[0].page_shift = 16;
+ info->sps[i].enc[0].pte_enc = 1;
+ ++i;
+ }
+
+ /* Standard 16M large page size segment */
+ info->sps[i].page_shift = 24;
+ info->sps[i].slb_enc = SLB_VSID_L;
+ info->sps[i].enc[0].page_shift = 24;
+ info->sps[i].enc[0].pte_enc = 0;
+
+ return 0;
+}
+#else
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
{
+ /* We should not get called */
+ BUG();
}
+#endif /* CONFIG_PPC64 */
static unsigned int kvm_global_user_count = 0;
static DEFINE_SPINLOCK(kvm_global_user_count_lock);
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
{
-#ifdef CONFIG_PPC64
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-#endif
+ mutex_init(&kvm->arch.hpt_mutex);
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
spin_lock(&kvm_global_user_count_lock);
@@ -1329,7 +1577,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return 0;
}
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
@@ -1344,26 +1592,83 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
}
}
-static int kvmppc_book3s_init(void)
+static int kvmppc_core_check_processor_compat_pr(void)
{
- int r;
+ /* we are always compatible */
+ return 0;
+}
- r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
- THIS_MODULE);
+static long kvm_arch_vm_ioctl_pr(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -ENOTTY;
+}
- if (r)
+static struct kvmppc_ops kvm_ops_pr = {
+ .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+ .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+ .get_one_reg = kvmppc_get_one_reg_pr,
+ .set_one_reg = kvmppc_set_one_reg_pr,
+ .vcpu_load = kvmppc_core_vcpu_load_pr,
+ .vcpu_put = kvmppc_core_vcpu_put_pr,
+ .set_msr = kvmppc_set_msr_pr,
+ .vcpu_run = kvmppc_vcpu_run_pr,
+ .vcpu_create = kvmppc_core_vcpu_create_pr,
+ .vcpu_free = kvmppc_core_vcpu_free_pr,
+ .check_requests = kvmppc_core_check_requests_pr,
+ .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+ .flush_memslot = kvmppc_core_flush_memslot_pr,
+ .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+ .commit_memory_region = kvmppc_core_commit_memory_region_pr,
+ .unmap_hva = kvm_unmap_hva_pr,
+ .unmap_hva_range = kvm_unmap_hva_range_pr,
+ .age_hva = kvm_age_hva_pr,
+ .test_age_hva = kvm_test_age_hva_pr,
+ .set_spte_hva = kvm_set_spte_hva_pr,
+ .mmu_destroy = kvmppc_mmu_destroy_pr,
+ .free_memslot = kvmppc_core_free_memslot_pr,
+ .create_memslot = kvmppc_core_create_memslot_pr,
+ .init_vm = kvmppc_core_init_vm_pr,
+ .destroy_vm = kvmppc_core_destroy_vm_pr,
+ .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+ .emulate_op = kvmppc_core_emulate_op_pr,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+ .fast_vcpu_kick = kvm_vcpu_kick,
+ .arch_vm_ioctl = kvm_arch_vm_ioctl_pr,
+};
+
+
+int kvmppc_book3s_init_pr(void)
+{
+ int r;
+
+ r = kvmppc_core_check_processor_compat_pr();
+ if (r < 0)
return r;
- r = kvmppc_mmu_hpte_sysinit();
+ kvm_ops_pr.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_pr;
+ r = kvmppc_mmu_hpte_sysinit();
return r;
}
-static void kvmppc_book3s_exit(void)
+void kvmppc_book3s_exit_pr(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_mmu_hpte_sysexit();
- kvm_exit();
}
-module_init(kvmppc_book3s_init);
-module_exit(kvmppc_book3s_exit);
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index da0e0bc268b..52a63bfe3f0 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -21,6 +21,8 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
+#define HPTE_SIZE 16 /* bytes per HPT entry */
+
static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -40,32 +42,41 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
long pte_index = kvmppc_get_gpr(vcpu, 5);
unsigned long pteg[2 * 8];
unsigned long pteg_addr, i, *hpte;
+ long int ret;
+ i = pte_index & 7;
pte_index &= ~7UL;
pteg_addr = get_pteg_addr(vcpu, pte_index);
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
hpte = pteg;
+ ret = H_PTEG_FULL;
if (likely((flags & H_EXACT) == 0)) {
- pte_index &= ~7UL;
for (i = 0; ; ++i) {
if (i == 8)
- return H_PTEG_FULL;
- if ((*hpte & HPTE_V_VALID) == 0)
+ goto done;
+ if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)
break;
hpte += 2;
}
} else {
- i = kvmppc_get_gpr(vcpu, 5) & 7UL;
hpte += i * 2;
+ if (*hpte & HPTE_V_VALID)
+ goto done;
}
- hpte[0] = kvmppc_get_gpr(vcpu, 6);
- hpte[1] = kvmppc_get_gpr(vcpu, 7);
- copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
- kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
+ hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
+ pteg_addr += i * HPTE_SIZE;
+ copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
kvmppc_set_gpr(vcpu, 4, pte_index | i);
+ ret = H_SUCCESS;
+
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
return EMULATE_DONE;
}
@@ -77,26 +88,33 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
unsigned long v = 0, pteg, rb;
unsigned long pte[2];
+ long int ret;
pteg = get_pteg_addr(vcpu, pte_index);
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
+ ret = H_NOT_FOUND;
if ((pte[0] & HPTE_V_VALID) == 0 ||
((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
- ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
- kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
- return EMULATE_DONE;
- }
+ ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+ goto done;
copy_to_user((void __user *)pteg, &v, sizeof(v));
rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
- kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ ret = H_SUCCESS;
kvmppc_set_gpr(vcpu, 4, pte[0]);
kvmppc_set_gpr(vcpu, 5, pte[1]);
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
+
return EMULATE_DONE;
}
@@ -124,6 +142,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
int paramnr = 4;
int ret = H_SUCCESS;
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
@@ -152,6 +171,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
/* tsl = AVPN */
flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
@@ -172,6 +193,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
}
kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
}
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
kvmppc_set_gpr(vcpu, 3, ret);
return EMULATE_DONE;
@@ -184,15 +206,18 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
unsigned long rb, pteg, r, v;
unsigned long pte[2];
+ long int ret;
pteg = get_pteg_addr(vcpu, pte_index);
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
+ ret = H_NOT_FOUND;
if ((pte[0] & HPTE_V_VALID) == 0 ||
- ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
- kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
- return EMULATE_DONE;
- }
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+ goto done;
v = pte[0];
r = pte[1];
@@ -206,9 +231,14 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
rb = compute_tlbie_rb(v, r, pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ pte[0] = cpu_to_be64(pte[0]);
+ pte[1] = cpu_to_be64(pte[1]);
copy_to_user((void __user *)pteg, pte, sizeof(pte));
+ ret = H_SUCCESS;
- kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
return EMULATE_DONE;
}
@@ -248,7 +278,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
case H_CEDE:
- vcpu->arch.shared->msr |= MSR_EE;
+ kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 8f7633e3afb..16c4d88ba27 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -36,33 +36,11 @@
#if defined(CONFIG_PPC_BOOK3S_64)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define FUNC(name) name
+#else
#define FUNC(name) GLUE(.,name)
-
- .globl kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- rfid
- b .
-
- .globl kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- hrfid
- b .
+#endif
#elif defined(CONFIG_PPC_BOOK3S_32)
@@ -172,7 +150,7 @@ kvmppc_handler_skip_ins:
* On entry, r4 contains the guest shadow MSR
* MSR.EE has to be 0 when calling this function
*/
-_GLOBAL(kvmppc_entry_trampoline)
+_GLOBAL_TOC(kvmppc_entry_trampoline)
mfmsr r5
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
toreal(r7)
@@ -181,58 +159,11 @@ _GLOBAL(kvmppc_entry_trampoline)
andc r6, r5, r6 /* Clear DR and IR in MSR value */
/*
* Set EE in HOST_MSR so that it's enabled when we get into our
- * C exit handler function
+ * C exit handler function.
*/
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
RFI
-#if defined(CONFIG_PPC_BOOK3S_32)
-#define STACK_LR INT_FRAME_SIZE+4
-
-/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
-#define MSR_EXT_START \
- PPC_STL r20, _NIP(r1); \
- mfmsr r20; \
- LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
- andc r3,r20,r3; /* Disable DR,EE */ \
- mtmsr r3; \
- sync
-
-#define MSR_EXT_END \
- mtmsr r20; /* Enable DR,EE */ \
- sync; \
- PPC_LL r20, _NIP(r1)
-
-#elif defined(CONFIG_PPC_BOOK3S_64)
-#define STACK_LR _LINK
-#define MSR_EXT_START
-#define MSR_EXT_END
-#endif
-
-/*
- * Activate current's external feature (FPU/Altivec/VSX)
- */
-#define define_load_up(what) \
- \
-_GLOBAL(kvmppc_load_up_ ## what); \
- PPC_STLU r1, -INT_FRAME_SIZE(r1); \
- mflr r3; \
- PPC_STL r3, STACK_LR(r1); \
- MSR_EXT_START; \
- \
- bl FUNC(load_up_ ## what); \
- \
- MSR_EXT_END; \
- PPC_LL r3, STACK_LR(r1); \
- mtlr r3; \
- addi r1, r1, INT_FRAME_SIZE; \
- blr
-
-define_load_up(fpu)
-#ifdef CONFIG_ALTIVEC
-define_load_up(altivec)
-#endif
-
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 3219ba89524..ef27fbd5d9c 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
u32 irq, server, priority;
int rc;
- if (args->nargs != 3 || args->nret != 1) {
+ if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) {
rc = -3;
goto out;
}
- irq = args->args[0];
- server = args->args[1];
- priority = args->args[2];
+ irq = be32_to_cpu(args->args[0]);
+ server = be32_to_cpu(args->args[1]);
+ priority = be32_to_cpu(args->args[2]);
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (rc)
rc = -3;
out:
- args->rets[0] = rc;
+ args->rets[0] = cpu_to_be32(rc);
}
static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
u32 irq, server, priority;
int rc;
- if (args->nargs != 1 || args->nret != 3) {
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) {
rc = -3;
goto out;
}
- irq = args->args[0];
+ irq = be32_to_cpu(args->args[0]);
server = priority = 0;
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
@@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
goto out;
}
- args->rets[1] = server;
- args->rets[2] = priority;
+ args->rets[1] = cpu_to_be32(server);
+ args->rets[2] = cpu_to_be32(priority);
out:
- args->rets[0] = rc;
+ args->rets[0] = cpu_to_be32(rc);
}
static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
u32 irq;
int rc;
- if (args->nargs != 1 || args->nret != 1) {
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
rc = -3;
goto out;
}
- irq = args->args[0];
+ irq = be32_to_cpu(args->args[0]);
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
- args->rets[0] = rc;
+ args->rets[0] = cpu_to_be32(rc);
}
static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
u32 irq;
int rc;
- if (args->nargs != 1 || args->nret != 1) {
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
rc = -3;
goto out;
}
- irq = args->args[0];
+ irq = be32_to_cpu(args->args[0]);
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
- args->rets[0] = rc;
+ args->rets[0] = cpu_to_be32(rc);
}
#endif /* CONFIG_KVM_XICS */
@@ -213,8 +213,11 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
gpa_t args_phys;
int rc;
- /* r4 contains the guest physical address of the RTAS args */
- args_phys = kvmppc_get_gpr(vcpu, 4);
+ /*
+ * r4 contains the guest physical address of the RTAS args
+ * Mask off the top 4 bits since this is a guest real address
+ */
+ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
if (rc)
@@ -227,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
- args.rets = &args.args[args.nargs];
+ args.rets = &args.args[be32_to_cpu(args.nargs)];
mutex_lock(&vcpu->kvm->lock);
rc = -ENOENT;
list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
- if (d->token == args.token) {
+ if (d->token == be32_to_cpu(args.token)) {
d->handler->handler(vcpu, &args);
rc = 0;
break;
@@ -260,6 +263,7 @@ fail:
*/
return rc;
}
+EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
void kvmppc_rtas_tokens_free(struct kvm *kvm)
{
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1abe4788191..acee37cde84 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -90,6 +90,15 @@ kvmppc_handler_trampoline_enter:
LOAD_GUEST_SEGMENTS
#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ /* Save host FSCR */
+ mfspr r8, SPRN_FSCR
+ std r8, HSTATE_HOST_FSCR(r13)
+ /* Set FSCR during guest execution */
+ ld r9, SVCPU_SHADOW_FSCR(r13)
+ mtspr SPRN_FSCR, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
/* Some guests may need to have dcbz set to 32 byte length.
*
* Usually we ensure that by patching the guest's instructions
@@ -161,8 +170,8 @@ kvmppc_handler_trampoline_enter_end:
.global kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit:
-.global kvmppc_interrupt
-kvmppc_interrupt:
+.global kvmppc_interrupt_pr
+kvmppc_interrupt_pr:
/* Register usage at this point:
*
@@ -255,6 +264,10 @@ BEGIN_FTR_SECTION
cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
beq- ld_last_inst
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
#endif
b no_ld_last_inst
@@ -311,6 +324,18 @@ no_ld_last_inst:
no_dcbz32_off:
+BEGIN_FTR_SECTION
+ /* Save guest FSCR on a FAC_UNAVAIL interrupt */
+ cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+ bne+ no_fscr_save
+ mfspr r7, SPRN_FSCR
+ std r7, SVCPU_SHADOW_FSCR(r13)
+no_fscr_save:
+ /* Restore host FSCR */
+ ld r8, HSTATE_HOST_FSCR(r13)
+ mtspr SPRN_FSCR, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
#endif /* CONFIG_PPC_BOOK3S_64 */
/*
@@ -361,6 +386,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
beqa BOOK3S_INTERRUPT_DECREMENTER
cmpwi r12, BOOK3S_INTERRUPT_PERFMON
beqa BOOK3S_INTERRUPT_PERFMON
+ cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
+ beqa BOOK3S_INTERRUPT_DOORBELL
RFI
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a3a5cb8ee7e..d1acd32a64c 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
}
/* Check for real mode returning too hard */
- if (xics->real_mode)
+ if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
return kvmppc_xics_rm_complete(vcpu, req);
switch (req) {
@@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
return rc;
}
+EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
/* -- Initialisation code etc. -- */
@@ -1245,18 +1246,20 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
kvm->arch.xics = xics;
mutex_unlock(&kvm->lock);
- if (ret)
+ if (ret) {
+ kfree(xics);
return ret;
+ }
xics_debugfs_init(xics);
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
/* Enable real mode support */
xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE;
}
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
return 0;
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 17722d82f1d..ab62109fdfa 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -40,7 +40,9 @@
#include "timing.h"
#include "booke.h"
-#include "trace.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_booke.h"
unsigned long kvmppc_booke_handlers;
@@ -133,6 +135,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
#endif
}
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+ /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr &= ~MSR_DE;
+ vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+ /* Force enable debug interrupts when user space wants to debug */
+ if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * Since there is no shadow MSR, sync MSR_DE into the guest
+ * visible MSR.
+ */
+ vcpu->arch.shared->msr |= MSR_DE;
+#else
+ vcpu->arch.shadow_msr |= MSR_DE;
+ vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+ }
+}
+
/*
* Helper function for "full" MSR writes. No need to call this if only
* EE/CE/ME/DE/RI are changing.
@@ -150,6 +175,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
kvmppc_vcpu_sync_fpu(vcpu);
+ kvmppc_vcpu_sync_debug(vcpu);
}
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -617,7 +643,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
local_irq_enable();
kvm_vcpu_block(vcpu);
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
- local_irq_disable();
+ hard_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
r = 1;
@@ -655,35 +681,23 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret, s;
-#ifdef CONFIG_PPC_FPU
- unsigned int fpscr;
- int fpexc_mode;
- u64 fpr[32];
-#endif
+ struct debug_reg debug;
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
- local_irq_disable();
s = kvmppc_prepare_to_enter(vcpu);
if (s <= 0) {
- local_irq_enable();
ret = s;
goto out;
}
+ /* interrupts now hard-disabled */
#ifdef CONFIG_PPC_FPU
/* Save userspace FPU state in stack */
enable_kernel_fp();
- memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
- fpscr = current->thread.fpscr.val;
- fpexc_mode = current->thread.fpexc_mode;
-
- /* Restore guest FPU state to thread */
- memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
- current->thread.fpscr.val = vcpu->arch.fpscr;
/*
* Since we can't trap on MSR_FP in GS-mode, we consider the guest
@@ -696,6 +710,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu);
#endif
+ /* Switch to guest debug context */
+ debug = vcpu->arch.shadow_dbg_reg;
+ switch_booke_debug_regs(&debug);
+ debug = current->thread.debug;
+ current->thread.debug = vcpu->arch.shadow_dbg_reg;
+
+ vcpu->arch.pgdir = current->mm->pgd;
kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -703,19 +724,14 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* No need for kvm_guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
+ /* Switch back to user space debug context */
+ switch_booke_debug_regs(&debug);
+ current->thread.debug = debug;
+
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
vcpu->fpu_active = 0;
-
- /* Save guest FPU state from thread */
- memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
- vcpu->arch.fpscr = current->thread.fpscr.val;
-
- /* Restore userspace FPU state from stack */
- memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
- current->thread.fpscr.val = fpscr;
- current->thread.fpexc_mode = fpexc_mode;
#endif
out:
@@ -758,6 +774,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
}
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ u32 dbsr = vcpu->arch.dbsr;
+
+ run->debug.arch.status = 0;
+ run->debug.arch.address = vcpu->arch.pc;
+
+ if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+ run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+ } else {
+ if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+ run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+ else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+ run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+ if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+ run->debug.arch.address = dbg_reg->dac1;
+ else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+ run->debug.arch.address = dbg_reg->dac2;
+ }
+
+ return RESUME_HOST;
+}
+
static void kvmppc_fill_pt_regs(struct pt_regs *regs)
{
ulong r1, ip, msr, lr;
@@ -818,6 +858,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_CRITICAL:
unknown_exception(&regs);
break;
+ case BOOKE_INTERRUPT_DEBUG:
+ /* Save DBSR before preemption is enabled */
+ vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+ kvmppc_clear_dbsr();
+ break;
}
}
@@ -833,17 +878,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
int s;
int idx;
-#ifdef CONFIG_PPC64
- WARN_ON(local_paca->irq_happened != 0);
-#endif
-
- /*
- * We enter with interrupts disabled in hardware, but
- * we need to call hard_irq_disable anyway to ensure that
- * the software state is kept in sync.
- */
- hard_irq_disable();
-
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@@ -1135,18 +1169,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOKE_INTERRUPT_DEBUG: {
- u32 dbsr;
-
- vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
- /* clear IAC events in DBSR register */
- dbsr = mfspr(SPRN_DBSR);
- dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
- mtspr(SPRN_DBSR, dbsr);
-
- run->exit_reason = KVM_EXIT_DEBUG;
+ r = kvmppc_handle_debug(run, vcpu);
+ if (r == RESUME_HOST)
+ run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
- r = RESUME_HOST;
break;
}
@@ -1160,12 +1186,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* aren't already exiting to userspace for some other reason.
*/
if (!(r & RESUME_HOST)) {
- local_irq_disable();
s = kvmppc_prepare_to_enter(vcpu);
- if (s <= 0) {
- local_irq_enable();
+ if (s <= 0)
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
- } else {
+ else {
+ /* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
}
}
@@ -1197,7 +1222,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, 0);
#ifndef CONFIG_KVM_BOOKE_HV
- vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
vcpu->arch.shadow_pid = 1;
vcpu->arch.shared->msr = 0;
#endif
@@ -1359,7 +1384,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
return 0;
}
-void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
sregs->u.e.features |= KVM_SREGS_E_IVOR;
@@ -1379,6 +1404,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ return 0;
}
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -1413,8 +1439,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
- kvmppc_core_get_sregs(vcpu, sregs);
- return 0;
+ return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1433,7 +1458,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (ret < 0)
return ret;
- return kvmppc_core_set_sregs(vcpu, sregs);
+ return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
}
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1441,7 +1466,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int r = 0;
union kvmppc_one_reg val;
int size;
- long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
@@ -1449,16 +1473,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
switch (reg->id) {
case KVM_REG_PPC_IAC1:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+ break;
case KVM_REG_PPC_IAC2:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+ break;
case KVM_REG_PPC_IAC4:
- i = reg->id - KVM_REG_PPC_IAC1;
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
break;
+#endif
case KVM_REG_PPC_DAC1:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+ break;
case KVM_REG_PPC_DAC2:
- i = reg->id - KVM_REG_PPC_DAC1;
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
break;
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
@@ -1477,10 +1509,13 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
val = get_reg_val(reg->id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
- val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+ val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
break;
default:
- r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
break;
}
@@ -1498,7 +1533,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int r = 0;
union kvmppc_one_reg val;
int size;
- long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
@@ -1509,16 +1543,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
switch (reg->id) {
case KVM_REG_PPC_IAC1:
+ vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+ break;
case KVM_REG_PPC_IAC2:
+ vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
+ vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+ break;
case KVM_REG_PPC_IAC4:
- i = reg->id - KVM_REG_PPC_IAC1;
- vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
break;
+#endif
case KVM_REG_PPC_DAC1:
+ vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+ break;
case KVM_REG_PPC_DAC2:
- i = reg->id - KVM_REG_PPC_DAC1;
- vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
break;
case KVM_REG_PPC_EPR: {
u32 new_epr = set_reg_val(reg->id, val);
@@ -1552,20 +1594,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
kvmppc_set_tcr(vcpu, tcr);
break;
}
+ case KVM_REG_PPC_VRSAVE:
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
default:
- r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
break;
}
return r;
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
-{
- return -EINVAL;
-}
-
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;
@@ -1590,12 +1629,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
@@ -1671,6 +1710,157 @@ void kvmppc_decrementer_func(unsigned long data)
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
}
+static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+ uint64_t addr, int index)
+{
+ switch (index) {
+ case 0:
+ dbg_reg->dbcr0 |= DBCR0_IAC1;
+ dbg_reg->iac1 = addr;
+ break;
+ case 1:
+ dbg_reg->dbcr0 |= DBCR0_IAC2;
+ dbg_reg->iac2 = addr;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 2:
+ dbg_reg->dbcr0 |= DBCR0_IAC3;
+ dbg_reg->iac3 = addr;
+ break;
+ case 3:
+ dbg_reg->dbcr0 |= DBCR0_IAC4;
+ dbg_reg->iac4 = addr;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ dbg_reg->dbcr0 |= DBCR0_IDM;
+ return 0;
+}
+
+static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+ int type, int index)
+{
+ switch (index) {
+ case 0:
+ if (type & KVMPPC_DEBUG_WATCH_READ)
+ dbg_reg->dbcr0 |= DBCR0_DAC1R;
+ if (type & KVMPPC_DEBUG_WATCH_WRITE)
+ dbg_reg->dbcr0 |= DBCR0_DAC1W;
+ dbg_reg->dac1 = addr;
+ break;
+ case 1:
+ if (type & KVMPPC_DEBUG_WATCH_READ)
+ dbg_reg->dbcr0 |= DBCR0_DAC2R;
+ if (type & KVMPPC_DEBUG_WATCH_WRITE)
+ dbg_reg->dbcr0 |= DBCR0_DAC2W;
+ dbg_reg->dac2 = addr;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dbg_reg->dbcr0 |= DBCR0_IDM;
+ return 0;
+}
+void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+{
+ /* XXX: Add similar MSR protection for BookE-PR */
+#ifdef CONFIG_KVM_BOOKE_HV
+ BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+ if (set) {
+ if (prot_bitmap & MSR_UCLE)
+ vcpu->arch.shadow_msrp |= MSRP_UCLEP;
+ if (prot_bitmap & MSR_DE)
+ vcpu->arch.shadow_msrp |= MSRP_DEP;
+ if (prot_bitmap & MSR_PMM)
+ vcpu->arch.shadow_msrp |= MSRP_PMMP;
+ } else {
+ if (prot_bitmap & MSR_UCLE)
+ vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
+ if (prot_bitmap & MSR_DE)
+ vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+ if (prot_bitmap & MSR_PMM)
+ vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
+ }
+#endif
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ struct debug_reg *dbg_reg;
+ int n, b = 0, w = 0;
+
+ if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+ vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ vcpu->guest_debug = 0;
+ kvm_guest_protect_msr(vcpu, MSR_DE, false);
+ return 0;
+ }
+
+ kvm_guest_protect_msr(vcpu, MSR_DE, true);
+ vcpu->guest_debug = dbg->control;
+ vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ /* Set DBCR0_EDM in guest visible DBCR0 register. */
+ vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+ /* Code below handles only HW breakpoints */
+ dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
+ * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
+ */
+ dbg_reg->dbcr1 = 0;
+ dbg_reg->dbcr2 = 0;
+#else
+ /*
+ * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+ * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+ * is set.
+ */
+ dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+ DBCR1_IAC4US;
+ dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ return 0;
+
+ for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+ uint64_t addr = dbg->arch.bp[n].addr;
+ uint32_t type = dbg->arch.bp[n].type;
+
+ if (type == KVMPPC_DEBUG_NONE)
+ continue;
+
+ if (type & !(KVMPPC_DEBUG_WATCH_READ |
+ KVMPPC_DEBUG_WATCH_WRITE |
+ KVMPPC_DEBUG_BREAKPOINT))
+ return -EINVAL;
+
+ if (type & KVMPPC_DEBUG_BREAKPOINT) {
+ /* Setting H/W breakpoint */
+ if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+ return -EINVAL;
+ } else {
+ /* Setting H/W watchpoint */
+ if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+ type, w++))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = smp_processor_id();
@@ -1681,6 +1871,44 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
{
current->thread.kvm_vcpu = NULL;
vcpu->cpu = -1;
+
+ /* Clear pending debug event in DBSR */
+ kvmppc_clear_dbsr();
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvm->arch.kvm_ops->destroy_vm(kvm);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
}
int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 5fd1ba69357..b632cd35919 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -99,6 +99,30 @@ enum int_class {
void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
+extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+ struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+ struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+
/*
* Load up guest vcpu FP state if it's needed.
* It also set the MSR_FP in thread so that host know
@@ -112,7 +136,9 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_FPU
if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
- load_up_fpu();
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ current->thread.fp_save_area = &vcpu->arch.fp;
current->thread.regs->msr |= MSR_FP;
}
#endif
@@ -127,6 +153,12 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_FPU
if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
giveup_fpu(current);
+ current->thread.fp_save_area = NULL;
#endif
}
+
+static inline void kvmppc_clear_dbsr(void)
+{
+ mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+}
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d659c5..a1712b818a5 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -33,6 +33,8 @@
#ifdef CONFIG_64BIT
#include <asm/exception-64e.h>
+#include <asm/hw_irq.h>
+#include <asm/irqflags.h>
#else
#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
#endif
@@ -227,17 +229,20 @@
stw r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4)
- .if \type == EX_CRIT
- PPC_LL r5, (\paca_ex + EX_R13)(r13)
- .else
- mfspr r5, \scratch
- .endif
PPC_STL r6, VCPU_GPR(R6)(r4)
PPC_STL r8, VCPU_GPR(R8)(r4)
PPC_STL r9, VCPU_GPR(R9)(r4)
- PPC_STL r5, VCPU_GPR(R13)(r4)
+ .if \type == EX_TLB
+ PPC_LL r5, EX_TLB_R13(r12)
+ PPC_LL r6, EX_TLB_R10(r12)
+ PPC_LL r8, EX_TLB_R11(r12)
+ mfspr r12, \scratch
+ .else
+ mfspr r5, \scratch
PPC_LL r6, (\paca_ex + \ex_r10)(r13)
PPC_LL r8, (\paca_ex + \ex_r11)(r13)
+ .endif
+ PPC_STL r5, VCPU_GPR(R13)(r4)
PPC_STL r3, VCPU_GPR(R3)(r4)
PPC_STL r7, VCPU_GPR(R7)(r4)
PPC_STL r12, VCPU_GPR(R12)(r4)
@@ -319,6 +324,8 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
SPRN_DSRR0, SPRN_DSRR1, 0
kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
#else
/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -431,10 +438,16 @@ _GLOBAL(kvmppc_resume_host)
PPC_STL r5, VCPU_LR(r4)
mfspr r7, SPRN_SPRG5
stw r3, VCPU_VRSAVE(r4)
+#ifdef CONFIG_64BIT
+ PPC_LL r3, PACA_SPRG_VDSO(r13)
+#endif
PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
mfspr r8, SPRN_SPRG6
PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
mfspr r9, SPRN_SPRG7
+#ifdef CONFIG_64BIT
+ mtspr SPRN_SPRG_VDSO_WRITE, r3
+#endif
PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
mfxer r3
PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
@@ -465,6 +478,15 @@ _GLOBAL(kvmppc_resume_host)
mtspr SPRN_EPCR, r3
isync
+#ifdef CONFIG_64BIT
+ /*
+ * We enter with interrupts disabled in hardware, but
+ * we need to call RECONCILE_IRQ_STATE to ensure
+ * that the software state is kept in sync.
+ */
+ RECONCILE_IRQ_STATE(r3,r5)
+#endif
+
/* Switch to kernel stack and jump to handler. */
PPC_LL r3, HOST_RUN(r1)
mr r5, r14 /* intno */
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ce6b73c2961..2e02ed849f3 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -16,6 +16,8 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -305,7 +307,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_booke_vcpu_load(vcpu, cpu);
@@ -313,7 +315,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_SPE
if (vcpu->arch.shadow_msr & MSR_SPE)
@@ -367,7 +369,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -388,9 +391,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvmppc_get_sregs_ivor(vcpu, sregs);
kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+ return 0;
}
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int ret;
@@ -425,21 +430,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
struct kvm_vcpu *vcpu;
@@ -481,7 +487,7 @@ out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -492,15 +498,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500(struct kvm *kvm)
{
return 0;
}
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
{
}
+static struct kvmppc_ops kvm_ops_e500 = {
+ .get_sregs = kvmppc_core_get_sregs_e500,
+ .set_sregs = kvmppc_core_set_sregs_e500,
+ .get_one_reg = kvmppc_get_one_reg_e500,
+ .set_one_reg = kvmppc_set_one_reg_e500,
+ .vcpu_load = kvmppc_core_vcpu_load_e500,
+ .vcpu_put = kvmppc_core_vcpu_put_e500,
+ .vcpu_create = kvmppc_core_vcpu_create_e500,
+ .vcpu_free = kvmppc_core_vcpu_free_e500,
+ .mmu_destroy = kvmppc_mmu_destroy_e500,
+ .init_vm = kvmppc_core_init_vm_e500,
+ .destroy_vm = kvmppc_core_destroy_vm_e500,
+ .emulate_op = kvmppc_core_emulate_op_e500,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
static int __init kvmppc_e500_init(void)
{
int r, i;
@@ -512,11 +535,11 @@ static int __init kvmppc_e500_init(void)
r = kvmppc_core_check_processor_compat();
if (r)
- return r;
+ goto err_out;
r = kvmppc_booke_init();
if (r)
- return r;
+ goto err_out;
/* copy extra E500 exception handlers */
ivor[0] = mfspr(SPRN_IVOR32);
@@ -534,13 +557,23 @@ static int __init kvmppc_e500_init(void)
flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
ivor[max_ivor] + handler_len);
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_e500.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_e500;
+
+err_out:
+ return r;
}
static void __exit kvmppc_e500_exit(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_booke_exit();
}
module_init(kvmppc_e500_init);
module_exit(kvmppc_e500_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c2e5e98453a..a326178bdea 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -31,11 +31,13 @@ enum vcpu_ftr {
#define E500_TLB_NUM 2
/* entry is mapped somewhere in host TLB */
-#define E500_TLB_VALID (1 << 0)
+#define E500_TLB_VALID (1 << 31)
/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
-#define E500_TLB_BITMAP (1 << 1)
+#define E500_TLB_BITMAP (1 << 30)
/* TLB1 entry is mapped by host TLB0 */
-#define E500_TLB_TLB0 (1 << 2)
+#define E500_TLB_TLB0 (1 << 29)
+/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
+#define E500_TLB_MAS2_ATTR (0x7f)
struct tlbe_ref {
pfn_t pfn; /* valid only for TLB0, except briefly */
@@ -117,7 +119,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
#define MAS2_ATTRIB_MASK \
- (MAS2_X0 | MAS2_X1)
+ (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
#define MAS3_ATTRIB_MASK \
(MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
| E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a01243ab..002d5176414 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -19,6 +19,7 @@
#include "booke.h"
#include "e500.h"
+#define XOP_DCBTLS 166
#define XOP_MSGSND 206
#define XOP_MSGCLR 238
#define XOP_TLBIVAX 786
@@ -26,6 +27,7 @@
#define XOP_TLBRE 946
#define XOP_TLBWE 978
#define XOP_TLBILX 18
+#define XOP_EHPRIV 270
#ifdef CONFIG_KVM_E500MC
static int dbell2prio(ulong param)
@@ -82,8 +84,37 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
}
#endif
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (get_oc(inst)) {
+ case EHPRIV_OC_DEBUG:
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = vcpu->arch.pc;
+ run->debug.arch.status = 0;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ emulated = EMULATE_EXIT_USER;
+ *advance = 0;
+ break;
+ default:
+ emulated = EMULATE_FAIL;
+ }
+ return emulated;
+}
+
+static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ /* Always fail to lock the cache */
+ vcpu_e500->l1csr0 |= L1CSR0_CUL;
+ return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
int ra = get_ra(inst);
@@ -95,6 +126,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
+ case XOP_DCBTLS:
+ emulated = kvmppc_e500_emul_dcbtls(vcpu);
+ break;
+
#ifdef CONFIG_KVM_E500MC
case XOP_MSGSND:
emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
@@ -130,6 +165,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;
+ case XOP_EHPRIV:
+ emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
+ advance);
+ break;
+
default:
emulated = EMULATE_FAIL;
}
@@ -146,7 +186,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
@@ -196,6 +236,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
break;
case SPRN_L1CSR1:
vcpu_e500->l1csr1 = spr_val;
+ vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);
break;
case SPRN_HID0:
vcpu_e500->hid0 = spr_val;
@@ -237,7 +278,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 6d6f153b6c1..50860e919cb 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -32,7 +32,7 @@
#include <asm/kvm_ppc.h>
#include "e500.h"
-#include "trace.h"
+#include "trace_booke.h"
#include "timing.h"
#include "e500_mmu_host.h"
@@ -127,7 +127,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
}
static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
- unsigned int eaddr, int as)
+ gva_t eaddr, int as)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
unsigned int victim, tsized;
@@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
{
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d729df..86903d3f5a0 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -32,10 +32,11 @@
#include <asm/kvm_ppc.h>
#include "e500.h"
-#include "trace.h"
#include "timing.h"
#include "e500_mmu_host.h"
+#include "trace_booke.h"
+
#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
@@ -64,15 +65,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
return mas3;
}
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
-{
-#ifdef CONFIG_SMP
- return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
- return mas2 & MAS2_ATTRIB_MASK;
-#endif
-}
-
/*
* writing shadow tlb entry to host TLB
*/
@@ -230,15 +222,15 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
}
- /* Already invalidated in between */
- if (!(ref->flags & E500_TLB_VALID))
- return;
-
- /* Guest tlbe is backed by at most one host tlbe per shadow pid. */
- kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
+ /*
+ * If TLB entry is still valid then it's a TLB0 entry, and thus
+ * backed by at most one host tlbe per shadow pid
+ */
+ if (ref->flags & E500_TLB_VALID)
+ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
/* Mark the TLB as not backed by the host anymore */
- ref->flags &= ~E500_TLB_VALID;
+ ref->flags = 0;
}
static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
@@ -248,10 +240,16 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
struct kvm_book3e_206_tlb_entry *gtlbe,
- pfn_t pfn)
+ pfn_t pfn, unsigned int wimg)
{
ref->pfn = pfn;
- ref->flags |= E500_TLB_VALID;
+ ref->flags = E500_TLB_VALID;
+
+ /* Use guest supplied MAS2_G and MAS2_E */
+ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
+
+ /* Mark the page accessed */
+ kvm_set_pfn_accessed(pfn);
if (tlbe_is_writable(gtlbe))
kvm_set_pfn_dirty(pfn);
@@ -312,8 +310,7 @@ static void kvmppc_e500_setup_stlbe(
/* Force IPROT=0 for all guest mappings. */
stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
- stlbe->mas2 = (gvaddr & MAS2_EPN) |
- e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+ stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
@@ -332,6 +329,17 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
+ int ret = 0;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu_e500->vcpu.kvm;
+ unsigned long tsize_pages = 0;
+ pte_t *ptep;
+ unsigned int wimg = 0;
+ pgd_t *pgdir;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
/*
* Translate guest physical to true physical, acquiring
@@ -394,7 +402,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
*/
for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
- unsigned long gfn_start, gfn_end, tsize_pages;
+ unsigned long gfn_start, gfn_end;
tsize_pages = 1 << (tsize - 2);
gfn_start = gfn & ~(tsize_pages - 1);
@@ -436,11 +444,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
}
if (likely(!pfnmap)) {
- unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+ tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
pfn = gfn_to_pfn_memslot(slot, gfn);
if (is_error_noslot_pfn(pfn)) {
- printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
- (long)gfn);
+ if (printk_ratelimit())
+ pr_err("%s: real page not found for gfn %lx\n",
+ __func__, (long)gfn);
return -EINVAL;
}
@@ -449,7 +458,25 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
}
- kvmppc_e500_ref_setup(ref, gtlbe, pfn);
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+
+ pgdir = vcpu_e500->vcpu.arch.pgdir;
+ ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
+ if (pte_present(*ptep))
+ wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+ else {
+ if (printk_ratelimit())
+ pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
+ __func__, (long)gfn, pfn);
+ ret = -EINVAL;
+ goto out;
+ }
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
@@ -457,10 +484,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* Clear i-cache for new pages */
kvmppc_mmu_flush_icache(pfn);
+out:
+ spin_unlock(&kvm->mmu_lock);
+
/* Drop refcount on page, so that mmu notifiers can clear it */
kvm_release_pfn_clean(pfn);
- return 0;
+ return ret;
}
/* XXX only map the one-one case, for now use TLB0 */
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 19c8379575f..17e45627922 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -16,6 +16,8 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/export.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -110,7 +112,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -147,7 +149,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvmppc_load_guest_fp(vcpu);
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
{
vcpu->arch.eplc = mfspr(SPRN_EPLC);
vcpu->arch.epsc = mfspr(SPRN_EPSC);
@@ -204,7 +206,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -224,10 +227,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
- kvmppc_get_sregs_ivor(vcpu, sregs);
+ return kvmppc_get_sregs_ivor(vcpu, sregs);
}
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int ret;
@@ -260,21 +264,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
- union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
return r;
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
struct kvm_vcpu *vcpu;
@@ -315,7 +320,7 @@ out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -325,7 +330,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
{
int lpid;
@@ -337,29 +342,56 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return 0;
}
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
}
+static struct kvmppc_ops kvm_ops_e500mc = {
+ .get_sregs = kvmppc_core_get_sregs_e500mc,
+ .set_sregs = kvmppc_core_set_sregs_e500mc,
+ .get_one_reg = kvmppc_get_one_reg_e500mc,
+ .set_one_reg = kvmppc_set_one_reg_e500mc,
+ .vcpu_load = kvmppc_core_vcpu_load_e500mc,
+ .vcpu_put = kvmppc_core_vcpu_put_e500mc,
+ .vcpu_create = kvmppc_core_vcpu_create_e500mc,
+ .vcpu_free = kvmppc_core_vcpu_free_e500mc,
+ .mmu_destroy = kvmppc_mmu_destroy_e500,
+ .init_vm = kvmppc_core_init_vm_e500mc,
+ .destroy_vm = kvmppc_core_destroy_vm_e500mc,
+ .emulate_op = kvmppc_core_emulate_op_e500,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
static int __init kvmppc_e500mc_init(void)
{
int r;
r = kvmppc_booke_init();
if (r)
- return r;
+ goto err_out;
kvmppc_init_lpid(64);
kvmppc_claim_lpid(0); /* host */
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_e500mc.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_e500mc;
+
+err_out:
+ return r;
}
static void __exit kvmppc_e500mc_exit(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_booke_exit();
}
module_init(kvmppc_e500mc_init);
module_exit(kvmppc_e500mc_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 751cd45f65a..da86d9ba347 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -97,10 +97,10 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.shared->srr0 = spr_val;
+ kvmppc_set_srr0(vcpu, spr_val);
break;
case SPRN_SRR1:
- vcpu->arch.shared->srr1 = spr_val;
+ kvmppc_set_srr1(vcpu, spr_val);
break;
/* XXX We need to context-switch the timebase for
@@ -114,24 +114,24 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_SPRG0:
- vcpu->arch.shared->sprg0 = spr_val;
+ kvmppc_set_sprg0(vcpu, spr_val);
break;
case SPRN_SPRG1:
- vcpu->arch.shared->sprg1 = spr_val;
+ kvmppc_set_sprg1(vcpu, spr_val);
break;
case SPRN_SPRG2:
- vcpu->arch.shared->sprg2 = spr_val;
+ kvmppc_set_sprg2(vcpu, spr_val);
break;
case SPRN_SPRG3:
- vcpu->arch.shared->sprg3 = spr_val;
+ kvmppc_set_sprg3(vcpu, spr_val);
break;
/* PIR can legally be written, but we ignore it */
case SPRN_PIR: break;
default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
- spr_val);
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
+ spr_val);
if (emulated == EMULATE_FAIL)
printk(KERN_INFO "mtspr: unknown spr "
"0x%x\n", sprn);
@@ -150,10 +150,10 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
switch (sprn) {
case SPRN_SRR0:
- spr_val = vcpu->arch.shared->srr0;
+ spr_val = kvmppc_get_srr0(vcpu);
break;
case SPRN_SRR1:
- spr_val = vcpu->arch.shared->srr1;
+ spr_val = kvmppc_get_srr1(vcpu);
break;
case SPRN_PVR:
spr_val = vcpu->arch.pvr;
@@ -173,16 +173,16 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
break;
case SPRN_SPRG0:
- spr_val = vcpu->arch.shared->sprg0;
+ spr_val = kvmppc_get_sprg0(vcpu);
break;
case SPRN_SPRG1:
- spr_val = vcpu->arch.shared->sprg1;
+ spr_val = kvmppc_get_sprg1(vcpu);
break;
case SPRN_SPRG2:
- spr_val = vcpu->arch.shared->sprg2;
+ spr_val = kvmppc_get_sprg2(vcpu);
break;
case SPRN_SPRG3:
- spr_val = vcpu->arch.shared->sprg3;
+ spr_val = kvmppc_get_sprg3(vcpu);
break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
@@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
spr_val = kvmppc_get_dec(vcpu, get_tb());
break;
default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
- &spr_val);
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
+ &spr_val);
if (unlikely(emulated == EMULATE_FAIL)) {
printk(KERN_INFO "mfspr: unknown spr "
"0x%x\n", sprn);
@@ -219,7 +219,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
* lmw
* stmw
*
- * XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
/* XXX Should probably auto-generate instruction decoding for a particular core
* from opcode tables in the future. */
@@ -464,7 +463,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
if (emulated == EMULATE_FAIL) {
- emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+ &advance);
if (emulated == EMULATE_AGAIN) {
advance = 0;
} else if (emulated == EMULATE_FAIL) {
@@ -483,3 +483,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
return emulated;
}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 2861ae9eaae..b68d0dc9479 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -126,6 +126,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
u32 val, int idx);
static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
u32 *ptr, int idx);
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+ uint32_t val);
enum irq_type {
IRQ_TYPE_NORMAL = 0,
@@ -528,7 +530,6 @@ static void openpic_reset(struct openpic *opp)
/* Initialise IRQ sources */
for (i = 0; i < opp->max_irq; i++) {
opp->src[i].ivpr = opp->ivpr_reset;
- opp->src[i].idr = opp->idr_reset;
switch (opp->src[i].type) {
case IRQ_TYPE_NORMAL:
@@ -543,6 +544,8 @@ static void openpic_reset(struct openpic *opp)
case IRQ_TYPE_FSLSPECIAL:
break;
}
+
+ write_IRQreg_idr(opp, i, opp->idr_reset);
}
/* Initialise IRQ destinations */
for (i = 0; i < MAX_CPU; i++) {
@@ -1635,6 +1638,7 @@ static void mpic_destroy(struct kvm_device *dev)
dev->kvm->arch.mpic = NULL;
kfree(opp);
+ kfree(dev);
}
static int mpic_set_default_irq_routing(struct openpic *opp)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 07c0106fab7..61c738ab128 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/module.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@@ -39,6 +40,12 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return !!(v->arch.pending_exceptions) ||
@@ -50,7 +57,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return 1;
}
-#ifndef CONFIG_KVM_BOOK3S_64_HV
/*
* Common checks before entering the guest world. Call with interrupts
* disabled.
@@ -62,14 +68,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
*/
int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
{
- int r = 1;
+ int r;
+
+ WARN_ON(irqs_disabled());
+ hard_irq_disable();
- WARN_ON_ONCE(!irqs_disabled());
while (true) {
if (need_resched()) {
local_irq_enable();
cond_resched();
- local_irq_disable();
+ hard_irq_disable();
continue;
}
@@ -95,7 +103,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
local_irq_enable();
trace_kvm_check_requests(vcpu);
r = kvmppc_core_check_requests(vcpu);
- local_irq_disable();
+ hard_irq_disable();
if (r > 0)
continue;
break;
@@ -107,25 +115,36 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
continue;
}
-#ifdef CONFIG_PPC64
- /* lazy EE magic */
- hard_irq_disable();
- if (lazy_irq_pending()) {
- /* Got an interrupt in between, try again */
- local_irq_enable();
- local_irq_disable();
- kvm_guest_exit();
- continue;
- }
-#endif
-
kvm_guest_enter();
- break;
+ return 1;
}
+ /* return to host */
+ local_irq_enable();
return r;
}
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+ int i;
+
+ shared->sprg0 = swab64(shared->sprg0);
+ shared->sprg1 = swab64(shared->sprg1);
+ shared->sprg2 = swab64(shared->sprg2);
+ shared->sprg3 = swab64(shared->sprg3);
+ shared->srr0 = swab64(shared->srr0);
+ shared->srr1 = swab64(shared->srr1);
+ shared->dar = swab64(shared->dar);
+ shared->msr = swab64(shared->msr);
+ shared->dsisr = swab32(shared->dsisr);
+ shared->int_pending = swab32(shared->int_pending);
+ for (i = 0; i < ARRAY_SIZE(shared->sr); i++)
+ shared->sr[i] = swab32(shared->sr[i]);
+}
+#endif
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
@@ -137,7 +156,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
unsigned long r2 = 0;
- if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
/* 32 bit mode */
param1 &= 0xffffffff;
param2 &= 0xffffffff;
@@ -148,8 +167,28 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
switch (nr) {
case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
{
- vcpu->arch.magic_page_pa = param1;
- vcpu->arch.magic_page_ea = param2;
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+ /* Book3S can be little endian, find it out here */
+ int shared_big_endian = true;
+ if (vcpu->arch.intr_msr & MSR_LE)
+ shared_big_endian = false;
+ if (shared_big_endian != vcpu->arch.shared_big_endian)
+ kvmppc_swab_shared(vcpu);
+ vcpu->arch.shared_big_endian = shared_big_endian;
+#endif
+
+ if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
+ /*
+ * Older versions of the Linux magic page code had
+ * a bug where they would map their trampoline code
+ * NX. If that's the case, remove !PR NX capability.
+ */
+ vcpu->arch.disable_kernel_nx = true;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+
+ vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
+ vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
@@ -179,6 +218,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
return r;
}
+EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
{
@@ -192,11 +232,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
goto out;
-#ifdef CONFIG_KVM_BOOK3S_64_HV
/* HV KVM can only do PAPR mode for now */
- if (!vcpu->arch.papr_enabled)
+ if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
goto out;
-#endif
#ifdef CONFIG_KVM_BOOKE_HV
if (!cpu_has_feature(CPU_FTR_EMB_HV))
@@ -209,6 +247,7 @@ out:
vcpu->arch.sane = r;
return r ? 0 : -EINVAL;
}
+EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
@@ -243,6 +282,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
int kvm_arch_hardware_enable(void *garbage)
{
@@ -269,10 +309,35 @@ void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- if (type)
- return -EINVAL;
-
+ struct kvmppc_ops *kvm_ops = NULL;
+ /*
+ * if we have both HV and PR enabled, default is HV
+ */
+ if (type == 0) {
+ if (kvmppc_hv_ops)
+ kvm_ops = kvmppc_hv_ops;
+ else
+ kvm_ops = kvmppc_pr_ops;
+ if (!kvm_ops)
+ goto err_out;
+ } else if (type == KVM_VM_PPC_HV) {
+ if (!kvmppc_hv_ops)
+ goto err_out;
+ kvm_ops = kvmppc_hv_ops;
+ } else if (type == KVM_VM_PPC_PR) {
+ if (!kvmppc_pr_ops)
+ goto err_out;
+ kvm_ops = kvmppc_pr_ops;
+ } else
+ goto err_out;
+
+ if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+ return -ENOENT;
+
+ kvm->arch.kvm_ops = kvm_ops;
return kvmppc_core_init_vm(kvm);
+err_out:
+ return -EINVAL;
}
void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -292,6 +357,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvmppc_core_destroy_vm(kvm);
mutex_unlock(&kvm->lock);
+
+ /* drop the module reference */
+ module_put(kvm->arch.kvm_ops->owner);
}
void kvm_arch_sync_events(struct kvm *kvm)
@@ -301,6 +369,10 @@ void kvm_arch_sync_events(struct kvm *kvm)
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
+ /* FIXME!!
+ * Should some of this be vm ioctl ? is it possible now ?
+ */
+ int hv_enabled = kvmppc_hv_ops ? 1 : 0;
switch (ext) {
#ifdef CONFIG_BOOKE
@@ -320,58 +392,68 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_DEVICE_CTRL:
r = 1;
break;
-#ifndef CONFIG_KVM_BOOK3S_64_HV
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
-#ifdef CONFIG_KVM_MPIC
- case KVM_CAP_IRQ_MPIC:
-#endif
- r = 1;
+ /* We support this only for PR */
+ r = !hv_enabled;
break;
+#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
#endif
+#ifdef CONFIG_KVM_MPIC
+ case KVM_CAP_IRQ_MPIC:
+ r = 1;
+ break;
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
+ case KVM_CAP_PPC_FIXUP_HCALL:
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
- r = threads_per_core;
+ if (hv_enabled)
+ r = threads_per_subcore;
+ else
+ r = 0;
break;
case KVM_CAP_PPC_RMA:
- r = 1;
+ r = hv_enabled;
/* PPC970 requires an RMA */
- if (cpu_has_feature(CPU_FTR_ARCH_201))
+ if (r && cpu_has_feature(CPU_FTR_ARCH_201))
r = 2;
break;
#endif
case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
- r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (hv_enabled)
+ r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+ else
+ r = 0;
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
r = 1;
#else
r = 0;
- break;
#endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+ break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_HTAB_FD:
- r = 1;
+ r = hv_enabled;
break;
#endif
- break;
case KVM_CAP_NR_VCPUS:
/*
* Recommending a number of CPUs is somewhat arbitrary; we
@@ -379,11 +461,10 @@ int kvm_dev_ioctl_check_extension(long ext)
* will have secondary threads "offline"), and for other KVM
* implementations just count online CPUs.
*/
-#ifdef CONFIG_KVM_BOOK3S_64_HV
- r = num_present_cpus();
-#else
- r = num_online_cpus();
-#endif
+ if (hv_enabled)
+ r = num_present_cpus();
+ else
+ r = num_online_cpus();
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -407,15 +488,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- kvmppc_core_free_memslot(free, dont);
+ kvmppc_core_free_memslot(kvm, free, dont);
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- return kvmppc_core_create_memslot(slot, npages);
+ return kvmppc_core_create_memslot(kvm, slot, npages);
}
void kvm_arch_memslots_updated(struct kvm *kvm)
@@ -608,14 +690,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
break;
case KVM_MMIO_REG_FPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+ VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
break;
#ifdef CONFIG_PPC_BOOK3S
case KVM_MMIO_REG_QPR:
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
case KVM_MMIO_REG_FQPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+ VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#endif
@@ -625,9 +707,19 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
}
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int rt, unsigned int bytes, int is_bigendian)
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian)
{
int idx, ret;
+ int is_bigendian;
+
+ if (kvmppc_need_byteswap(vcpu)) {
+ /* Default endianness is "little endian". */
+ is_bigendian = !is_default_endian;
+ } else {
+ /* Default endianness is "big endian". */
+ is_bigendian = is_default_endian;
+ }
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -659,24 +751,35 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_handle_load);
/* Same as above, but sign extends */
int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int rt, unsigned int bytes, int is_bigendian)
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian)
{
int r;
vcpu->arch.mmio_sign_extend = 1;
- r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
+ r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
return r;
}
int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
- u64 val, unsigned int bytes, int is_bigendian)
+ u64 val, unsigned int bytes, int is_default_endian)
{
void *data = run->mmio.data;
int idx, ret;
+ int is_bigendian;
+
+ if (kvmppc_need_byteswap(vcpu)) {
+ /* Default endianness is "little endian". */
+ is_bigendian = !is_default_endian;
+ } else {
+ /* Default endianness is "big endian". */
+ is_bigendian = is_default_endian;
+ }
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -720,6 +823,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_handle_store);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
@@ -953,10 +1057,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
u32 inst_nop = 0x60000000;
#ifdef CONFIG_KVM_BOOKE_HV
u32 inst_sc1 = 0x44000022;
- pvinfo->hcall[0] = inst_sc1;
- pvinfo->hcall[1] = inst_nop;
- pvinfo->hcall[2] = inst_nop;
- pvinfo->hcall[3] = inst_nop;
+ pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
+ pvinfo->hcall[1] = cpu_to_be32(inst_nop);
+ pvinfo->hcall[2] = cpu_to_be32(inst_nop);
+ pvinfo->hcall[3] = cpu_to_be32(inst_nop);
#else
u32 inst_lis = 0x3c000000;
u32 inst_ori = 0x60000000;
@@ -972,10 +1076,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
* sc
* nop
*/
- pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
- pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
- pvinfo->hcall[2] = inst_sc;
- pvinfo->hcall[3] = inst_nop;
+ pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
+ pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
+ pvinfo->hcall[2] = cpu_to_be32(inst_sc);
+ pvinfo->hcall[3] = cpu_to_be32(inst_nop);
#endif
pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
@@ -1024,52 +1128,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
goto out;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HV
- case KVM_ALLOCATE_RMA: {
- struct kvm_allocate_rma rma;
- struct kvm *kvm = filp->private_data;
-
- r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
- if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
- r = -EFAULT;
- break;
- }
-
- case KVM_PPC_ALLOCATE_HTAB: {
- u32 htab_order;
-
- r = -EFAULT;
- if (get_user(htab_order, (u32 __user *)argp))
- break;
- r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
- if (r)
- break;
- r = -EFAULT;
- if (put_user(htab_order, (u32 __user *)argp))
- break;
- r = 0;
- break;
- }
-
- case KVM_PPC_GET_HTAB_FD: {
- struct kvm_get_htab_fd ghf;
-
- r = -EFAULT;
- if (copy_from_user(&ghf, argp, sizeof(ghf)))
- break;
- r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
- break;
- }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
-
-#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
struct kvm_ppc_smmu_info info;
+ struct kvm *kvm = filp->private_data;
memset(&info, 0, sizeof(info));
- r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+ r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
r = -EFAULT;
break;
@@ -1080,11 +1144,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
break;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+ default: {
+ struct kvm *kvm = filp->private_data;
+ r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+ }
+#else /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;
+#endif
}
-
out:
return r;
}
@@ -1106,22 +1174,26 @@ long kvmppc_alloc_lpid(void)
return lpid;
}
+EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
void kvmppc_claim_lpid(long lpid)
{
set_bit(lpid, lpid_inuse);
}
+EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
void kvmppc_free_lpid(long lpid)
{
clear_bit(lpid, lpid_inuse);
}
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
void kvmppc_init_lpid(unsigned long nr_lpids_param)
{
nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
memset(lpid_inuse, 0, sizeof(lpid_inuse));
}
+EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
int kvm_arch_init(void *opaque)
{
@@ -1130,4 +1202,5 @@ int kvm_arch_init(void *opaque)
void kvm_arch_exit(void)
{
+
}
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index e326489a542..2e0e67ef354 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr,
__entry->inst, __entry->pc, __entry->emulate)
);
-#ifdef CONFIG_PPC_BOOK3S
-#define kvm_trace_symbol_exit \
- {0x100, "SYSTEM_RESET"}, \
- {0x200, "MACHINE_CHECK"}, \
- {0x300, "DATA_STORAGE"}, \
- {0x380, "DATA_SEGMENT"}, \
- {0x400, "INST_STORAGE"}, \
- {0x480, "INST_SEGMENT"}, \
- {0x500, "EXTERNAL"}, \
- {0x501, "EXTERNAL_LEVEL"}, \
- {0x502, "EXTERNAL_HV"}, \
- {0x600, "ALIGNMENT"}, \
- {0x700, "PROGRAM"}, \
- {0x800, "FP_UNAVAIL"}, \
- {0x900, "DECREMENTER"}, \
- {0x980, "HV_DECREMENTER"}, \
- {0xc00, "SYSCALL"}, \
- {0xd00, "TRACE"}, \
- {0xe00, "H_DATA_STORAGE"}, \
- {0xe20, "H_INST_STORAGE"}, \
- {0xe40, "H_EMUL_ASSIST"}, \
- {0xf00, "PERFMON"}, \
- {0xf20, "ALTIVEC"}, \
- {0xf40, "VSX"}
-#else
-#define kvm_trace_symbol_exit \
- {0, "CRITICAL"}, \
- {1, "MACHINE_CHECK"}, \
- {2, "DATA_STORAGE"}, \
- {3, "INST_STORAGE"}, \
- {4, "EXTERNAL"}, \
- {5, "ALIGNMENT"}, \
- {6, "PROGRAM"}, \
- {7, "FP_UNAVAIL"}, \
- {8, "SYSCALL"}, \
- {9, "AP_UNAVAIL"}, \
- {10, "DECREMENTER"}, \
- {11, "FIT"}, \
- {12, "WATCHDOG"}, \
- {13, "DTLB_MISS"}, \
- {14, "ITLB_MISS"}, \
- {15, "DEBUG"}, \
- {32, "SPE_UNAVAIL"}, \
- {33, "SPE_FP_DATA"}, \
- {34, "SPE_FP_ROUND"}, \
- {35, "PERFORMANCE_MONITOR"}, \
- {36, "DOORBELL"}, \
- {37, "DOORBELL_CRITICAL"}, \
- {38, "GUEST_DBELL"}, \
- {39, "GUEST_DBELL_CRIT"}, \
- {40, "HV_SYSCALL"}, \
- {41, "HV_PRIV"}
-#endif
-
-TRACE_EVENT(kvm_exit,
- TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
- TP_ARGS(exit_nr, vcpu),
-
- TP_STRUCT__entry(
- __field( unsigned int, exit_nr )
- __field( unsigned long, pc )
- __field( unsigned long, msr )
- __field( unsigned long, dar )
-#ifdef CONFIG_KVM_BOOK3S_PR
- __field( unsigned long, srr1 )
-#endif
- __field( unsigned long, last_inst )
- ),
-
- TP_fast_assign(
-#ifdef CONFIG_KVM_BOOK3S_PR
- struct kvmppc_book3s_shadow_vcpu *svcpu;
-#endif
- __entry->exit_nr = exit_nr;
- __entry->pc = kvmppc_get_pc(vcpu);
- __entry->dar = kvmppc_get_fault_dar(vcpu);
- __entry->msr = vcpu->arch.shared->msr;
-#ifdef CONFIG_KVM_BOOK3S_PR
- svcpu = svcpu_get(vcpu);
- __entry->srr1 = svcpu->shadow_srr1;
- svcpu_put(svcpu);
-#endif
- __entry->last_inst = vcpu->arch.last_inst;
- ),
-
- TP_printk("exit=%s"
- " | pc=0x%lx"
- " | msr=0x%lx"
- " | dar=0x%lx"
-#ifdef CONFIG_KVM_BOOK3S_PR
- " | srr1=0x%lx"
-#endif
- " | last_inst=0x%lx"
- ,
- __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
- __entry->pc,
- __entry->msr,
- __entry->dar,
-#ifdef CONFIG_KVM_BOOK3S_PR
- __entry->srr1,
-#endif
- __entry->last_inst
- )
-);
-
-TRACE_EVENT(kvm_unmap_hva,
- TP_PROTO(unsigned long hva),
- TP_ARGS(hva),
-
- TP_STRUCT__entry(
- __field( unsigned long, hva )
- ),
-
- TP_fast_assign(
- __entry->hva = hva;
- ),
-
- TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
TRACE_EVENT(kvm_stlb_inval,
TP_PROTO(unsigned int stlb_index),
TP_ARGS(stlb_index),
@@ -236,315 +116,6 @@ TRACE_EVENT(kvm_check_requests,
__entry->cpu_nr, __entry->requests)
);
-
-/*************************************************************************
- * Book3S trace points *
- *************************************************************************/
-
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-TRACE_EVENT(kvm_book3s_reenter,
- TP_PROTO(int r, struct kvm_vcpu *vcpu),
- TP_ARGS(r, vcpu),
-
- TP_STRUCT__entry(
- __field( unsigned int, r )
- __field( unsigned long, pc )
- ),
-
- TP_fast_assign(
- __entry->r = r;
- __entry->pc = kvmppc_get_pc(vcpu);
- ),
-
- TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
-);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
-TRACE_EVENT(kvm_book3s_64_mmu_map,
- TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
- struct kvmppc_pte *orig_pte),
- TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
-
- TP_STRUCT__entry(
- __field( unsigned char, flag_w )
- __field( unsigned char, flag_x )
- __field( unsigned long, eaddr )
- __field( unsigned long, hpteg )
- __field( unsigned long, va )
- __field( unsigned long long, vpage )
- __field( unsigned long, hpaddr )
- ),
-
- TP_fast_assign(
- __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
- __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
- __entry->eaddr = orig_pte->eaddr;
- __entry->hpteg = hpteg;
- __entry->va = va;
- __entry->vpage = orig_pte->vpage;
- __entry->hpaddr = hpaddr;
- ),
-
- TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
- __entry->flag_w, __entry->flag_x, __entry->eaddr,
- __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
-);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-TRACE_EVENT(kvm_book3s_mmu_map,
- TP_PROTO(struct hpte_cache *pte),
- TP_ARGS(pte),
-
- TP_STRUCT__entry(
- __field( u64, host_vpn )
- __field( u64, pfn )
- __field( ulong, eaddr )
- __field( u64, vpage )
- __field( ulong, raddr )
- __field( int, flags )
- ),
-
- TP_fast_assign(
- __entry->host_vpn = pte->host_vpn;
- __entry->pfn = pte->pfn;
- __entry->eaddr = pte->pte.eaddr;
- __entry->vpage = pte->pte.vpage;
- __entry->raddr = pte->pte.raddr;
- __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
- (pte->pte.may_write ? 0x2 : 0) |
- (pte->pte.may_execute ? 0x1 : 0);
- ),
-
- TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_vpn, __entry->pfn, __entry->eaddr,
- __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_invalidate,
- TP_PROTO(struct hpte_cache *pte),
- TP_ARGS(pte),
-
- TP_STRUCT__entry(
- __field( u64, host_vpn )
- __field( u64, pfn )
- __field( ulong, eaddr )
- __field( u64, vpage )
- __field( ulong, raddr )
- __field( int, flags )
- ),
-
- TP_fast_assign(
- __entry->host_vpn = pte->host_vpn;
- __entry->pfn = pte->pfn;
- __entry->eaddr = pte->pte.eaddr;
- __entry->vpage = pte->pte.vpage;
- __entry->raddr = pte->pte.raddr;
- __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
- (pte->pte.may_write ? 0x2 : 0) |
- (pte->pte.may_execute ? 0x1 : 0);
- ),
-
- TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_vpn, __entry->pfn, __entry->eaddr,
- __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_flush,
- TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
- unsigned long long p2),
- TP_ARGS(type, vcpu, p1, p2),
-
- TP_STRUCT__entry(
- __field( int, count )
- __field( unsigned long long, p1 )
- __field( unsigned long long, p2 )
- __field( const char *, type )
- ),
-
- TP_fast_assign(
- __entry->count = to_book3s(vcpu)->hpte_cache_count;
- __entry->p1 = p1;
- __entry->p2 = p2;
- __entry->type = type;
- ),
-
- TP_printk("Flush %d %sPTEs: %llx - %llx",
- __entry->count, __entry->type, __entry->p1, __entry->p2)
-);
-
-TRACE_EVENT(kvm_book3s_slb_found,
- TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
- TP_ARGS(gvsid, hvsid),
-
- TP_STRUCT__entry(
- __field( unsigned long long, gvsid )
- __field( unsigned long long, hvsid )
- ),
-
- TP_fast_assign(
- __entry->gvsid = gvsid;
- __entry->hvsid = hvsid;
- ),
-
- TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_fail,
- TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
- TP_ARGS(sid_map_mask, gvsid),
-
- TP_STRUCT__entry(
- __field( unsigned short, sid_map_mask )
- __field( unsigned long long, gvsid )
- ),
-
- TP_fast_assign(
- __entry->sid_map_mask = sid_map_mask;
- __entry->gvsid = gvsid;
- ),
-
- TP_printk("%x/%x: %llx", __entry->sid_map_mask,
- SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_map,
- TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
- unsigned long long hvsid),
- TP_ARGS(sid_map_mask, gvsid, hvsid),
-
- TP_STRUCT__entry(
- __field( unsigned short, sid_map_mask )
- __field( unsigned long long, guest_vsid )
- __field( unsigned long long, host_vsid )
- ),
-
- TP_fast_assign(
- __entry->sid_map_mask = sid_map_mask;
- __entry->guest_vsid = gvsid;
- __entry->host_vsid = hvsid;
- ),
-
- TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
- __entry->guest_vsid, __entry->host_vsid)
-);
-
-TRACE_EVENT(kvm_book3s_slbmte,
- TP_PROTO(u64 slb_vsid, u64 slb_esid),
- TP_ARGS(slb_vsid, slb_esid),
-
- TP_STRUCT__entry(
- __field( u64, slb_vsid )
- __field( u64, slb_esid )
- ),
-
- TP_fast_assign(
- __entry->slb_vsid = slb_vsid;
- __entry->slb_esid = slb_esid;
- ),
-
- TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
-);
-
-#endif /* CONFIG_PPC_BOOK3S */
-
-
-/*************************************************************************
- * Book3E trace points *
- *************************************************************************/
-
-#ifdef CONFIG_BOOKE
-
-TRACE_EVENT(kvm_booke206_stlb_write,
- TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
- TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
-
- TP_STRUCT__entry(
- __field( __u32, mas0 )
- __field( __u32, mas8 )
- __field( __u32, mas1 )
- __field( __u64, mas2 )
- __field( __u64, mas7_3 )
- ),
-
- TP_fast_assign(
- __entry->mas0 = mas0;
- __entry->mas8 = mas8;
- __entry->mas1 = mas1;
- __entry->mas2 = mas2;
- __entry->mas7_3 = mas7_3;
- ),
-
- TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
- __entry->mas0, __entry->mas8, __entry->mas1,
- __entry->mas2, __entry->mas7_3)
-);
-
-TRACE_EVENT(kvm_booke206_gtlb_write,
- TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
- TP_ARGS(mas0, mas1, mas2, mas7_3),
-
- TP_STRUCT__entry(
- __field( __u32, mas0 )
- __field( __u32, mas1 )
- __field( __u64, mas2 )
- __field( __u64, mas7_3 )
- ),
-
- TP_fast_assign(
- __entry->mas0 = mas0;
- __entry->mas1 = mas1;
- __entry->mas2 = mas2;
- __entry->mas7_3 = mas7_3;
- ),
-
- TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
- __entry->mas0, __entry->mas1,
- __entry->mas2, __entry->mas7_3)
-);
-
-TRACE_EVENT(kvm_booke206_ref_release,
- TP_PROTO(__u64 pfn, __u32 flags),
- TP_ARGS(pfn, flags),
-
- TP_STRUCT__entry(
- __field( __u64, pfn )
- __field( __u32, flags )
- ),
-
- TP_fast_assign(
- __entry->pfn = pfn;
- __entry->flags = flags;
- ),
-
- TP_printk("pfn=%llx flags=%x",
- __entry->pfn, __entry->flags)
-);
-
-TRACE_EVENT(kvm_booke_queue_irqprio,
- TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
- TP_ARGS(vcpu, priority),
-
- TP_STRUCT__entry(
- __field( __u32, cpu_nr )
- __field( __u32, priority )
- __field( unsigned long, pending )
- ),
-
- TP_fast_assign(
- __entry->cpu_nr = vcpu->vcpu_id;
- __entry->priority = priority;
- __entry->pending = vcpu->arch.pending_exceptions;
- ),
-
- TP_printk("vcpu=%x prio=%x pending=%lx",
- __entry->cpu_nr, __entry->priority, __entry->pending)
-);
-
-#endif
-
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
new file mode 100644
index 00000000000..f7537cf26ce
--- /dev/null
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -0,0 +1,177 @@
+#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_BOOKE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_booke
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
+#define kvm_trace_symbol_exit \
+ {0, "CRITICAL"}, \
+ {1, "MACHINE_CHECK"}, \
+ {2, "DATA_STORAGE"}, \
+ {3, "INST_STORAGE"}, \
+ {4, "EXTERNAL"}, \
+ {5, "ALIGNMENT"}, \
+ {6, "PROGRAM"}, \
+ {7, "FP_UNAVAIL"}, \
+ {8, "SYSCALL"}, \
+ {9, "AP_UNAVAIL"}, \
+ {10, "DECREMENTER"}, \
+ {11, "FIT"}, \
+ {12, "WATCHDOG"}, \
+ {13, "DTLB_MISS"}, \
+ {14, "ITLB_MISS"}, \
+ {15, "DEBUG"}, \
+ {32, "SPE_UNAVAIL"}, \
+ {33, "SPE_FP_DATA"}, \
+ {34, "SPE_FP_ROUND"}, \
+ {35, "PERFORMANCE_MONITOR"}, \
+ {36, "DOORBELL"}, \
+ {37, "DOORBELL_CRITICAL"}, \
+ {38, "GUEST_DBELL"}, \
+ {39, "GUEST_DBELL_CRIT"}, \
+ {40, "HV_SYSCALL"}, \
+ {41, "HV_PRIV"}
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas8 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas8 = mas8;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas8, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_ref_release,
+ TP_PROTO(__u64 pfn, __u32 flags),
+ TP_ARGS(pfn, flags),
+
+ TP_STRUCT__entry(
+ __field( __u64, pfn )
+ __field( __u32, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("pfn=%llx flags=%x",
+ __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+ TP_ARGS(vcpu, priority),
+
+ TP_STRUCT__entry(
+ __field( __u32, cpu_nr )
+ __field( __u32, priority )
+ __field( unsigned long, pending )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->priority = priority;
+ __entry->pending = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("vcpu=%x prio=%x pending=%lx",
+ __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644
index 00000000000..e1357cd8dc1
--- /dev/null
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -0,0 +1,297 @@
+
+#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_PR_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_pr
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+
+TRACE_EVENT(kvm_book3s_reenter,
+ TP_PROTO(int r, struct kvm_vcpu *vcpu),
+ TP_ARGS(r, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, r )
+ __field( unsigned long, pc )
+ ),
+
+ TP_fast_assign(
+ __entry->r = r;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ ),
+
+ TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+ TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+ struct kvmppc_pte *orig_pte),
+ TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+ TP_STRUCT__entry(
+ __field( unsigned char, flag_w )
+ __field( unsigned char, flag_x )
+ __field( unsigned long, eaddr )
+ __field( unsigned long, hpteg )
+ __field( unsigned long, va )
+ __field( unsigned long long, vpage )
+ __field( unsigned long, hpaddr )
+ ),
+
+ TP_fast_assign(
+ __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+ __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
+ __entry->eaddr = orig_pte->eaddr;
+ __entry->hpteg = hpteg;
+ __entry->va = va;
+ __entry->vpage = orig_pte->vpage;
+ __entry->hpaddr = hpaddr;
+ ),
+
+ TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+ __entry->flag_w, __entry->flag_x, __entry->eaddr,
+ __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_vpn )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_vpn = pte->host_vpn;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_vpn )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_vpn = pte->host_vpn;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,
+ TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+ unsigned long long p2),
+ TP_ARGS(type, vcpu, p1, p2),
+
+ TP_STRUCT__entry(
+ __field( int, count )
+ __field( unsigned long long, p1 )
+ __field( unsigned long long, p2 )
+ __field( const char *, type )
+ ),
+
+ TP_fast_assign(
+ __entry->count = to_book3s(vcpu)->hpte_cache_count;
+ __entry->p1 = p1;
+ __entry->p2 = p2;
+ __entry->type = type;
+ ),
+
+ TP_printk("Flush %d %sPTEs: %llx - %llx",
+ __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+ TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+ TP_ARGS(gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned long long, gvsid )
+ __field( unsigned long long, hvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->gvsid = gvsid;
+ __entry->hvsid = hvsid;
+ ),
+
+ TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+ TP_ARGS(sid_map_mask, gvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, gvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->gvsid = gvsid;
+ ),
+
+ TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+ SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+ unsigned long long hvsid),
+ TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, guest_vsid )
+ __field( unsigned long long, host_vsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->guest_vsid = gvsid;
+ __entry->host_vsid = hvsid;
+ ),
+
+ TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+ __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+ TP_PROTO(u64 slb_vsid, u64 slb_esid),
+ TP_ARGS(slb_vsid, slb_esid),
+
+ TP_STRUCT__entry(
+ __field( u64, slb_vsid )
+ __field( u64, slb_esid )
+ ),
+
+ TP_fast_assign(
+ __entry->slb_vsid = slb_vsid;
+ __entry->slb_esid = slb_esid;
+ ),
+
+ TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, srr1 )
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = kvmppc_get_msr(vcpu);
+ __entry->srr1 = vcpu->arch.shadow_srr1;
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+ " | srr1=0x%lx"
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+ __entry->srr1,
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>