aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/44x.c82
-rw-r--r--arch/powerpc/kvm/44x_emulate.c169
-rw-r--r--arch/powerpc/kvm/44x_tlb.c12
-rw-r--r--arch/powerpc/kvm/Kconfig112
-rw-r--r--arch/powerpc/kvm/Makefile83
-rw-r--r--arch/powerpc/kvm/book3s.c1404
-rw-r--r--arch/powerpc/kvm/book3s.h34
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c110
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c62
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c449
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c211
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c1667
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S100
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c150
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c105
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c332
-rw-r--r--arch/powerpc/kvm/book3s_exports.c16
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2482
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c214
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.c240
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.h27
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S195
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c145
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c922
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c406
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2501
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S277
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c154
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c195
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1674
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c305
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S180
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c278
-rw-r--r--arch/powerpc/kvm/book3s_segment.S278
-rw-r--r--arch/powerpc/kvm/book3s_xics.c1303
-rw-r--r--arch/powerpc/kvm/book3s_xics.h130
-rw-r--r--arch/powerpc/kvm/booke.c1498
-rw-r--r--arch/powerpc/kvm/booke.h121
-rw-r--r--arch/powerpc/kvm/booke_emulate.c184
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S507
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S734
-rw-r--r--arch/powerpc/kvm/e500.c475
-rw-r--r--arch/powerpc/kvm/e500.h322
-rw-r--r--arch/powerpc/kvm/e500_emulate.c304
-rw-r--r--arch/powerpc/kvm/e500_mmu.c962
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c699
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.h18
-rw-r--r--arch/powerpc/kvm/e500_tlb.c762
-rw-r--r--arch/powerpc/kvm/e500_tlb.h185
-rw-r--r--arch/powerpc/kvm/e500mc.c397
-rw-r--r--arch/powerpc/kvm/emulate.c464
-rw-r--r--arch/powerpc/kvm/irq.h20
-rw-r--r--arch/powerpc/kvm/mpic.c1857
-rw-r--r--arch/powerpc/kvm/powerpc.c756
-rw-r--r--arch/powerpc/kvm/timing.c38
-rw-r--r--arch/powerpc/kvm/timing.h6
-rw-r--r--arch/powerpc/kvm/trace.h239
-rw-r--r--arch/powerpc/kvm/trace_booke.h177
-rw-r--r--arch/powerpc/kvm/trace_pr.h297
60 files changed, 23979 insertions, 4049 deletions
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 74d0e742114..9cb4b0a3603 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -20,6 +20,9 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -28,15 +31,18 @@
#include <asm/kvm_ppc.h>
#include "44x_tlb.h"
+#include "booke.h"
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
{
+ kvmppc_booke_vcpu_load(vcpu, cpu);
kvmppc_44x_tlb_load(vcpu);
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
{
kvmppc_44x_tlb_put(vcpu);
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -78,6 +84,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
+ vcpu->arch.cpu_type = KVM_CPU_440;
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+
return 0;
}
@@ -107,7 +116,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ return -EINVAL;
+}
+
+static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ return -EINVAL;
+}
+
+static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
struct kvm_vcpu *vcpu;
@@ -138,7 +172,7 @@ out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
@@ -147,21 +181,57 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
+static int kvmppc_core_init_vm_44x(struct kvm *kvm)
+{
+ return 0;
+}
+
+static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
+{
+}
+
+static struct kvmppc_ops kvm_ops_44x = {
+ .get_sregs = kvmppc_core_get_sregs_44x,
+ .set_sregs = kvmppc_core_set_sregs_44x,
+ .get_one_reg = kvmppc_get_one_reg_44x,
+ .set_one_reg = kvmppc_set_one_reg_44x,
+ .vcpu_load = kvmppc_core_vcpu_load_44x,
+ .vcpu_put = kvmppc_core_vcpu_put_44x,
+ .vcpu_create = kvmppc_core_vcpu_create_44x,
+ .vcpu_free = kvmppc_core_vcpu_free_44x,
+ .mmu_destroy = kvmppc_mmu_destroy_44x,
+ .init_vm = kvmppc_core_init_vm_44x,
+ .destroy_vm = kvmppc_core_destroy_vm_44x,
+ .emulate_op = kvmppc_core_emulate_op_44x,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
+};
+
static int __init kvmppc_44x_init(void)
{
int r;
r = kvmppc_booke_init();
if (r)
- return r;
+ goto err_out;
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_44x.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_44x;
+
+err_out:
+ return r;
}
static void __exit kvmppc_44x_exit(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_booke_exit();
}
module_init(kvmppc_44x_init);
module_exit(kvmppc_44x_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 65ea083a5b2..92c9ab4bcfe 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,98 +27,109 @@
#include "booke.h"
#include "44x_tlb.h"
+#define XOP_MFDCRX 259
#define XOP_MFDCR 323
+#define XOP_MTDCRX 387
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
+{
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
+ return EMULATE_DONE;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
+ vcpu->run->dcr.is_write = 1;
+ vcpu->arch.dcr_is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+}
+
+static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
+{
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ kvmppc_set_gpr(vcpu, rt,
+ mfdcr(DCRN_CPR0_CONFIG_DATA));
+ local_irq_enable();
+ break;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = 0;
+ vcpu->run->dcr.is_write = 0;
+ vcpu->arch.dcr_is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int dcrn;
- int ra;
- int rb;
- int rc;
- int rs;
- int rt;
- int ws;
+ int dcrn = get_dcrn(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rc = get_rc(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int ws = get_ws(inst);
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
case XOP_MFDCR:
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- kvmppc_set_gpr(vcpu, rt,
- mfdcr(DCRN_CPR0_CONFIG_DATA));
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mfdcr(vcpu, rt, dcrn);
+ break;
+ case XOP_MFDCRX:
+ emulated = emulate_mfdcr(vcpu, rt,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_MTDCR:
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = kvmppc_get_gpr(vcpu, rs);
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mtdcr(vcpu, rs, dcrn);
+ break;
+ case XOP_MTDCRX:
+ emulated = emulate_mtdcr(vcpu, rs,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_TLBWE:
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
break;
case XOP_TLBSX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
break;
@@ -141,45 +152,43 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
+ kvmppc_set_pid(vcpu, spr_val); break;
case SPRN_MMUCR:
- vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.mmucr = spr_val; break;
case SPRN_CCR0:
- vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr0 = spr_val; break;
case SPRN_CCR1:
- vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr1 = spr_val; break;
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
- kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
+ *spr_val = vcpu->arch.pid; break;
case SPRN_MMUCR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
+ *spr_val = vcpu->arch.mmucr; break;
case SPRN_CCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
+ *spr_val = vcpu->arch.ccr0; break;
case SPRN_CCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
+ *spr_val = vcpu->arch.ccr1; break;
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
- kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
return emulated;
}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5f3cff83e08..0deef1082e0 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
trace_kvm_stlb_inval(stlb_index);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
@@ -319,7 +319,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
(unsigned long long)gfn);
- kvm_release_page_clean(new_page);
return;
}
hpaddr = page_to_phys(new_page);
@@ -387,8 +386,10 @@ static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
}
}
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
+ int usermode = vcpu->arch.shared->msr & MSR_PR;
+
vcpu->arch.shadow_pid = !usermode;
}
@@ -440,6 +441,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *tlbe;
unsigned int gtlb_index;
+ int idx;
gtlb_index = kvmppc_get_gpr(vcpu, ra);
if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
@@ -472,6 +474,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
return EMULATE_FAIL;
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
if (tlbe_is_host_safe(vcpu, tlbe)) {
gva_t eaddr;
gpa_t gpaddr;
@@ -488,6 +492,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b7baff78f90..d6a53b95de9 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,7 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
- select KVM_MMIO
+ select HAVE_KVM_EVENTFD
config KVM_BOOK3S_HANDLER
bool
@@ -28,16 +28,26 @@ config KVM_BOOK3S_HANDLER
config KVM_BOOK3S_32_HANDLER
bool
select KVM_BOOK3S_HANDLER
+ select KVM_MMIO
config KVM_BOOK3S_64_HANDLER
bool
select KVM_BOOK3S_HANDLER
+config KVM_BOOK3S_PR_POSSIBLE
+ bool
+ select KVM_MMIO
+ select MMU_NOTIFIER
+
+config KVM_BOOK3S_HV_POSSIBLE
+ bool
+
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
- depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+ depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
select KVM
select KVM_BOOK3S_32_HANDLER
+ select KVM_BOOK3S_PR_POSSIBLE
---help---
Support running unmodified book3s_32 guest kernels
in virtual machines on book3s_32 host processors.
@@ -49,9 +59,10 @@ config KVM_BOOK3S_32
config KVM_BOOK3S_64
tristate "KVM support for PowerPC book3s_64 processors"
- depends on EXPERIMENTAL && PPC_BOOK3S_64
- select KVM
+ depends on PPC_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
+ select KVM
+ select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
@@ -61,10 +72,52 @@ config KVM_BOOK3S_64
If unsure, say N.
+config KVM_BOOK3S_64_HV
+ tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ depends on KVM_BOOK3S_64
+ depends on !CPU_LITTLE_ENDIAN
+ select KVM_BOOK3S_HV_POSSIBLE
+ select MMU_NOTIFIER
+ select CMA
+ ---help---
+ Support running unmodified book3s_64 guest kernels in
+ virtual machines on POWER7 and PPC970 processors that have
+ hypervisor mode available to the host.
+
+ If you say Y here, KVM will use the hardware virtualization
+ facilities of POWER7 (and later) processors, meaning that
+ guest operating systems will run at full hardware speed
+ using supervisor and user modes. However, this also means
+ that KVM is not usable under PowerVM (pHyp), is only usable
+ on POWER7 (or later) processors and PPC970-family processors,
+ and cannot emulate a different processor from the host processor.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_64_PR
+ tristate "KVM support without using hypervisor mode in host"
+ depends on KVM_BOOK3S_64
+ select KVM_BOOK3S_PR_POSSIBLE
+ ---help---
+ Support running guest kernels in virtual machines on processors
+ without using hypervisor mode in the host, by running the
+ guest in user mode (problem state) and emulating all
+ privileged instructions and registers.
+
+ This is not as fast as using hypervisor mode, but works on
+ machines where hypervisor mode is not available or not usable,
+ and can emulate processors that are different from the host
+ processor, including emulating 32-bit processors on a 64-bit
+ host.
+
+config KVM_BOOKE_HV
+ bool
+
config KVM_440
bool "KVM support for PowerPC 440 processors"
- depends on EXPERIMENTAL && 44x
+ depends on 44x
select KVM
+ select KVM_MMIO
---help---
Support running unmodified 440 guest kernels in virtual machines on
440 host processors.
@@ -76,7 +129,7 @@ config KVM_440
config KVM_EXIT_TIMING
bool "Detailed exit timing"
- depends on KVM_440 || KVM_E500
+ depends on KVM_440 || KVM_E500V2 || KVM_E500MC
---help---
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -85,20 +138,57 @@ config KVM_EXIT_TIMING
If unsure, say N.
-config KVM_E500
- bool "KVM support for PowerPC E500 processors"
- depends on EXPERIMENTAL && E500
+config KVM_E500V2
+ bool "KVM support for PowerPC E500v2 processors"
+ depends on E500 && !PPC_E500MC
select KVM
+ select KVM_MMIO
+ select MMU_NOTIFIER
---help---
Support running unmodified E500 guest kernels in virtual machines on
- E500 host processors.
+ E500v2 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_E500MC
+ bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
+ depends on PPC_E500MC
+ select KVM
+ select KVM_MMIO
+ select KVM_BOOKE_HV
+ select MMU_NOTIFIER
+ ---help---
+ Support running unmodified E500MC/E5500/E6500 guest kernels in
+ virtual machines on E500MC/E5500/E6500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
+config KVM_MPIC
+ bool "KVM in-kernel MPIC emulation"
+ depends on KVM && E500
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_MSI
+ help
+ Enable support for emulating MPIC devices inside the
+ host kernel, rather than relying on userspace to emulate.
+ Currently, support is limited to certain versions of
+ Freescale's MPIC implementation.
+
+config KVM_XICS
+ bool "KVM in-kernel XICS emulation"
+ depends on KVM_BOOK3S_64 && !KVM_MPIC
+ ---help---
+ Include support for the XICS (eXternal Interrupt Controller
+ Specification) interrupt controller architecture used on
+ IBM POWER (pSeries) servers.
+
source drivers/vhost/Kconfig
-source drivers/virtio/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4d6863823f6..ce569b6bf4d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,11 +5,14 @@
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+ $(KVM)/eventfd.o
CFLAGS_44x_tlb.o := -I.
-CFLAGS_e500_tlb.o := -I.
+CFLAGS_e500_mmu.o := -I.
+CFLAGS_e500_mmu_host.o := -I.
CFLAGS_emulate.o := -I.
common-objs-y += powerpc.o emulate.o
@@ -34,28 +37,84 @@ kvm-e500-objs := \
booke_emulate.o \
booke_interrupts.o \
e500.o \
- e500_tlb.o \
+ e500_mmu.o \
+ e500_mmu_host.o \
e500_emulate.o
-kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
+kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
-kvm-book3s_64-objs := \
+kvm-e500mc-objs := \
$(common-objs-y) \
+ booke.o \
+ booke_emulate.o \
+ bookehv_interrupts.o \
+ e500mc.o \
+ e500_mmu.o \
+ e500_mmu_host.o \
+ e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
+ book3s_64_vio_hv.o
+
+kvm-pr-y := \
fpu.o \
book3s_paired_singles.o \
- book3s.o \
+ book3s_pr.o \
+ book3s_pr_papr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
book3s_64_mmu_host.o \
book3s_64_mmu.o \
book3s_32_mmu.o
-kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs)
+
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+kvm-book3s_64-module-objs := \
+ $(KVM)/coalesced_mmio.o
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_rmhandlers.o
+endif
+
+kvm-hv-y += \
+ book3s_hv.o \
+ book3s_hv_interrupts.o \
+ book3s_64_mmu_hv.o
+
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+ book3s_hv_rm_xics.o
+
+ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_hv_rmhandlers.o \
+ book3s_hv_rm_mmu.o \
+ book3s_hv_ras.o \
+ book3s_hv_builtin.o \
+ book3s_hv_cma.o \
+ $(kvm-book3s_64-builtin-xics-objs-y)
+endif
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+ book3s_xics.o
+
+kvm-book3s_64-module-objs += \
+ $(KVM)/kvm_main.o \
+ $(KVM)/eventfd.o \
+ powerpc.o \
+ emulate.o \
+ book3s.o \
+ book3s_64_vio.o \
+ book3s_rtas.o \
+ $(kvm-book3s_64-objs-y)
+
+kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
kvm-book3s_32-objs := \
$(common-objs-y) \
fpu.o \
book3s_paired_singles.o \
book3s.o \
+ book3s_pr.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
@@ -63,10 +122,18 @@ kvm-book3s_32-objs := \
book3s_32_mmu.o
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
+
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o
-obj-$(CONFIG_KVM_E500) += kvm.o
+obj-$(CONFIG_KVM_E500V2) += kvm.o
+obj-$(CONFIG_KVM_E500MC) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
+obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
+
+obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index e316847c08c..c254c27f240 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -16,8 +16,10 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/slab.h>
-#include "trace.h"
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -28,25 +30,18 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
+#include <asm/page.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include "book3s.h"
+#include "trace.h"
+
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/* #define EXIT_DEBUG */
-/* #define DEBUG_EXT */
-
-static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
- ulong msr);
-
-/* Some compatibility defines */
-#ifdef CONFIG_PPC_BOOK3S_32
-#define MSR_USER32 MSR_USER
-#define MSR_USER64 MSR_USER
-#define HW_PAGE_SIZE PAGE_SIZE
-#endif
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exits", VCPU_STAT(sum_exits) },
@@ -77,100 +72,55 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
- memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
- to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S_32
- current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
-#endif
-}
-
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
{
-#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
- memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
- sizeof(get_paca()->shadow_vcpu));
- to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
-#endif
-
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
+ if (!is_kvmppc_hv_enabled(vcpu->kvm))
+ return to_book3s(vcpu)->hior;
+ return 0;
}
-static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+ unsigned long pending_now, unsigned long old_pending)
{
- ulong smsr = vcpu->arch.shared->msr;
-
- /* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
- /* Process MSR values */
- smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
- /* External providers the guest reserved */
- smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
- /* 64-bit Process MSR values */
-#ifdef CONFIG_PPC_BOOK3S_64
- smsr |= MSR_ISF | MSR_HV;
-#endif
- vcpu->arch.shadow_msr = smsr;
+ if (is_kvmppc_hv_enabled(vcpu->kvm))
+ return;
+ if (pending_now)
+ kvmppc_set_int_pending(vcpu, 1);
+ else if (old_pending)
+ kvmppc_set_int_pending(vcpu, 0);
}
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
{
- ulong old_msr = vcpu->arch.shared->msr;
-
-#ifdef EXIT_DEBUG
- printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
-#endif
+ ulong crit_raw;
+ ulong crit_r1;
+ bool crit;
- msr &= to_book3s(vcpu)->msr_mask;
- vcpu->arch.shared->msr = msr;
- kvmppc_recalc_shadow_msr(vcpu);
+ if (is_kvmppc_hv_enabled(vcpu->kvm))
+ return false;
- if (msr & MSR_POW) {
- if (!vcpu->arch.pending_exceptions) {
- kvm_vcpu_block(vcpu);
- vcpu->stat.halt_wakeup++;
+ crit_raw = kvmppc_get_critical(vcpu);
+ crit_r1 = kvmppc_get_gpr(vcpu, 1);
- /* Unset POW bit after we woke up */
- msr &= ~MSR_POW;
- vcpu->arch.shared->msr = msr;
- }
+ /* Truncate crit indicators in 32 bit mode */
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
+ crit_raw &= 0xffffffff;
+ crit_r1 &= 0xffffffff;
}
- if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
- (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
- kvmppc_mmu_flush_segments(vcpu);
- kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-
- /* Preload magic page segment when in kernel mode */
- if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
- struct kvm_vcpu_arch *a = &vcpu->arch;
-
- if (msr & MSR_DR)
- kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
- else
- kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
- }
- }
+ /* Critical section when crit == r1 */
+ crit = (crit_raw == crit_r1);
+ /* ... and we're in supervisor mode */
+ crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);
- /* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
- kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+ return crit;
}
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
- vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
- kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
+ kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+ kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
+ kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
vcpu->arch.mmu.reset_msr(vcpu);
}
@@ -195,20 +145,23 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
+ case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break;
default: prio = BOOK3S_IRQPRIO_MAX; break;
}
return prio;
}
-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
+ unsigned long old_pending = vcpu->arch.pending_exceptions;
+
clear_bit(kvmppc_book3s_vec2irqprio(vec),
&vcpu->arch.pending_exceptions);
- if (!vcpu->arch.pending_exceptions)
- vcpu->arch.shared->int_pending = 0;
+ kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions,
+ old_pending);
}
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -221,28 +174,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
printk(KERN_INFO "Queueing interrupt %x\n", vec);
#endif
}
-
+EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
- to_book3s(vcpu)->prog_flags = flags;
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
- return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+ return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
@@ -255,8 +212,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -266,30 +222,16 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
{
int deliver = 1;
int vec = 0;
- ulong flags = 0ULL;
- ulong crit_raw = vcpu->arch.shared->critical;
- ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
- bool crit;
-
- /* Truncate crit indicators in 32 bit mode */
- if (!(vcpu->arch.shared->msr & MSR_SF)) {
- crit_raw &= 0xffffffff;
- crit_r1 &= 0xffffffff;
- }
-
- /* Critical section when crit == r1 */
- crit = (crit_raw == crit_r1);
- /* ... and we're in supervisor mode */
- crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+ bool crit = kvmppc_critical_section(vcpu);
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
- deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+ deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
- deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+ deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -315,7 +257,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
break;
case BOOK3S_IRQPRIO_PROGRAM:
vec = BOOK3S_INTERRUPT_PROGRAM;
- flags = to_book3s(vcpu)->prog_flags;
break;
case BOOK3S_IRQPRIO_VSX:
vec = BOOK3S_INTERRUPT_VSX;
@@ -335,6 +276,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
vec = BOOK3S_INTERRUPT_PERFMON;
break;
+ case BOOK3S_IRQPRIO_FAC_UNAVAIL:
+ vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
+ break;
default:
deliver = 0;
printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
@@ -346,7 +290,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
#endif
if (deliver)
- kvmppc_inject_interrupt(vcpu, vec, flags);
+ kvmppc_inject_interrupt(vcpu, vec, 0);
return deliver;
}
@@ -368,7 +312,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
return true;
}
-void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -392,70 +336,20 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
}
/* Tell the guest about our interrupt status */
- if (*pending)
- vcpu->arch.shared->int_pending = 1;
- else if (old_pending)
- vcpu->arch.shared->int_pending = 0;
-}
-
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
-{
- u32 host_pvr;
-
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
- vcpu->arch.pvr = pvr;
-#ifdef CONFIG_PPC_BOOK3S_64
- if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
- kvmppc_mmu_book3s_64_init(vcpu);
- to_book3s(vcpu)->hior = 0xfff00000;
- to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
- } else
-#endif
- {
- kvmppc_mmu_book3s_32_init(vcpu);
- to_book3s(vcpu)->hior = 0;
- to_book3s(vcpu)->msr_mask = 0xffffffffULL;
- }
-
- /* If we are in hypervisor level on 970, we can tell the CPU to
- * treat DCBZ as 32 bytes store */
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
- if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
- !strcmp(cur_cpu_spec->platform, "ppc970"))
- vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
-
- /* Cell performs badly if MSR_FEx are set. So let's hope nobody
- really needs them in a VM on Cell and force disable them. */
- if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
- to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* 32 bit Book3S always has 32 byte dcbz */
- vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
-#endif
+ kvmppc_update_int_pending(vcpu, *pending, old_pending);
- /* On some CPUs we can execute paired single operations natively */
- asm ( "mfpvr %0" : "=r"(host_pvr));
- switch (host_pvr) {
- case 0x00080200: /* lonestar 2.0 */
- case 0x00088202: /* lonestar 2.2 */
- case 0x70000100: /* gekko 1.0 */
- case 0x00080100: /* gekko 2.0 */
- case 0x00083203: /* gekko 2.3a */
- case 0x00083213: /* gekko 2.3b */
- case 0x00083204: /* gekko 2.4 */
- case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
- case 0x00087200: /* broadway */
- vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
- /* Enable HID2.PSE - in case we need it later */
- mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
- }
+ return 0;
}
+EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
-pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+ bool *writable)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
/* Magic page override */
if (unlikely(mp_pa) &&
unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
@@ -465,58 +359,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
get_page(pfn_to_page(pfn));
+ if (writable)
+ *writable = true;
return pfn;
}
- return gfn_to_pfn(vcpu->kvm, gfn);
-}
-
-/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
- * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
- * emulate 32 bytes dcbz length.
- *
- * The Book3s_64 inventors also realized this case and implemented a special bit
- * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
- *
- * My approach here is to patch the dcbz instruction on executing pages.
- */
-static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
-{
- struct page *hpage;
- u64 hpage_offset;
- u32 *page;
- int i;
-
- hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
- if (is_error_page(hpage)) {
- kvm_release_page_clean(hpage);
- return;
- }
-
- hpage_offset = pte->raddr & ~PAGE_MASK;
- hpage_offset &= ~0xFFFULL;
- hpage_offset /= 4;
-
- get_page(hpage);
- page = kmap_atomic(hpage, KM_USER0);
-
- /* patch dcbz into reserved instruction, so we trap */
- for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
- if ((page[i] & 0xff0007ff) == INS_DCBZ)
- page[i] &= 0xfffffff7;
-
- kunmap_atomic(page, KM_USER0);
- put_page(hpage);
+ return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
}
+EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
- struct kvmppc_pte *pte)
+ bool iswrite, struct kvmppc_pte *pte)
{
- int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
+ int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));
int r;
if (relocated) {
- r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+ r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
} else {
pte->eaddr = eaddr;
pte->raddr = eaddr & KVM_PAM;
@@ -562,7 +421,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
vcpu->stat.st++;
- if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+ if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
return -ENOENT;
*eaddr = pte.raddr;
@@ -575,6 +434,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
+EXPORT_SYMBOL_GPL(kvmppc_st);
int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
bool data)
@@ -584,7 +444,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
vcpu->stat.ld++;
- if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+ if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
goto nopte;
*eaddr = pte.raddr;
@@ -605,523 +465,32 @@ nopte:
mmio:
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_ld);
-static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- ulong mp_pa = vcpu->arch.magic_page_pa;
-
- if (unlikely(mp_pa) &&
- unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
- return 1;
- }
-
- return kvm_is_visible_gfn(vcpu->kvm, gfn);
-}
-
-int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
- ulong eaddr, int vec)
-{
- bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
- int r = RESUME_GUEST;
- int relocated;
- int page_found = 0;
- struct kvmppc_pte pte;
- bool is_mmio = false;
- bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
- bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
- u64 vsid;
-
- relocated = data ? dr : ir;
-
- /* Resolve real address if translation turned on */
- if (relocated) {
- page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
- } else {
- pte.may_execute = true;
- pte.may_read = true;
- pte.may_write = true;
- pte.raddr = eaddr & KVM_PAM;
- pte.eaddr = eaddr;
- pte.vpage = eaddr >> 12;
- }
-
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
- case 0:
- pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
- break;
- case MSR_DR:
- case MSR_IR:
- vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
-
- if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
- pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
- else
- pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
- pte.vpage |= vsid;
-
- if (vsid == -1)
- page_found = -EINVAL;
- break;
- }
-
- if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
- /*
- * If we do the dcbz hack, we have to NX on every execution,
- * so we can patch the executing code. This renders our guest
- * NX-less.
- */
- pte.may_execute = !data;
- }
-
- if (page_found == -ENOENT) {
- /* Page not found in guest PTE entries */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
- vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
- kvmppc_book3s_queue_irqprio(vcpu, vec);
- } else if (page_found == -EPERM) {
- /* Storage protection */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr =
- to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
- vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
- vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
- kvmppc_book3s_queue_irqprio(vcpu, vec);
- } else if (page_found == -EINVAL) {
- /* Page not found in guest SLB */
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
- } else if (!is_mmio &&
- kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
- /* The guest's PTE is not mapped yet. Map on the host */
- kvmppc_mmu_map_page(vcpu, &pte);
- if (data)
- vcpu->stat.sp_storage++;
- else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
- kvmppc_patch_dcbz(vcpu, &pte);
- } else {
- /* MMIO */
- vcpu->stat.mmio_exits++;
- vcpu->arch.paddr_accessed = pte.raddr;
- r = kvmppc_emulate_mmio(run, vcpu);
- if ( r == RESUME_HOST_NV )
- r = RESUME_HOST;
- }
-
- return r;
-}
-
-static inline int get_fpr_index(int i)
-{
-#ifdef CONFIG_VSX
- i *= 2;
-#endif
- return i;
-}
-
-/* Give up external provider (FPU, Altivec, VSX) */
-void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
-
- if (!(vcpu->arch.guest_owned_ext & msr))
- return;
-
-#ifdef DEBUG_EXT
- printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
-#endif
-
- switch (msr) {
- case MSR_FP:
- giveup_fpu(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
-
- vcpu->arch.fpscr = t->fpscr.val;
- break;
- case MSR_VEC:
-#ifdef CONFIG_ALTIVEC
- giveup_altivec(current);
- memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
- vcpu->arch.vscr = t->vscr;
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- __giveup_vsx(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
- break;
- default:
- BUG();
- }
-
- vcpu->arch.guest_owned_ext &= ~msr;
- current->thread.regs->msr &= ~msr;
- kvmppc_recalc_shadow_msr(vcpu);
+ return 0;
}
-static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
- ulong srr0 = kvmppc_get_pc(vcpu);
- u32 last_inst = kvmppc_get_last_inst(vcpu);
- int ret;
-
- ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
- if (ret == -ENOENT) {
- ulong msr = vcpu->arch.shared->msr;
-
- msr = kvmppc_set_field(msr, 33, 33, 1);
- msr = kvmppc_set_field(msr, 34, 36, 0);
- vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
- return EMULATE_AGAIN;
- }
-
- return EMULATE_DONE;
+ return 0;
}
-static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
-
- /* Need to do paired single emulation? */
- if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
- return EMULATE_DONE;
-
- /* Read out the instruction */
- if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
- /* Need to emulate */
- return EMULATE_FAIL;
-
- return EMULATE_AGAIN;
}
-/* Handle external providers (FPU, Altivec, VSX) */
-static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
- ulong msr)
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- struct thread_struct *t = &current->thread;
- u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
- u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
- u64 *thread_fpr = (u64*)t->fpr;
- int i;
-
- /* When we have paired singles, we emulate in software */
- if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
- return RESUME_GUEST;
-
- if (!(vcpu->arch.shared->msr & msr)) {
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- return RESUME_GUEST;
- }
-
- /* We already own the ext */
- if (vcpu->arch.guest_owned_ext & msr) {
- return RESUME_GUEST;
- }
-
-#ifdef DEBUG_EXT
- printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
-#endif
-
- current->thread.regs->msr |= msr;
-
- switch (msr) {
- case MSR_FP:
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
- thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-
- t->fpscr.val = vcpu->arch.fpscr;
- t->fpexc_mode = 0;
- kvmppc_load_up_fpu();
- break;
- case MSR_VEC:
-#ifdef CONFIG_ALTIVEC
- memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
- t->vscr = vcpu->arch.vscr;
- t->vrsave = -1;
- kvmppc_load_up_altivec();
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
- kvmppc_load_up_vsx();
-#endif
- break;
- default:
- BUG();
- }
-
- vcpu->arch.guest_owned_ext |= msr;
-
- kvmppc_recalc_shadow_msr(vcpu);
-
- return RESUME_GUEST;
+ return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
}
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
-{
- int r = RESUME_HOST;
-
- vcpu->stat.sum_exits++;
-
- run->exit_reason = KVM_EXIT_UNKNOWN;
- run->ready_for_interrupt_injection = 1;
-
- trace_kvm_book3s_exit(exit_nr, vcpu);
- kvm_resched(vcpu);
- switch (exit_nr) {
- case BOOK3S_INTERRUPT_INST_STORAGE:
- vcpu->stat.pf_instruc++;
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We set segments as unused segments when invalidating them. So
- * treat the respective fault as segment fault. */
- if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
- == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
- r = RESUME_GUEST;
- break;
- }
-#endif
-
- /* only care about PTEG not found errors, but leave NX alone */
- if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
- r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
- vcpu->stat.sp_instruc++;
- } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
- (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
- /*
- * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
- * so we can't use the NX bit inside the guest. Let's cross our fingers,
- * that no guest that needs the dcbz hack does NX.
- */
- kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
- r = RESUME_GUEST;
- } else {
- vcpu->arch.shared->msr |=
- to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- case BOOK3S_INTERRUPT_DATA_STORAGE:
- {
- ulong dar = kvmppc_get_fault_dar(vcpu);
- vcpu->stat.pf_storage++;
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We set segments as unused segments when invalidating them. So
- * treat the respective fault as segment fault. */
- if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
- kvmppc_mmu_map_segment(vcpu, dar);
- r = RESUME_GUEST;
- break;
- }
-#endif
-
- /* The only case we need to handle is missing shadow PTEs */
- if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
- r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
- } else {
- vcpu->arch.shared->dar = dar;
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- }
- case BOOK3S_INTERRUPT_DATA_SEGMENT:
- if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
- vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_DATA_SEGMENT);
- }
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_INST_SEGMENT:
- if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_INST_SEGMENT);
- }
- r = RESUME_GUEST;
- break;
- /* We're good on these - the host merely wanted to get our attention */
- case BOOK3S_INTERRUPT_DECREMENTER:
- vcpu->stat.dec_exits++;
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_EXTERNAL:
- vcpu->stat.ext_intr_exits++;
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_PERFMON:
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_PROGRAM:
- {
- enum emulation_result er;
- ulong flags;
-
-program_interrupt:
- flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
-
- if (vcpu->arch.shared->msr & MSR_PR) {
-#ifdef EXIT_DEBUG
- printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
-#endif
- if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
- (INS_DCBZ & 0xfffffff7)) {
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- }
- }
-
- vcpu->stat.emulated_inst_exits++;
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_AGAIN:
- r = RESUME_GUEST;
- break;
- case EMULATE_FAIL:
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
- r = RESUME_HOST_NV;
- break;
- default:
- BUG();
- }
- break;
- }
- case BOOK3S_INTERRUPT_SYSCALL:
- if (vcpu->arch.osi_enabled &&
- (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
- (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
- /* MOL hypercalls */
- u64 *gprs = run->osi.gprs;
- int i;
-
- run->exit_reason = KVM_EXIT_OSI;
- for (i = 0; i < 32; i++)
- gprs[i] = kvmppc_get_gpr(vcpu, i);
- vcpu->arch.osi_needed = 1;
- r = RESUME_HOST_NV;
- } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
- (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
- /* KVM PV hypercalls */
- kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
- r = RESUME_GUEST;
- } else {
- /* Guest syscalls */
- vcpu->stat.syscall_exits++;
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- }
- break;
- case BOOK3S_INTERRUPT_FP_UNAVAIL:
- case BOOK3S_INTERRUPT_ALTIVEC:
- case BOOK3S_INTERRUPT_VSX:
- {
- int ext_msr = 0;
-
- switch (exit_nr) {
- case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
- case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
- case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
- }
-
- switch (kvmppc_check_ext(vcpu, exit_nr)) {
- case EMULATE_DONE:
- /* everything ok - let's enable the ext */
- r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
- break;
- case EMULATE_FAIL:
- /* we need to emulate this instruction */
- goto program_interrupt;
- break;
- default:
- /* nothing to worry about - go again */
- break;
- }
- break;
- }
- case BOOK3S_INTERRUPT_ALIGNMENT:
- if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
- vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
- kvmppc_get_last_inst(vcpu));
- vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
- kvmppc_get_last_inst(vcpu));
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- }
- r = RESUME_GUEST;
- break;
- case BOOK3S_INTERRUPT_MACHINE_CHECK:
- case BOOK3S_INTERRUPT_TRACE:
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
- r = RESUME_GUEST;
- break;
- default:
- /* Ugh - bork here! What did we get? */
- printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
- r = RESUME_HOST;
- BUG();
- break;
- }
-
-
- if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(current)) {
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "KVM: Going back to host\n");
-#endif
- vcpu->stat.signal_exits++;
- run->exit_reason = KVM_EXIT_INTR;
- r = -EINTR;
- } else {
- /* In case an interrupt came in that was triggered
- * from userspace (like DEC), we need to check what
- * to inject now! */
- kvmppc_core_deliver_interrupts(vcpu);
- }
- }
-
- trace_kvm_book3s_reenter(r, vcpu);
-
- return r;
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- return 0;
+ return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -1133,17 +502,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->ctr = kvmppc_get_ctr(vcpu);
regs->lr = kvmppc_get_lr(vcpu);
regs->xer = kvmppc_get_xer(vcpu);
- regs->msr = vcpu->arch.shared->msr;
- regs->srr0 = vcpu->arch.shared->srr0;
- regs->srr1 = vcpu->arch.shared->srr1;
+ regs->msr = kvmppc_get_msr(vcpu);
+ regs->srr0 = kvmppc_get_srr0(vcpu);
+ regs->srr1 = kvmppc_get_srr1(vcpu);
regs->pid = vcpu->arch.pid;
- regs->sprg0 = vcpu->arch.shared->sprg0;
- regs->sprg1 = vcpu->arch.shared->sprg1;
- regs->sprg2 = vcpu->arch.shared->sprg2;
- regs->sprg3 = vcpu->arch.shared->sprg3;
- regs->sprg5 = vcpu->arch.sprg4;
- regs->sprg6 = vcpu->arch.sprg5;
- regs->sprg7 = vcpu->arch.sprg6;
+ regs->sprg0 = kvmppc_get_sprg0(vcpu);
+ regs->sprg1 = kvmppc_get_sprg1(vcpu);
+ regs->sprg2 = kvmppc_get_sprg2(vcpu);
+ regs->sprg3 = kvmppc_get_sprg3(vcpu);
+ regs->sprg4 = kvmppc_get_sprg4(vcpu);
+ regs->sprg5 = kvmppc_get_sprg5(vcpu);
+ regs->sprg6 = kvmppc_get_sprg6(vcpu);
+ regs->sprg7 = kvmppc_get_sprg7(vcpu);
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -1161,15 +531,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_lr(vcpu, regs->lr);
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
- vcpu->arch.shared->srr0 = regs->srr0;
- vcpu->arch.shared->srr1 = regs->srr1;
- vcpu->arch.shared->sprg0 = regs->sprg0;
- vcpu->arch.shared->sprg1 = regs->sprg1;
- vcpu->arch.shared->sprg2 = regs->sprg2;
- vcpu->arch.shared->sprg3 = regs->sprg3;
- vcpu->arch.sprg5 = regs->sprg4;
- vcpu->arch.sprg6 = regs->sprg5;
- vcpu->arch.sprg7 = regs->sprg6;
+ kvmppc_set_srr0(vcpu, regs->srr0);
+ kvmppc_set_srr1(vcpu, regs->srr1);
+ kvmppc_set_sprg0(vcpu, regs->sprg0);
+ kvmppc_set_sprg1(vcpu, regs->sprg1);
+ kvmppc_set_sprg2(vcpu, regs->sprg2);
+ kvmppc_set_sprg3(vcpu, regs->sprg3);
+ kvmppc_set_sprg4(vcpu, regs->sprg4);
+ kvmppc_set_sprg5(vcpu, regs->sprg5);
+ kvmppc_set_sprg6(vcpu, regs->sprg6);
+ kvmppc_set_sprg7(vcpu, regs->sprg7);
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -1177,77 +548,236 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
- int i;
+ return -ENOTSUPP;
+}
- sregs->pvr = vcpu->arch.pvr;
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
- sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
- for (i = 0; i < 64; i++) {
- sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
- sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
- }
- } else {
- for (i = 0; i < 16; i++)
- sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
- for (i = 0; i < 8; i++) {
- sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
- sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+ break;
+ case KVM_REG_PPC_DSISR:
+ val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+ break;
+ case KVM_REG_PPC_FPSCR:
+ val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ break;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = reg->id - KVM_REG_PPC_VSR0;
+ val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
+ val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+ case KVM_REG_PPC_DEBUG_INST: {
+ u32 opcode = INS_TW;
+ r = copy_to_user((u32 __user *)(long)reg->addr,
+ &opcode, sizeof(u32));
+ break;
+ }
+#ifdef CONFIG_KVM_XICS
+ case KVM_REG_PPC_ICP_STATE:
+ if (!vcpu->arch.icp) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+ break;
+#endif /* CONFIG_KVM_XICS */
+ case KVM_REG_PPC_FSCR:
+ val = get_reg_val(reg->id, vcpu->arch.fscr);
+ break;
+ case KVM_REG_PPC_TAR:
+ val = get_reg_val(reg->id, vcpu->arch.tar);
+ break;
+ case KVM_REG_PPC_EBBHR:
+ val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+ break;
+ case KVM_REG_PPC_EBBRR:
+ val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+ break;
+ case KVM_REG_PPC_BESCR:
+ val = get_reg_val(reg->id, vcpu->arch.bescr);
+ break;
+ default:
+ r = -EINVAL;
+ break;
}
}
+ if (r)
+ return r;
- return 0;
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
}
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
- int i;
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
- kvmppc_set_pvr(vcpu, sregs->pvr);
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
- vcpu3s->sdr1 = sregs->u.s.sdr1;
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
- for (i = 0; i < 64; i++) {
- vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
- sregs->u.s.ppc64.slb[i].slbe);
- }
- } else {
- for (i = 0; i < 16; i++) {
- vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
- }
- for (i = 0; i < 8; i++) {
- kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
- (u32)sregs->u.s.ppc32.ibat[i]);
- kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
- (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
- kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
- (u32)sregs->u.s.ppc32.dbat[i]);
- kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
- (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
+ break;
+ case KVM_REG_PPC_DSISR:
+ kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_FPSCR:
+ vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = reg->id - KVM_REG_PPC_VSR0;
+ vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
+ vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_KVM_XICS
+ case KVM_REG_PPC_ICP_STATE:
+ if (!vcpu->arch.icp) {
+ r = -ENXIO;
+ break;
+ }
+ r = kvmppc_xics_set_icp(vcpu,
+ set_reg_val(reg->id, val));
+ break;
+#endif /* CONFIG_KVM_XICS */
+ case KVM_REG_PPC_FSCR:
+ vcpu->arch.fscr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_TAR:
+ vcpu->arch.tar = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_EBBHR:
+ vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_EBBRR:
+ vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_BESCR:
+ vcpu->arch.bescr = set_reg_val(reg->id, val);
+ break;
+ default:
+ r = -EINVAL;
+ break;
}
}
- /* Flush the MMU after messing with the segments */
- kvmppc_mmu_pte_flush(vcpu, 0, 0);
+ return r;
+}
- return 0;
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
}
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
- return -ENOTSUPP;
+ vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
}
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
- return -ENOTSUPP;
+ vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr);
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -1256,242 +786,160 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- struct kvm_memory_slot *memslot;
- struct kvm_vcpu *vcpu;
- ulong ga, ga_end;
- int is_dirty = 0;
- int r;
- unsigned long n;
-
- mutex_lock(&kvm->slots_lock);
+ return -EINVAL;
+}
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
- if (r)
- goto out;
+void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
- /* If nothing is dirty, don't bother messing with page tables. */
- if (is_dirty) {
- memslot = &kvm->memslots->memslots[log->slot];
+ kvmppc_core_queue_dec(vcpu);
+ kvm_vcpu_kick(vcpu);
+}
- ga = memslot->base_gfn << PAGE_SHIFT;
- ga_end = ga + (memslot->npages << PAGE_SHIFT);
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
- kvm_for_each_vcpu(n, vcpu, kvm)
- kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
- n = kvm_dirty_bitmap_bytes(memslot);
- memset(memslot->dirty_bitmap, 0, n);
- }
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
+}
- r = 0;
-out:
- mutex_unlock(&kvm->slots_lock);
- return r;
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
}
-int kvmppc_core_check_processor_compat(void)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
{
- return 0;
+ kvm->arch.kvm_ops->free_memslot(free, dont);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
- unsigned long p;
-
- vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
- if (!vcpu_book3s)
- goto out;
-
- memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
-
- vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
- kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
- if (!vcpu_book3s->shadow_vcpu)
- goto free_vcpu;
-
- vcpu = &vcpu_book3s->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- /* the real shared page fills the last 4k of our page */
- vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
- if (!p)
- goto uninit_vcpu;
-
- vcpu->arch.host_retip = kvm_return_point;
- vcpu->arch.host_msr = mfmsr();
-#ifdef CONFIG_PPC_BOOK3S_64
- /* default to book3s_64 (970fx) */
- vcpu->arch.pvr = 0x3C0301;
-#else
- /* default to book3s_32 (750) */
- vcpu->arch.pvr = 0x84202;
-#endif
- kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
- vcpu_book3s->slb_nr = 64;
-
- /* remember where some real-mode handlers are */
- vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
- vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
- vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
-#ifdef CONFIG_PPC_BOOK3S_64
- vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
-#else
- vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
-#endif
+ return kvm->arch.kvm_ops->create_memslot(slot, npages);
+}
- vcpu->arch.shadow_msr = MSR_USER64;
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
+}
- err = kvmppc_mmu_init(vcpu);
- if (err < 0)
- goto uninit_vcpu;
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+}
- return vcpu;
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
+{
+ kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
+}
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_shadow_vcpu:
- kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
- vfree(vcpu_book3s);
-out:
- return ERR_PTR(err);
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
}
+EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
+}
- free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
- kfree(vcpu_book3s->shadow_vcpu);
- vfree(vcpu_book3s);
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm->arch.kvm_ops->age_hva(kvm, hva);
}
-extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
- int ret;
- double fpr[32][TS_FPRWIDTH];
- unsigned int fpscr;
- int fpexc_mode;
-#ifdef CONFIG_ALTIVEC
- vector128 vr[32];
- vector128 vscr;
- unsigned long uninitialized_var(vrsave);
- int used_vr;
-#endif
-#ifdef CONFIG_VSX
- int used_vsr;
-#endif
- ulong ext_msr;
+ return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
+}
- /* No need to go into the guest when all we do is going out */
- if (signal_pending(current)) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
- }
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
+}
- /* Save FPU state in stack */
- if (current->thread.regs->msr & MSR_FP)
- giveup_fpu(current);
- memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
- fpscr = current->thread.fpscr.val;
- fpexc_mode = current->thread.fpexc_mode;
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
-#ifdef CONFIG_ALTIVEC
- /* Save Altivec state in stack */
- used_vr = current->thread.used_vr;
- if (used_vr) {
- if (current->thread.regs->msr & MSR_VEC)
- giveup_altivec(current);
- memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
- vscr = current->thread.vscr;
- vrsave = current->thread.vrsave;
- }
-#endif
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
-#ifdef CONFIG_VSX
- /* Save VSX state in stack */
- used_vsr = current->thread.used_vsr;
- if (used_vsr && (current->thread.regs->msr & MSR_VSX))
- __giveup_vsx(current);
+#ifdef CONFIG_PPC64
+ INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
- /* Remember the MSR with disabled extensions */
- ext_msr = current->thread.regs->msr;
-
- /* XXX we get called with irq disabled - change that! */
- local_irq_enable();
-
- /* Preload FPU if it's enabled */
- if (vcpu->arch.shared->msr & MSR_FP)
- kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
-
- ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
-
- local_irq_disable();
-
- current->thread.regs->msr = ext_msr;
-
- /* Make sure we save the guest FPU/Altivec/VSX state */
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
-
- /* Restore FPU state from stack */
- memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
- current->thread.fpscr.val = fpscr;
- current->thread.fpexc_mode = fpexc_mode;
+ return kvm->arch.kvm_ops->init_vm(kvm);
+}
-#ifdef CONFIG_ALTIVEC
- /* Restore Altivec state from stack */
- if (used_vr && current->thread.used_vr) {
- memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
- current->thread.vscr = vscr;
- current->thread.vrsave = vrsave;
- }
- current->thread.used_vr = used_vr;
-#endif
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvm->arch.kvm_ops->destroy_vm(kvm);
-#ifdef CONFIG_VSX
- current->thread.used_vsr = used_vsr;
+#ifdef CONFIG_PPC64
+ kvmppc_rtas_tokens_free(kvm);
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
+}
- return ret;
+int kvmppc_core_check_processor_compat(void)
+{
+ /*
+ * We always return 0 for book3s. We check
+ * for compatability while loading the HV
+ * or PR module
+ */
+ return 0;
}
static int kvmppc_book3s_init(void)
{
int r;
- r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
- THIS_MODULE);
-
+ r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (r)
return r;
-
- r = kvmppc_mmu_hpte_sysinit();
-
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ r = kvmppc_book3s_init_pr();
+#endif
return r;
+
}
static void kvmppc_book3s_exit(void)
{
- kvmppc_mmu_hpte_sysexit();
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kvmppc_book3s_exit_pr();
+#endif
kvm_exit();
}
module_init(kvmppc_book3s_init);
module_exit(kvmppc_book3s_exit);
+
+/* On 32bit this is our one and only kernel module */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644
index 00000000000..4bf956cf94d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_BOOK3S_H__
+#define __POWERPC_KVM_BOOK3S_H__
+
+extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
+ int sprn, ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
+ int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+extern void kvmppc_book3s_exit_pr(void);
+
+#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index c8cefdd15fd..93503bbdae4 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -84,13 +84,14 @@ static inline bool sr_nx(u32 sr_raw)
}
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data);
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite);
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
u64 *vsid);
static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
{
- return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
+ return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);
}
static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
u64 vsid;
struct kvmppc_pte pte;
- if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+ if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))
return pte.vpage;
kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, 0);
}
-static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 page, hash, pteg, htabmask;
hva_t r;
@@ -129,10 +131,10 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
- kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+ kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,
sr_vsid(sre));
- r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
if (kvm_is_error_hva(r))
return r;
return r | (pteg & ~PAGE_MASK);
@@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
}
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data)
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_bat *bat;
@@ -157,7 +160,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
else
bat = &vcpu_book3s->ibat[i];
- if (vcpu->arch.shared->msr & MSR_PR) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
if (!bat->vp)
continue;
} else {
@@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
printk(KERN_INFO "BAT is not readable!\n");
continue;
}
- if (!pte->may_write) {
- /* let's treat r/o BATs as not-readable for now */
+ if (iswrite && !pte->may_write) {
dprintk_pte("BAT is read-only!\n");
continue;
}
@@ -201,12 +203,12 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data,
- bool primary)
+ bool iswrite, bool primary)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u32 sre;
hva_t ptegp;
u32 pteg[16];
+ u32 pte0, pte1;
u32 ptem = 0;
int i;
int found = 0;
@@ -218,7 +220,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
- ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+ ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
if (kvm_is_error_hva(ptegp)) {
printk(KERN_INFO "KVM: Invalid PTEG!\n");
goto no_page_found;
@@ -232,14 +234,16 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
}
for (i=0; i<16; i+=2) {
- if (ptem == pteg[i]) {
+ pte0 = be32_to_cpu(pteg[i]);
+ pte1 = be32_to_cpu(pteg[i + 1]);
+ if (ptem == pte0) {
u8 pp;
- pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
- pp = pteg[i+1] & 3;
+ pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF);
+ pp = pte1 & 3;
- if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) ||
- (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
+ if ((sr_kp(sre) && (kvmppc_get_msr(vcpu) & MSR_PR)) ||
+ (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))
pp |= 4;
pte->may_write = false;
@@ -258,11 +262,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
break;
}
- if ( !pte->may_read )
- continue;
-
dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
- pteg[i], pteg[i+1], pp);
+ pte0, pte1, pp);
found = 1;
break;
}
@@ -271,19 +272,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Update PTE C and A bits, so the guest's swapper knows we used the
page */
if (found) {
- u32 oldpte = pteg[i+1];
-
- if (pte->may_read)
- pteg[i+1] |= PTEG_FLAG_ACCESSED;
- if (pte->may_write)
- pteg[i+1] |= PTEG_FLAG_DIRTY;
- else
- dprintk_pte("KVM: Mapping read-only page!\n");
-
- /* Write back into the PTEG */
- if (pteg[i+1] != oldpte)
- copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+ u32 pte_r = pte1;
+ char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32));
+
+ /*
+ * Use single-byte writes to update the HPTE, to
+ * conform to what real hardware does.
+ */
+ if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+ pte_r |= PTEG_FLAG_ACCESSED;
+ put_user(pte_r >> 8, addr + 2);
+ }
+ if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+ pte_r |= PTEG_FLAG_DIRTY;
+ put_user(pte_r, addr + 3);
+ }
+ if (!pte->may_read || (iswrite && !pte->may_write))
+ return -EPERM;
return 0;
}
@@ -294,7 +299,8 @@ no_page_found:
to_book3s(vcpu)->sdr1, ptegp);
for (i=0; i<16; i+=2) {
dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n",
- i, pteg[i], pteg[i+1], ptem);
+ i, be32_to_cpu(pteg[i]),
+ be32_to_cpu(pteg[i+1]), ptem);
}
}
@@ -302,17 +308,19 @@ no_page_found:
}
static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *pte, bool data)
+ struct kvmppc_pte *pte, bool data,
+ bool iswrite)
{
int r;
ulong mp_ea = vcpu->arch.magic_page_ea;
pte->eaddr = eaddr;
+ pte->page_size = MMU_PAGE_4K;
/* Magic page override */
if (unlikely(mp_ea) &&
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
pte->raddr &= KVM_PAM;
@@ -323,11 +331,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return 0;
}
- r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+ r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
if (r < 0)
- r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+ r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+ data, iswrite, true);
if (r < 0)
- r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+ r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+ data, iswrite, false);
return r;
}
@@ -335,19 +345,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
{
- return vcpu->arch.shared->sr[srnum];
+ return kvmppc_get_sr(vcpu, srnum);
}
static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
ulong value)
{
- vcpu->arch.shared->sr[srnum] = value;
+ kvmppc_set_sr(vcpu, srnum, value);
kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
}
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
- kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+ int i;
+ struct kvm_vcpu *v;
+
+ /* flush this VA on all cpus */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
}
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
@@ -356,8 +371,9 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
ulong ea = esid << SID_SHIFT;
u32 sr;
u64 gvsid = esid;
+ u64 msr = kvmppc_get_msr(vcpu);
- if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ if (msr & (MSR_DR|MSR_IR)) {
sr = find_sr(vcpu, ea);
if (sr_valid(sr))
gvsid = sr_vsid(sr);
@@ -366,7 +382,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
/* In case we only have one of MSR_IR or MSR_DR set, let's put
that in the real-mode context (and hope RM doesn't access
high memory) */
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ switch (msr & (MSR_DR|MSR_IR)) {
case 0:
*vsid = VSID_REAL | esid;
break;
@@ -386,7 +402,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
BUG();
}
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (msr & MSR_PR)
*vsid |= VSID_PR;
return 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 9fecbfbce77..678e7537049 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -92,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -138,10 +138,11 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
extern char etext[];
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+ bool iswrite)
{
pfn_t hpaddr;
- u64 va;
+ u64 vpn;
u64 vsid;
struct kvmppc_sid_map *map;
volatile u32 *pteg;
@@ -151,13 +152,17 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
bool primary = false;
bool evict = false;
struct hpte_cache *pte;
+ int r = 0;
+ bool writable;
/* Get host physical address for gpa */
- hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
- if (is_error_pfn(hpaddr)) {
+ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
+ iswrite, &writable);
+ if (is_error_noslot_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
orig_pte->eaddr);
- return -EINVAL;
+ r = -EINVAL;
+ goto out;
}
hpaddr <<= PAGE_SHIFT;
@@ -171,8 +176,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
BUG_ON(!map);
vsid = map->host_vsid;
- va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
-
+ vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) |
+ ((eaddr & ~ESID_MASK) >> VPN_SHIFT);
next_pteg:
if (rr == 16) {
primary = !primary;
@@ -202,13 +207,16 @@ next_pteg:
(primary ? 0 : PTE_SEC);
pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
- if (orig_pte->may_write) {
+ if (orig_pte->may_write && writable) {
pteg1 |= PP_RWRW;
mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
} else {
pteg1 |= PP_RWRX;
}
+ if (orig_pte->may_execute)
+ kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+
local_irq_disable();
if (pteg[rr]) {
@@ -235,21 +243,33 @@ next_pteg:
/* Now tell our Shadow PTE code about the new page */
pte = kvmppc_mmu_hpte_cache_next(vcpu);
+ if (!pte) {
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+ r = -EAGAIN;
+ goto out;
+ }
dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
orig_pte->may_write ? 'w' : '-',
orig_pte->may_execute ? 'x' : '-',
- orig_pte->eaddr, (ulong)pteg, va,
+ orig_pte->eaddr, (ulong)pteg, vpn,
orig_pte->vpage, hpaddr);
pte->slot = (ulong)&pteg[rr];
- pte->host_va = va;
+ pte->host_vpn = vpn;
pte->pte = *orig_pte;
pte->pfn = hpaddr >> PAGE_SHIFT;
kvmppc_mmu_hpte_cache_map(vcpu, pte);
- return 0;
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+out:
+ return r;
+}
+
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
}
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -259,7 +279,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -297,12 +317,14 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
u64 gvsid;
u32 sr;
struct kvmppc_sid_map *map;
- struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ int r = 0;
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
/* Invalidate an entry */
svcpu->sr[esid] = SR_INVALID;
- return -ENOENT;
+ r = -ENOENT;
+ goto out;
}
map = find_sid_vsid(vcpu, gvsid);
@@ -315,20 +337,24 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
- return 0;
+out:
+ svcpu_put(svcpu);
+ return r;
}
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
int i;
- struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
svcpu->sr[i] = SR_INVALID;
+
+ svcpu_put(svcpu);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
{
int i;
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 3608471ad2d..7e06a6fc8d0 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
- * all other volatile GPRS = free
+ * all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index d7889ef3211..774a253ca4e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,6 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
/* #define DEBUG_MMU */
@@ -37,85 +38,113 @@
static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
{
- kvmppc_set_msr(vcpu, MSR_SF);
+ kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
}
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
- struct kvmppc_vcpu_book3s *vcpu_book3s,
+ struct kvm_vcpu *vcpu,
gva_t eaddr)
{
int i;
u64 esid = GET_ESID(eaddr);
u64 esid_1t = GET_ESID_1T(eaddr);
- for (i = 0; i < vcpu_book3s->slb_nr; i++) {
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 cmp_esid = esid;
- if (!vcpu_book3s->slb[i].valid)
+ if (!vcpu->arch.slb[i].valid)
continue;
- if (vcpu_book3s->slb[i].tb)
+ if (vcpu->arch.slb[i].tb)
cmp_esid = esid_1t;
- if (vcpu_book3s->slb[i].esid == cmp_esid)
- return &vcpu_book3s->slb[i];
+ if (vcpu->arch.slb[i].esid == cmp_esid)
+ return &vcpu->arch.slb[i];
}
dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
eaddr, esid, esid_1t);
- for (i = 0; i < vcpu_book3s->slb_nr; i++) {
- if (vcpu_book3s->slb[i].vsid)
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (vcpu->arch.slb[i].vsid)
dprintk(" %d: %c%c%c %llx %llx\n", i,
- vcpu_book3s->slb[i].valid ? 'v' : ' ',
- vcpu_book3s->slb[i].large ? 'l' : ' ',
- vcpu_book3s->slb[i].tb ? 't' : ' ',
- vcpu_book3s->slb[i].esid,
- vcpu_book3s->slb[i].vsid);
+ vcpu->arch.slb[i].valid ? 'v' : ' ',
+ vcpu->arch.slb[i].large ? 'l' : ' ',
+ vcpu->arch.slb[i].tb ? 't' : ' ',
+ vcpu->arch.slb[i].esid,
+ vcpu->arch.slb[i].vsid);
}
return NULL;
}
+static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
+{
+ return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
+}
+
+static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
+{
+ return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
+}
+
+static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
+{
+ eaddr &= kvmppc_slb_offset_mask(slb);
+
+ return (eaddr >> VPN_SHIFT) |
+ ((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
+}
+
static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
bool data)
{
struct kvmppc_slb *slb;
- slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), eaddr);
+ slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
if (!slb)
return 0;
- if (slb->tb)
- return (((u64)eaddr >> 12) & 0xfffffff) |
- (((u64)slb->vsid) << 28);
+ return kvmppc_slb_calc_vpn(slb, eaddr);
+}
- return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
+static int mmu_pagesize(int mmu_pg)
+{
+ switch (mmu_pg) {
+ case MMU_PAGE_64K:
+ return 16;
+ case MMU_PAGE_16M:
+ return 24;
+ }
+ return 12;
}
static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
{
- return slbe->large ? 24 : 12;
+ return mmu_pagesize(slbe->base_page_size);
}
static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
{
int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
- return ((eaddr & 0xfffffff) >> p);
+
+ return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
}
-static hva_t kvmppc_mmu_book3s_64_get_pteg(
- struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
struct kvmppc_slb *slbe, gva_t eaddr,
bool second)
{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u64 hash, pteg, htabsize;
- u32 page;
+ u32 ssize;
hva_t r;
+ u64 vpn;
- page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
- hash = slbe->vsid ^ page;
+ vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
+ ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
+ hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
if (second)
hash = ~hash;
hash &= ((1ULL << 39ULL) - 1ULL);
@@ -128,7 +157,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
page, vcpu_book3s->sdr1, pteg, slbe->vsid);
- r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+ /* When running a PAPR guest, SDR1 contains a HVA address instead
+ of a GPA */
+ if (vcpu->arch.papr_enabled)
+ r = pteg;
+ else
+ r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
+
if (kvm_is_error_hva(r))
return r;
return r | (pteg & ~PAGE_MASK);
@@ -140,35 +175,58 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
u64 avpn;
avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
- avpn |= slbe->vsid << (28 - p);
+ avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
- if (p < 24)
- avpn >>= ((80 - p) - 56) - 8;
+ if (p < 16)
+ avpn >>= ((80 - p) - 56) - 8; /* 16 - p */
else
- avpn <<= 8;
+ avpn <<= p - 16;
return avpn;
}
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry. This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+ switch (slbe->base_page_size) {
+ case MMU_PAGE_64K:
+ if ((r & 0xf000) == 0x1000)
+ return MMU_PAGE_64K;
+ break;
+ case MMU_PAGE_16M:
+ if ((r & 0xff000) == 0)
+ return MMU_PAGE_16M;
+ break;
+ }
+ return -1;
+}
+
static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *gpte, bool data)
+ struct kvmppc_pte *gpte, bool data,
+ bool iswrite)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
+ u64 v, r;
+ u64 v_val, v_mask;
+ u64 eaddr_mask;
int i;
- u8 key = 0;
+ u8 pp, key = 0;
bool found = false;
- bool perm_err = false;
- int second = 0;
+ bool second = false;
+ int pgsize;
ulong mp_ea = vcpu->arch.magic_page_ea;
/* Magic page override */
if (unlikely(mp_ea) &&
unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
@@ -176,131 +234,143 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_execute = true;
gpte->may_read = true;
gpte->may_write = true;
+ gpte->page_size = MMU_PAGE_4K;
return 0;
}
- slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr);
+ slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
if (!slbe)
goto no_seg_found;
+ avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+ v_val = avpn & HPTE_V_AVPN;
+
+ if (slbe->tb)
+ v_val |= SLB_VSID_B_1T;
+ if (slbe->large)
+ v_val |= HPTE_V_LARGE;
+ v_val |= HPTE_V_VALID;
+
+ v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+ HPTE_V_SECONDARY;
+
+ pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
do_second:
- ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+ ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
if (kvm_is_error_hva(ptegp))
goto no_page_found;
- avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
-
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
}
- if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)
key = 4;
- else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
+ else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)
key = 4;
for (i=0; i<16; i+=2) {
- u64 v = pteg[i];
- u64 r = pteg[i+1];
-
- /* Valid check */
- if (!(v & HPTE_V_VALID))
- continue;
- /* Hash check */
- if ((v & HPTE_V_SECONDARY) != second)
- continue;
-
- /* AVPN compare */
- if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
- u8 pp = (r & HPTE_R_PP) | key;
- int eaddr_mask = 0xFFF;
-
- gpte->eaddr = eaddr;
- gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
- eaddr,
- data);
- if (slbe->large)
- eaddr_mask = 0xFFFFFF;
- gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
- gpte->may_execute = ((r & HPTE_R_N) ? false : true);
- gpte->may_read = false;
- gpte->may_write = false;
-
- switch (pp) {
- case 0:
- case 1:
- case 2:
- case 6:
- gpte->may_write = true;
- /* fall through */
- case 3:
- case 5:
- case 7:
- gpte->may_read = true;
- break;
- }
-
- if (!gpte->may_read) {
- perm_err = true;
- continue;
+ u64 pte0 = be64_to_cpu(pteg[i]);
+ u64 pte1 = be64_to_cpu(pteg[i + 1]);
+
+ /* Check all relevant fields of 1st dword */
+ if ((pte0 & v_mask) == v_val) {
+ /* If large page bit is set, check pgsize encoding */
+ if (slbe->large &&
+ (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+ pgsize = decode_pagesize(slbe, pte1);
+ if (pgsize < 0)
+ continue;
}
-
- dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
- "-> 0x%lx\n",
- eaddr, avpn, gpte->vpage, gpte->raddr);
found = true;
break;
}
}
- /* Update PTE R and C bits, so the guest's swapper knows we used the
- * page */
- if (found) {
- u32 oldr = pteg[i+1];
-
- if (gpte->may_read) {
- /* Set the accessed flag */
- pteg[i+1] |= HPTE_R_R;
- }
- if (gpte->may_write) {
- /* Set the dirty flag */
- pteg[i+1] |= HPTE_R_C;
- } else {
- dprintk("KVM: Mapping read-only page!\n");
- }
+ if (!found) {
+ if (second)
+ goto no_page_found;
+ v_val |= HPTE_V_SECONDARY;
+ second = true;
+ goto do_second;
+ }
- /* Write back into the PTEG */
- if (pteg[i+1] != oldr)
- copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+ v = be64_to_cpu(pteg[i]);
+ r = be64_to_cpu(pteg[i+1]);
+ pp = (r & HPTE_R_PP) | key;
+ if (r & HPTE_R_PP0)
+ pp |= 8;
+
+ gpte->eaddr = eaddr;
+ gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+
+ eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
+ gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+ gpte->page_size = pgsize;
+ gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+ if (unlikely(vcpu->arch.disable_kernel_nx) &&
+ !(kvmppc_get_msr(vcpu) & MSR_PR))
+ gpte->may_execute = true;
+ gpte->may_read = false;
+ gpte->may_write = false;
- return 0;
- } else {
- dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
- "ptegp=0x%lx)\n",
- eaddr, to_book3s(vcpu)->sdr1, ptegp);
- for (i = 0; i < 16; i += 2)
- dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n",
- i, pteg[i], pteg[i+1], avpn);
-
- if (!second) {
- second = HPTE_V_SECONDARY;
- goto do_second;
- }
+ switch (pp) {
+ case 0:
+ case 1:
+ case 2:
+ case 6:
+ gpte->may_write = true;
+ /* fall through */
+ case 3:
+ case 5:
+ case 7:
+ case 10:
+ gpte->may_read = true;
+ break;
}
+ dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+ "-> 0x%lx\n",
+ eaddr, avpn, gpte->vpage, gpte->raddr);
-no_page_found:
+ /* Update PTE R and C bits, so the guest's swapper knows we used the
+ * page */
+ if (gpte->may_read && !(r & HPTE_R_R)) {
+ /*
+ * Set the accessed flag.
+ * We have to write this back with a single byte write
+ * because another vcpu may be accessing this on
+ * non-PAPR platforms such as mac99, and this is
+ * what real hardware does.
+ */
+ char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
+ r |= HPTE_R_R;
+ put_user(r >> 8, addr + 6);
+ }
+ if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
+ /* Set the dirty flag */
+ /* Use a single byte write */
+ char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
+ r |= HPTE_R_C;
+ put_user(r, addr + 7);
+ }
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
- if (perm_err)
+ if (!gpte->may_read || (iswrite && !gpte->may_write))
return -EPERM;
+ return 0;
+no_page_found:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
return -ENOENT;
no_seg_found:
-
dprintk("KVM MMU: Trigger segment fault\n");
return -EINVAL;
}
@@ -320,21 +390,36 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
slbe->esid = slbe->tb ? esid_1t : esid;
- slbe->vsid = rs >> 12;
+ slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0;
slbe->nx = (rs & SLB_VSID_N) ? 1 : 0;
slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
+ slbe->base_page_size = MMU_PAGE_4K;
+ if (slbe->large) {
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+ switch (rs & SLB_VSID_LP) {
+ case SLB_VSID_LP_00:
+ slbe->base_page_size = MMU_PAGE_16M;
+ break;
+ case SLB_VSID_LP_01:
+ slbe->base_page_size = MMU_PAGE_64K;
+ break;
+ }
+ } else
+ slbe->base_page_size = MMU_PAGE_16M;
+ }
+
slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
slbe->origv = rs;
@@ -344,38 +429,36 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return 0;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
return slbe->orige;
}
static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
- if (slb_nr > vcpu_book3s->slb_nr)
+ if (slb_nr > vcpu->arch.slb_nr)
return 0;
- slbe = &vcpu_book3s->slb[slb_nr];
+ slbe = &vcpu->arch.slb[slb_nr];
return slbe->origv;
}
static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
struct kvmppc_slb *slbe;
+ u64 seg_size;
dprintk("KVM MMU: slbie(0x%llx)\n", ea);
- slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, ea);
+ slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
if (!slbe)
return;
@@ -383,21 +466,26 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
slbe->valid = false;
+ slbe->orige = 0;
+ slbe->origv = 0;
- kvmppc_mmu_map_segment(vcpu, ea);
+ seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
+ kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
}
static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
int i;
dprintk("KVM MMU: slbia()\n");
- for (i = 1; i < vcpu_book3s->slb_nr; i++)
- vcpu_book3s->slb[i].valid = false;
+ for (i = 1; i < vcpu->arch.slb_nr; i++) {
+ vcpu->arch.slb[i].valid = false;
+ vcpu->arch.slb[i].orige = 0;
+ vcpu->arch.slb[i].origv = 0;
+ }
- if (vcpu->arch.shared->msr & MSR_IR) {
+ if (kvmppc_get_msr(vcpu) & MSR_IR) {
kvmppc_mmu_flush_segments(vcpu);
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
}
@@ -447,14 +535,45 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
bool large)
{
u64 mask = 0xFFFFFFFFFULL;
+ long i;
+ struct kvm_vcpu *v;
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
- if (large)
- mask = 0xFFFFFF000ULL;
- kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+ /*
+ * The tlbie instruction changed behaviour starting with
+ * POWER6. POWER6 and later don't have the large page flag
+ * in the instruction but in the RB value, along with bits
+ * indicating page and segment sizes.
+ */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+ /* POWER6 or later */
+ if (va & 1) { /* L bit */
+ if ((va & 0xf000) == 0x1000)
+ mask = 0xFFFFFFFF0ULL; /* 64k page */
+ else
+ mask = 0xFFFFFF000ULL; /* 16M page */
+ }
+ } else {
+ /* older processors, e.g. PPC970 */
+ if (large)
+ mask = 0xFFFFFF000ULL;
+ }
+ /* flush this VA on all vcpus */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvmppc_mmu_pte_vflush(v, va >> 12, mask);
}
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+ ulong mp_ea = vcpu->arch.magic_page_ea;
+
+ return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) &&
+ (mp_ea >> SID_SHIFT) == esid;
+}
+#endif
+
static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
u64 *vsid)
{
@@ -462,44 +581,66 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
struct kvmppc_slb *slb;
u64 gvsid = esid;
ulong mp_ea = vcpu->arch.magic_page_ea;
+ int pagesize = MMU_PAGE_64K;
+ u64 msr = kvmppc_get_msr(vcpu);
- if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
- slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
- if (slb)
+ if (msr & (MSR_DR|MSR_IR)) {
+ slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
+ if (slb) {
gvsid = slb->vsid;
+ pagesize = slb->base_page_size;
+ if (slb->tb) {
+ gvsid <<= SID_SHIFT_1T - SID_SHIFT;
+ gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
+ gvsid |= VSID_1T;
+ }
+ }
}
- switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+ switch (msr & (MSR_DR|MSR_IR)) {
case 0:
- *vsid = VSID_REAL | esid;
+ gvsid = VSID_REAL | esid;
break;
case MSR_IR:
- *vsid = VSID_REAL_IR | gvsid;
+ gvsid |= VSID_REAL_IR;
break;
case MSR_DR:
- *vsid = VSID_REAL_DR | gvsid;
+ gvsid |= VSID_REAL_DR;
break;
case MSR_DR|MSR_IR:
if (!slb)
goto no_slb;
- *vsid = gvsid;
break;
default:
BUG();
break;
}
- if (vcpu->arch.shared->msr & MSR_PR)
- *vsid |= VSID_PR;
+#ifdef CONFIG_PPC_64K_PAGES
+ /*
+ * Mark this as a 64k segment if the host is using
+ * 64k pages, the host MMU supports 64k pages and
+ * the guest segment page size is >= 64k,
+ * but not if this segment contains the magic page.
+ */
+ if (pagesize >= MMU_PAGE_64K &&
+ mmu_psize_defs[MMU_PAGE_64K].shift &&
+ !segment_contains_magic_page(vcpu, esid))
+ gvsid |= VSID_64K;
+#endif
+
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
+ gvsid |= VSID_PR;
+ *vsid = gvsid;
return 0;
no_slb:
/* Catch magic page case */
if (unlikely(mp_ea) &&
unlikely(esid == (mp_ea >> SID_SHIFT)) &&
- !(vcpu->arch.shared->msr & MSR_PR)) {
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
*vsid = VSID_REAL | esid;
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index fa2f08434ba..0ac98392f36 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -27,14 +27,14 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
-#include "trace.h"
+#include "trace_pr.h"
#define PTE_SIZE 12
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
- ppc_md.hpte_invalidate(pte->slot, pte->host_va,
- MMU_PAGE_4K, MMU_SEGSIZE_256M,
+ ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
+ pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
false);
}
@@ -58,7 +58,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
struct kvmppc_sid_map *map;
u16 sid_map_mask;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -78,25 +78,39 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
return NULL;
}
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+ bool iswrite)
{
+ unsigned long vpn;
pfn_t hpaddr;
- ulong hash, hpteg, va;
+ ulong hash, hpteg;
u64 vsid;
int ret;
int rflags = 0x192;
int vflags = 0;
int attempt = 0;
struct kvmppc_sid_map *map;
+ int r = 0;
+ int hpsize = MMU_PAGE_4K;
+ bool writable;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu->kvm;
+ struct hpte_cache *cpte;
+ unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
+ unsigned long pfn;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
/* Get host physical address for gpa */
- hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
- if (is_error_pfn(hpaddr)) {
- printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
- return -EINVAL;
+ pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);
+ if (is_error_noslot_pfn(pfn)) {
+ printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);
+ r = -EINVAL;
+ goto out;
}
- hpaddr <<= PAGE_SHIFT;
- hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+ hpaddr = pfn << PAGE_SHIFT;
/* and write the mapping ea -> hpa into the pt */
vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -110,31 +124,56 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
vsid, orig_pte->eaddr);
WARN_ON(true);
- return -EINVAL;
+ r = -EINVAL;
+ goto out;
}
- vsid = map->host_vsid;
- va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+ vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
- if (!orig_pte->may_write)
- rflags |= HPTE_R_PP;
- else
- mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+ kvm_set_pfn_accessed(pfn);
+ if (!orig_pte->may_write || !writable)
+ rflags |= PP_RXRX;
+ else {
+ mark_page_dirty(vcpu->kvm, gfn);
+ kvm_set_pfn_dirty(pfn);
+ }
if (!orig_pte->may_execute)
rflags |= HPTE_R_N;
+ else
+ kvmppc_mmu_flush_icache(pfn);
+
+ /*
+ * Use 64K pages if possible; otherwise, on 64K page kernels,
+ * we need to transfer 4 more bits from guest real to host real addr.
+ */
+ if (vsid & VSID_64K)
+ hpsize = MMU_PAGE_64K;
+ else
+ hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
- hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+ hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
+
+ cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+ spin_lock(&kvm->mmu_lock);
+ if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+ r = -EAGAIN;
+ goto out_unlock;
+ }
map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
/* In case we tried normal mapping already, let's nuke old entries */
if (attempt > 1)
- if (ppc_md.hpte_remove(hpteg) < 0)
- return -1;
+ if (ppc_md.hpte_remove(hpteg) < 0) {
+ r = -1;
+ goto out_unlock;
+ }
- ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+ ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+ hpsize, hpsize, MMU_SEGSIZE_256M);
if (ret < 0) {
/* If we couldn't map a primary PTE, try a secondary */
@@ -143,9 +182,8 @@ map_again:
attempt++;
goto map_again;
} else {
- struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
- trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
+ trace_kvm_book3s_64_mmu_map(rflags, hpteg,
+ vpn, hpaddr, orig_pte);
/* The ppc_md code may give us a secondary entry even though we
asked for a primary. Fix up. */
@@ -154,15 +192,35 @@ map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
}
- pte->slot = hpteg + (ret & 7);
- pte->host_va = va;
- pte->pte = *orig_pte;
- pte->pfn = hpaddr >> PAGE_SHIFT;
+ cpte->slot = hpteg + (ret & 7);
+ cpte->host_vpn = vpn;
+ cpte->pte = *orig_pte;
+ cpte->pfn = pfn;
+ cpte->pagesize = hpsize;
- kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+ cpte = NULL;
}
- return 0;
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ if (cpte)
+ kvmppc_mmu_hpte_cache_free(cpte);
+
+out:
+ return r;
+}
+
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ u64 mask = 0xfffffffffULL;
+ u64 vsid;
+
+ vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
+ if (vsid & VSID_64K)
+ mask = 0xffffffff0ULL;
+ kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask);
}
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -172,7 +230,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
u16 sid_map_mask;
static int backwards_map = 0;
- if (vcpu->arch.shared->msr & MSR_PR)
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
/* We might get collisions that trap in preceding order, so let's
@@ -188,14 +246,14 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
backwards_map = !backwards_map;
/* Uh-oh ... out of mappings. Let's flush! */
- if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) {
- vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+ if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) {
+ vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first;
memset(vcpu_book3s->sid_map, 0,
sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
}
- map->host_vsid = vcpu_book3s->vsid_next++;
+ map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
map->guest_vsid = gvsid;
map->valid = true;
@@ -207,25 +265,27 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
int i;
int max_slb_size = 64;
int found_inval = -1;
int r;
- if (!to_svcpu(vcpu)->slb_max)
- to_svcpu(vcpu)->slb_max = 1;
-
/* Are we overwriting? */
- for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) {
- if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V))
+ for (i = 0; i < svcpu->slb_max; i++) {
+ if (!(svcpu->slb[i].esid & SLB_ESID_V))
found_inval = i;
- else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid)
- return i;
+ else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
+ r = i;
+ goto out;
+ }
}
/* Found a spare entry that was invalidated before */
- if (found_inval > 0)
- return found_inval;
+ if (found_inval >= 0) {
+ r = found_inval;
+ goto out;
+ }
/* No spare invalid entry, so create one */
@@ -233,30 +293,35 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
max_slb_size = mmu_slb_size;
/* Overflowing -> purge */
- if ((to_svcpu(vcpu)->slb_max) == max_slb_size)
+ if ((svcpu->slb_max) == max_slb_size)
kvmppc_mmu_flush_segments(vcpu);
- r = to_svcpu(vcpu)->slb_max;
- to_svcpu(vcpu)->slb_max++;
+ r = svcpu->slb_max;
+ svcpu->slb_max++;
+out:
+ svcpu_put(svcpu);
return r;
}
int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
u64 esid = eaddr >> SID_SHIFT;
u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
u64 slb_vsid = SLB_VSID_USER;
u64 gvsid;
int slb_index;
struct kvmppc_sid_map *map;
+ int r = 0;
slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
/* Invalidate an entry */
- to_svcpu(vcpu)->slb[slb_index].esid = 0;
- return -ENOENT;
+ svcpu->slb[slb_index].esid = 0;
+ r = -ENOENT;
+ goto out;
}
map = find_sid_vsid(vcpu, gvsid);
@@ -269,21 +334,48 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
slb_vsid &= ~SLB_VSID_KP;
slb_esid |= slb_index;
- to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
- to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Set host segment base page size to 64K if possible */
+ if (gvsid & VSID_64K)
+ slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp;
+#endif
+
+ svcpu->slb[slb_index].esid = slb_esid;
+ svcpu->slb[slb_index].vsid = slb_vsid;
trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
- return 0;
+out:
+ svcpu_put(svcpu);
+ return r;
+}
+
+void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
+{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong seg_mask = -seg_size;
+ int i;
+
+ for (i = 0; i < svcpu->slb_max; i++) {
+ if ((svcpu->slb[i].esid & SLB_ESID_V) &&
+ (svcpu->slb[i].esid & seg_mask) == ea) {
+ /* Invalidate this entry */
+ svcpu->slb[i].esid = 0;
+ }
+ }
+
+ svcpu_put(svcpu);
}
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
- to_svcpu(vcpu)->slb_max = 1;
- to_svcpu(vcpu)->slb[0].esid = 0;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->slb_max = 0;
+ svcpu->slb[0].esid = 0;
+ svcpu_put(svcpu);
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_hpte_destroy(vcpu);
__destroy_context(to_book3s(vcpu)->context_id[0]);
@@ -299,9 +391,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
return -1;
vcpu3s->context_id[0] = err;
- vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1;
- vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS;
- vcpu3s->vsid_next = vcpu3s->vsid_first;
+ vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
+ << ESID_BITS) - 1;
+ vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
+ vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
kvmppc_mmu_hpte_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
new file mode 100644
index 00000000000..68468d695f1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -0,0 +1,1667 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
+#include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+
+#include "book3s_hv_cma.h"
+
+/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
+#define MAX_LPID_970 63
+
+/* Power architecture requires HPT is at least 256kB */
+#define PPC_MIN_HPT_ORDER 18
+
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret);
+static void kvmppc_rmap_reset(struct kvm *kvm);
+
+long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+ unsigned long hpt = 0;
+ struct revmap_entry *rev;
+ struct page *page = NULL;
+ long order = KVM_DEFAULT_HPT_ORDER;
+
+ if (htab_orderp) {
+ order = *htab_orderp;
+ if (order < PPC_MIN_HPT_ORDER)
+ order = PPC_MIN_HPT_ORDER;
+ }
+
+ kvm->arch.hpt_cma_alloc = 0;
+ VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+ page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+ if (page) {
+ hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ kvm->arch.hpt_cma_alloc = 1;
+ }
+
+ /* Lastly try successively smaller sizes from the page allocator */
+ while (!hpt && order > PPC_MIN_HPT_ORDER) {
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+ __GFP_NOWARN, order - PAGE_SHIFT);
+ if (!hpt)
+ --order;
+ }
+
+ if (!hpt)
+ return -ENOMEM;
+
+ kvm->arch.hpt_virt = hpt;
+ kvm->arch.hpt_order = order;
+ /* HPTEs are 2**4 bytes long */
+ kvm->arch.hpt_npte = 1ul << (order - 4);
+ /* 128 (2**7) bytes in each HPTEG */
+ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+
+ /* Allocate reverse map array */
+ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
+ if (!rev) {
+ pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
+ goto out_freehpt;
+ }
+ kvm->arch.revmap = rev;
+ kvm->arch.sdr1 = __pa(hpt) | (order - 18);
+
+ pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
+ hpt, order, kvm->arch.lpid);
+
+ if (htab_orderp)
+ *htab_orderp = order;
+ return 0;
+
+ out_freehpt:
+ if (kvm->arch.hpt_cma_alloc)
+ kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
+ else
+ free_pages(hpt, order - PAGE_SHIFT);
+ return -ENOMEM;
+}
+
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+ long err = -EBUSY;
+ long order;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.rma_setup_done) {
+ kvm->arch.rma_setup_done = 0;
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ goto out;
+ }
+ }
+ if (kvm->arch.hpt_virt) {
+ order = kvm->arch.hpt_order;
+ /* Set the entire HPT to 0, i.e. invalid HPTEs */
+ memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+ /*
+ * Reset all the reverse-mapping chains for all memslots
+ */
+ kvmppc_rmap_reset(kvm);
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+ *htab_orderp = order;
+ err = 0;
+ } else {
+ err = kvmppc_alloc_hpt(kvm, htab_orderp);
+ order = *htab_orderp;
+ }
+ out:
+ mutex_unlock(&kvm->lock);
+ return err;
+}
+
+void kvmppc_free_hpt(struct kvm *kvm)
+{
+ kvmppc_free_lpid(kvm->arch.lpid);
+ vfree(kvm->arch.revmap);
+ if (kvm->arch.hpt_cma_alloc)
+ kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+ 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
+ else
+ free_pages(kvm->arch.hpt_virt,
+ kvm->arch.hpt_order - PAGE_SHIFT);
+}
+
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize == 0x10000) ? 0x1000 : 0;
+}
+
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ unsigned long porder)
+{
+ unsigned long i;
+ unsigned long npages;
+ unsigned long hp_v, hp_r;
+ unsigned long addr, hash;
+ unsigned long psize;
+ unsigned long hp0, hp1;
+ unsigned long idx_ret;
+ long ret;
+ struct kvm *kvm = vcpu->kvm;
+
+ psize = 1ul << porder;
+ npages = memslot->npages >> (porder - PAGE_SHIFT);
+
+ /* VRMA can't be > 1TB */
+ if (npages > 1ul << (40 - porder))
+ npages = 1ul << (40 - porder);
+ /* Can't use more than 1 HPTE per HPTEG */
+ if (npages > kvm->arch.hpt_mask + 1)
+ npages = kvm->arch.hpt_mask + 1;
+
+ hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+ hp1 = hpte1_pgsize_encoding(psize) |
+ HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
+ for (i = 0; i < npages; ++i) {
+ addr = i << porder;
+ /* can't use hpt_hash since va > 64 bits */
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
+ /*
+ * We assume that the hash table is empty and no
+ * vcpus are using it at this stage. Since we create
+ * at most one HPTE per HPTEG, we just assume entry 7
+ * is available and use it.
+ */
+ hash = (hash << 3) + 7;
+ hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+ hp_r = hp1 | addr;
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+ &idx_ret);
+ if (ret != H_SUCCESS) {
+ pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
+ addr, ret);
+ break;
+ }
+ }
+}
+
+int kvmppc_mmu_hv_init(void)
+{
+ unsigned long host_lpid, rsvd_lpid;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EINVAL;
+
+ /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ host_lpid = mfspr(SPRN_LPID); /* POWER7 */
+ rsvd_lpid = LPID_RSVD;
+ } else {
+ host_lpid = 0; /* PPC970 */
+ rsvd_lpid = MAX_LPID_970;
+ }
+
+ kvmppc_init_lpid(rsvd_lpid + 1);
+
+ kvmppc_claim_lpid(host_lpid);
+ /* rsvd_lpid is reserved for use in partition switching */
+ kvmppc_claim_lpid(rsvd_lpid);
+
+ return 0;
+}
+
+static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
+{
+ unsigned long msr = vcpu->arch.intr_msr;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ msr |= MSR_TS_S;
+ else
+ msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
+ kvmppc_set_msr(vcpu, msr);
+}
+
+/*
+ * This is called to get a reference to a guest page if there isn't
+ * one already in the memslot->arch.slot_phys[] array.
+ */
+static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
+{
+ unsigned long start;
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
+ unsigned long *physp;
+ unsigned int is_io, got, pgorder;
+ struct vm_area_struct *vma;
+ unsigned long pfn, i, npages;
+
+ physp = memslot->arch.slot_phys;
+ if (!physp)
+ return -EINVAL;
+ if (physp[gfn - memslot->base_gfn])
+ return 0;
+
+ is_io = 0;
+ got = 0;
+ page = NULL;
+ pgsize = psize;
+ err = -EINVAL;
+ start = gfn_to_hva_memslot(memslot, gfn);
+
+ /* Instantiate and get the page we want access to */
+ np = get_user_pages_fast(start, 1, 1, pages);
+ if (np != 1) {
+ /* Look up the vma for the page */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, start);
+ if (!vma || vma->vm_start > start ||
+ start + psize > vma->vm_end ||
+ !(vma->vm_flags & VM_PFNMAP))
+ goto up_err;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ /* check alignment of pfn vs. requested page size */
+ if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
+ goto up_err;
+ up_read(&current->mm->mmap_sem);
+
+ } else {
+ page = pages[0];
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ get_page(hpage);
+ put_page(page);
+ page = hpage;
+ }
+ }
+ if (s < psize)
+ goto out;
+ pfn = page_to_pfn(page);
+ }
+
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) +
+ got + is_io + pgorder;
+ got = 0;
+ }
+ }
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
+
+ out:
+ if (got)
+ put_page(page);
+ return err;
+
+ up_err:
+ up_read(&current->mm->mmap_sem);
+ return err;
+}
+
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret)
+{
+ unsigned long psize, gpa, gfn;
+ struct kvm_memory_slot *memslot;
+ long ret;
+
+ if (kvm->arch.using_mmu_notifiers)
+ goto do_insert;
+
+ psize = hpte_page_size(pteh, ptel);
+ if (!psize)
+ return H_PARAMETER;
+
+ pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+
+ /* Find the memslot (if any) for this address */
+ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
+ return H_PARAMETER;
+ }
+
+ do_insert:
+ /* Protect linux PTE lookup from page table destruction */
+ rcu_read_lock_sched(); /* this disables preemption too */
+ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+ current->mm->pgd, false, pte_idx_ret);
+ rcu_read_unlock_sched();
+ if (ret == H_TOO_HARD) {
+ /* this can't happen */
+ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
+ ret = H_RESOURCE; /* or something */
+ }
+ return ret;
+
+}
+
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel)
+{
+ return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+ pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
+static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
+ gva_t eaddr)
+{
+ u64 mask;
+ int i;
+
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
+ continue;
+
+ if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
+ mask = ESID_MASK_1T;
+ else
+ mask = ESID_MASK;
+
+ if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
+ return &vcpu->arch.slb[i];
+ }
+ return NULL;
+}
+
+static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
+ unsigned long ea)
+{
+ unsigned long ra_mask;
+
+ ra_mask = hpte_page_size(v, r) - 1;
+ return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
+}
+
+static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_slb *slbe;
+ unsigned long slb_v;
+ unsigned long pp, key;
+ unsigned long v, gr;
+ unsigned long *hptep;
+ int index;
+ int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+
+ /* Get SLB entry */
+ if (virtmode) {
+ slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
+ if (!slbe)
+ return -EINVAL;
+ slb_v = slbe->origv;
+ } else {
+ /* real mode access */
+ slb_v = vcpu->kvm->arch.vrma_slb_v;
+ }
+
+ preempt_disable();
+ /* Find the HPTE in the hash table */
+ index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
+ HPTE_V_VALID | HPTE_V_ABSENT);
+ if (index < 0) {
+ preempt_enable();
+ return -ENOENT;
+ }
+ hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ v = hptep[0] & ~HPTE_V_HVLOCK;
+ gr = kvm->arch.revmap[index].guest_rpte;
+
+ /* Unlock the HPTE */
+ asm volatile("lwsync" : : : "memory");
+ hptep[0] = v;
+ preempt_enable();
+
+ gpte->eaddr = eaddr;
+ gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
+
+ /* Get PP bits and key for permission check */
+ pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+ key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ key &= slb_v;
+
+ /* Calculate permissions */
+ gpte->may_read = hpte_read_permission(pp, key);
+ gpte->may_write = hpte_write_permission(pp, key);
+ gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
+
+ /* Storage key permission check for POWER7 */
+ if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+ int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
+ if (amrfield & 1)
+ gpte->may_read = 0;
+ if (amrfield & 2)
+ gpte->may_write = 0;
+ }
+
+ /* Get the guest physical address */
+ gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
+ return 0;
+}
+
+/*
+ * Quick test for whether an instruction is a load or a store.
+ * If the instruction is a load or a store, then this will indicate
+ * which it is, at least on server processors. (Embedded processors
+ * have some external PID instructions that don't follow the rule
+ * embodied here.) If the instruction isn't a load or store, then
+ * this doesn't return anything useful.
+ */
+static int instruction_is_store(unsigned int instr)
+{
+ unsigned int mask;
+
+ mask = 0x10000000;
+ if ((instr & 0xfc000000) == 0x7c000000)
+ mask = 0x100; /* major opcode 31 */
+ return (instr & mask) != 0;
+}
+
+static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long gpa, gva_t ea, int is_store)
+{
+ int ret;
+ u32 last_inst;
+ unsigned long srr0 = kvmppc_get_pc(vcpu);
+
+ /* We try to load the last instruction. We don't let
+ * emulate_instruction do it as it doesn't check what
+ * kvmppc_ld returns.
+ * If we fail, we just return to the guest and try executing it again.
+ */
+ if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
+ ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
+ if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
+ return RESUME_GUEST;
+ vcpu->arch.last_inst = last_inst;
+ }
+
+ /*
+ * WARNING: We do not know for sure whether the instruction we just
+ * read from memory is the same that caused the fault in the first
+ * place. If the instruction we read is neither an load or a store,
+ * then it can't access memory, so we don't need to worry about
+ * enforcing access permissions. So, assuming it is a load or
+ * store, we just check that its direction (load or store) is
+ * consistent with the original fault, since that's what we
+ * checked the access permissions against. If there is a mismatch
+ * we just return and retry the instruction.
+ */
+
+ if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store)
+ return RESUME_GUEST;
+
+ /*
+ * Emulated accesses are emulated by looking at the hash for
+ * translation once, then performing the access later. The
+ * translation could be invalidated in the meantime in which
+ * point performing the subsequent memory access on the old
+ * physical address could possibly be a security hole for the
+ * guest (but not the host).
+ *
+ * This is less of an issue for MMIO stores since they aren't
+ * globally visible. It could be an issue for MMIO loads to
+ * a certain extent but we'll ignore it for now.
+ */
+
+ vcpu->arch.paddr_accessed = gpa;
+ vcpu->arch.vaddr_accessed = ea;
+ return kvmppc_emulate_mmio(run, vcpu);
+}
+
+int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hptep, hpte[3], r;
+ unsigned long mmu_seq, psize, pte_size;
+ unsigned long gpa_base, gfn_base;
+ unsigned long gpa, gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ struct revmap_entry *rev;
+ struct page *page, *pages[1];
+ long index, ret, npages;
+ unsigned long is_io;
+ unsigned int writing, write_ok;
+ struct vm_area_struct *vma;
+ unsigned long rcbits;
+
+ /*
+ * Real-mode code has already searched the HPT and found the
+ * entry we're interested in. Lock the entry and check that
+ * it hasn't changed. If it has, just return and re-execute the
+ * instruction.
+ */
+ if (ea != vcpu->arch.pgfault_addr)
+ return RESUME_GUEST;
+ index = vcpu->arch.pgfault_index;
+ hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ rev = &kvm->arch.revmap[index];
+ preempt_disable();
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
+ hpte[1] = hptep[1];
+ hpte[2] = r = rev->guest_rpte;
+ asm volatile("lwsync" : : : "memory");
+ hptep[0] = hpte[0];
+ preempt_enable();
+
+ if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
+ hpte[1] != vcpu->arch.pgfault_hpte[1])
+ return RESUME_GUEST;
+
+ /* Translate the logical address and get the page */
+ psize = hpte_page_size(hpte[0], r);
+ gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+ gfn_base = gpa_base >> PAGE_SHIFT;
+ gpa = gpa_base | (ea & (psize - 1));
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(kvm, gfn);
+
+ /* No memslot means it's an emulated MMIO region */
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ dsisr & DSISR_ISSTORE);
+
+ if (!kvm->arch.using_mmu_notifiers)
+ return -EFAULT; /* should never get here */
+
+ /*
+ * This should never happen, because of the slot_is_aligned()
+ * check in kvmppc_do_h_enter().
+ */
+ if (gfn_base < memslot->base_gfn)
+ return -EFAULT;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ is_io = 0;
+ pfn = 0;
+ page = NULL;
+ pte_size = PAGE_SIZE;
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+ /* If writing != 0, then the HPTE must allow writing, if we get here */
+ write_ok = writing;
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, writing, pages);
+ if (npages < 1) {
+ /* Check if it's an I/O mapping */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ pfn = vma->vm_pgoff +
+ ((hva - vma->vm_start) >> PAGE_SHIFT);
+ pte_size = psize;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ write_ok = vma->vm_flags & VM_WRITE;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (!pfn)
+ return -EFAULT;
+ } else {
+ page = pages[0];
+ pfn = page_to_pfn(page);
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ pte_size <<= compound_order(page);
+ }
+ /* if the guest wants write access, see if that is OK */
+ if (!writing && hpte_is_writable(r)) {
+ unsigned int hugepage_shift;
+ pte_t *ptep, pte;
+
+ /*
+ * We need to protect against page table destruction
+ * while looking up and updating the pte.
+ */
+ rcu_read_lock_sched();
+ ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+ hva, &hugepage_shift);
+ if (ptep) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1,
+ hugepage_shift);
+ if (pte_write(pte))
+ write_ok = 1;
+ }
+ rcu_read_unlock_sched();
+ }
+ }
+
+ ret = -EFAULT;
+ if (psize > pte_size)
+ goto out_put;
+
+ /* Check WIMG vs. the actual page we're accessing */
+ if (!hpte_cache_flags_ok(r, is_io)) {
+ if (is_io)
+ return -EFAULT;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+ }
+
+ /*
+ * Set the HPTE to point to pfn.
+ * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * don't mask out lower-order bits if psize < PAGE_SIZE.
+ */
+ if (psize < PAGE_SIZE)
+ psize = PAGE_SIZE;
+ r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ if (hpte_is_writable(r) && !write_ok)
+ r = hpte_make_readonly(r);
+ ret = RESUME_GUEST;
+ preempt_disable();
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+ rev->guest_rpte != hpte[2])
+ /* HPTE has been changed under us; let the guest retry */
+ goto out_unlock;
+ hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+ /* Always put the HPTE in the rmap chain for the page base address */
+ rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
+ lock_rmap(rmap);
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+ ret = RESUME_GUEST;
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+ unlock_rmap(rmap);
+ goto out_unlock;
+ }
+
+ /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
+ if (hptep[0] & HPTE_V_VALID) {
+ /* HPTE was previously valid, so we need to invalidate it */
+ unlock_rmap(rmap);
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, index);
+ /* don't lose previous R and C bits */
+ r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+ }
+
+ hptep[1] = r;
+ eieio();
+ hptep[0] = hpte[0];
+ asm volatile("ptesync" : : : "memory");
+ preempt_enable();
+ if (page && hpte_is_writable(r))
+ SetPageDirty(page);
+
+ out_put:
+ if (page) {
+ /*
+ * We drop pages[0] here, not page because page might
+ * have been set to the head page of a compound, but
+ * we have to drop the reference on the correct tail
+ * page to match the get inside gup()
+ */
+ put_page(pages[0]);
+ }
+ return ret;
+
+ out_unlock:
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ goto out_put;
+}
+
+static void kvmppc_rmap_reset(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm->memslots;
+ kvm_for_each_memslot(memslot, slots) {
+ /*
+ * This assumes it is acceptable to lose reference and
+ * change bits across a reset.
+ */
+ memset(memslot->arch.rmap, 0,
+ memslot->npages * sizeof(*memslot->arch.rmap));
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+static int kvm_handle_hva_range(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ int (*handler)(struct kvm *kvm,
+ unsigned long *rmapp,
+ unsigned long gfn))
+{
+ int ret;
+ int retval = 0;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long hva_start, hva_end;
+ gfn_t gfn, gfn_end;
+
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn, gfn+1, ..., gfn_end-1}.
+ */
+ gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+ for (; gfn < gfn_end; ++gfn) {
+ gfn_t gfn_offset = gfn - memslot->base_gfn;
+
+ ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
+ retval |= ret;
+ }
+ }
+
+ return retval;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn))
+{
+ return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long h, i, j;
+ unsigned long *hptep;
+ unsigned long ptel, psize, rcbits;
+
+ for (;;) {
+ lock_rmap(rmapp);
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ break;
+ }
+
+ /*
+ * To avoid an ABBA deadlock with the HPTE lock bit,
+ * we can't spin on the HPTE lock while holding the
+ * rmap chain lock.
+ */
+ i = *rmapp & KVMPPC_RMAP_INDEX;
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ continue;
+ }
+ j = rev[i].forw;
+ if (j == i) {
+ /* chain is now empty */
+ *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ } else {
+ /* remove i from chain */
+ h = rev[i].back;
+ rev[h].forw = j;
+ rev[j].back = h;
+ rev[i].forw = rev[i].back = i;
+ *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+ }
+
+ /* Now check and modify the HPTE */
+ ptel = rev[i].guest_rpte;
+ psize = hpte_page_size(hptep[0], ptel);
+ if ((hptep[0] & HPTE_V_VALID) &&
+ hpte_rpn(ptel, psize) == gfn) {
+ if (kvm->arch.using_mmu_notifiers)
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ /* Harvest R and C */
+ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+ *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & ~rev[i].guest_rpte) {
+ rev[i].guest_rpte = ptel | rcbits;
+ note_hpte_modification(kvm, &rev[i]);
+ }
+ }
+ unlock_rmap(rmapp);
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ }
+ return 0;
+}
+
+int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
+{
+ if (kvm->arch.using_mmu_notifiers)
+ kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ return 0;
+}
+
+int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ if (kvm->arch.using_mmu_notifiers)
+ kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+ return 0;
+}
+
+void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ unsigned long *rmapp;
+ unsigned long gfn;
+ unsigned long n;
+
+ rmapp = memslot->arch.rmap;
+ gfn = memslot->base_gfn;
+ for (n = memslot->npages; n; --n) {
+ /*
+ * Testing the present bit without locking is OK because
+ * the memslot has been marked invalid already, and hence
+ * no new HPTEs referencing this page can be created,
+ * thus the present bit can't go from 0 to 1.
+ */
+ if (*rmapp & KVMPPC_RMAP_PRESENT)
+ kvm_unmap_rmapp(kvm, rmapp, gfn);
+ ++rmapp;
+ ++gfn;
+ }
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hptep;
+ int ret = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_REFERENCED) {
+ *rmapp &= ~KVMPPC_RMAP_REFERENCED;
+ ret = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return ret;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+
+ /* If this HPTE isn't referenced, ignore it */
+ if (!(hptep[1] & HPTE_R_R))
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry;
+ }
+
+ /* Now check and modify the HPTE */
+ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
+ kvmppc_clear_ref_hpte(kvm, hptep, i);
+ if (!(rev[i].guest_rpte & HPTE_R_R)) {
+ rev[i].guest_rpte |= HPTE_R_R;
+ note_hpte_modification(kvm, &rev[i]);
+ }
+ ret = 1;
+ }
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+{
+ if (!kvm->arch.using_mmu_notifiers)
+ return 0;
+ return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hp;
+ int ret = 1;
+
+ if (*rmapp & KVMPPC_RMAP_REFERENCED)
+ return 1;
+
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_REFERENCED)
+ goto out;
+
+ if (*rmapp & KVMPPC_RMAP_PRESENT) {
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+ if (hp[1] & HPTE_R_R)
+ goto out;
+ } while ((i = j) != head);
+ }
+ ret = 0;
+
+ out:
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
+{
+ if (!kvm->arch.using_mmu_notifiers)
+ return 0;
+ return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ if (!kvm->arch.using_mmu_notifiers)
+ return;
+ kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int vcpus_running(struct kvm *kvm)
+{
+ return atomic_read(&kvm->arch.vcpus_running) != 0;
+}
+
+/*
+ * Returns the number of system pages that are dirty.
+ * This can be more than 1 if we find a huge-page HPTE.
+ */
+static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long n;
+ unsigned long v, r;
+ unsigned long *hptep;
+ int npages_dirty = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_CHANGED) {
+ *rmapp &= ~KVMPPC_RMAP_CHANGED;
+ npages_dirty = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return npages_dirty;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+
+ /*
+ * Checking the C (changed) bit here is racy since there
+ * is no guarantee about when the hardware writes it back.
+ * If the HPTE is not writable then it is stable since the
+ * page can't be written to, and we would have done a tlbie
+ * (which forces the hardware to complete any writeback)
+ * when making the HPTE read-only.
+ * If vcpus are running then this call is racy anyway
+ * since the page could get dirtied subsequently, so we
+ * expect there to be a further call which would pick up
+ * any delayed C bit writeback.
+ * Otherwise we need to do the tlbie even if C==0 in
+ * order to pick up any delayed writeback of C.
+ */
+ if (!(hptep[1] & HPTE_R_C) &&
+ (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry;
+ }
+
+ /* Now check and modify the HPTE */
+ if (!(hptep[0] & HPTE_V_VALID))
+ continue;
+
+ /* need to make it temporarily absent so C is stable */
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ v = hptep[0];
+ r = hptep[1];
+ if (r & HPTE_R_C) {
+ hptep[1] = r & ~HPTE_R_C;
+ if (!(rev[i].guest_rpte & HPTE_R_C)) {
+ rev[i].guest_rpte |= HPTE_R_C;
+ note_hpte_modification(kvm, &rev[i]);
+ }
+ n = hpte_page_size(v, r);
+ n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (n > npages_dirty)
+ npages_dirty = n;
+ eieio();
+ }
+ v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+ v |= HPTE_V_VALID;
+ hptep[0] = v;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return npages_dirty;
+}
+
+static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+ struct kvm_memory_slot *memslot,
+ unsigned long *map)
+{
+ unsigned long gfn;
+
+ if (!vpa->dirty || !vpa->pinned_addr)
+ return;
+ gfn = vpa->gpa >> PAGE_SHIFT;
+ if (gfn < memslot->base_gfn ||
+ gfn >= memslot->base_gfn + memslot->npages)
+ return;
+
+ vpa->dirty = false;
+ if (map)
+ __set_bit_le(gfn - memslot->base_gfn, map);
+}
+
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long *map)
+{
+ unsigned long i, j;
+ unsigned long *rmapp;
+ struct kvm_vcpu *vcpu;
+
+ preempt_disable();
+ rmapp = memslot->arch.rmap;
+ for (i = 0; i < memslot->npages; ++i) {
+ int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
+ /*
+ * Note that if npages > 0 then i must be a multiple of npages,
+ * since we always put huge-page HPTEs in the rmap chain
+ * corresponding to their page base address.
+ */
+ if (npages && map)
+ for (j = i; npages; ++j, --npages)
+ __set_bit_le(j, map);
+ ++rmapp;
+ }
+
+ /* Harvest dirty bits from VPA and DTL updates */
+ /* Note: we never modify the SLB shadow buffer areas */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
+ harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+ preempt_enable();
+ return 0;
+}
+
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long *nb_ret)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ struct page *page, *pages[1];
+ int npages;
+ unsigned long hva, offset;
+ unsigned long pa;
+ unsigned long *physp;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ goto err;
+ if (!kvm->arch.using_mmu_notifiers) {
+ physp = memslot->arch.slot_phys;
+ if (!physp)
+ goto err;
+ physp += gfn - memslot->base_gfn;
+ pa = *physp;
+ if (!pa) {
+ if (kvmppc_get_guest_page(kvm, gfn, memslot,
+ PAGE_SIZE) < 0)
+ goto err;
+ pa = *physp;
+ }
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ get_page(page);
+ } else {
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, 1, pages);
+ if (npages < 1)
+ goto err;
+ page = pages[0];
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ offset = gpa & (PAGE_SIZE - 1);
+ if (nb_ret)
+ *nb_ret = PAGE_SIZE - offset;
+ return page_address(page) + offset;
+
+ err:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return NULL;
+}
+
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+ bool dirty)
+{
+ struct page *page = virt_to_page(va);
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn;
+ unsigned long *rmap;
+ int srcu_idx;
+
+ put_page(page);
+
+ if (!dirty || !kvm->arch.using_mmu_notifiers)
+ return;
+
+ /* We need to mark this page dirty in the rmap chain */
+ gfn = gpa >> PAGE_SHIFT;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot) {
+ rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ lock_rmap(rmap);
+ *rmap |= KVMPPC_RMAP_CHANGED;
+ unlock_rmap(rmap);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done. When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+struct kvm_htab_ctx {
+ unsigned long index;
+ unsigned long flags;
+ struct kvm *kvm;
+ int first_pass;
+};
+
+#define HPTE_SIZE (2 * sizeof(unsigned long))
+
+/*
+ * Returns 1 if this HPT entry has been modified or has pending
+ * R/C bit changes.
+ */
+static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+{
+ unsigned long rcbits_unset;
+
+ if (revp->guest_rpte & HPTE_GR_MODIFIED)
+ return 1;
+
+ /* Also need to consider changes in reference and changed bits */
+ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+ if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+ return 1;
+
+ return 0;
+}
+
+static long record_hpte(unsigned long flags, unsigned long *hptp,
+ unsigned long *hpte, struct revmap_entry *revp,
+ int want_valid, int first_pass)
+{
+ unsigned long v, r;
+ unsigned long rcbits_unset;
+ int ok = 1;
+ int valid, dirty;
+
+ /* Unmodified entries are uninteresting except on the first pass */
+ dirty = hpte_dirty(revp, hptp);
+ if (!first_pass && !dirty)
+ return 0;
+
+ valid = 0;
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ valid = 1;
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+ !(hptp[0] & HPTE_V_BOLTED))
+ valid = 0;
+ }
+ if (valid != want_valid)
+ return 0;
+
+ v = r = 0;
+ if (valid || dirty) {
+ /* lock the HPTE so it's stable and read it */
+ preempt_disable();
+ while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hptp[0];
+
+ /* re-evaluate valid and dirty from synchronized HPTE value */
+ valid = !!(v & HPTE_V_VALID);
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+ /* Harvest R and C into guest view if necessary */
+ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+ if (valid && (rcbits_unset & hptp[1])) {
+ revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
+ HPTE_GR_MODIFIED;
+ dirty = 1;
+ }
+
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ valid = 1;
+ }
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+ valid = 0;
+
+ r = revp->guest_rpte;
+ /* only clear modified if this is the right sort of entry */
+ if (valid == want_valid && dirty) {
+ r &= ~HPTE_GR_MODIFIED;
+ revp->guest_rpte = r;
+ }
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hptp[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ if (!(valid == want_valid && (first_pass || dirty)))
+ ok = 0;
+ }
+ hpte[0] = v;
+ hpte[1] = r;
+ return ok;
+}
+
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long *hptp;
+ struct revmap_entry *revp;
+ unsigned long i, nb, nw;
+ unsigned long __user *lbuf;
+ struct kvm_get_htab_header __user *hptr;
+ unsigned long flags;
+ int first_pass;
+ unsigned long hpte[2];
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ first_pass = ctx->first_pass;
+ flags = ctx->flags;
+
+ i = ctx->index;
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ revp = kvm->arch.revmap + i;
+ lbuf = (unsigned long __user *)buf;
+
+ nb = 0;
+ while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+ /* Initialize header */
+ hptr = (struct kvm_get_htab_header __user *)buf;
+ hdr.n_valid = 0;
+ hdr.n_invalid = 0;
+ nw = nb;
+ nb += sizeof(hdr);
+ lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+ /* Skip uninteresting entries, i.e. clean on not-first pass */
+ if (!first_pass) {
+ while (i < kvm->arch.hpt_npte &&
+ !hpte_dirty(revp, hptp)) {
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ }
+ hdr.index = i;
+
+ /* Grab a series of valid entries */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_valid < 0xffff &&
+ nb + HPTE_SIZE < count &&
+ record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+ /* valid entry, write it out */
+ ++hdr.n_valid;
+ if (__put_user(hpte[0], lbuf) ||
+ __put_user(hpte[1], lbuf + 1))
+ return -EFAULT;
+ nb += HPTE_SIZE;
+ lbuf += 2;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ /* Now skip invalid entries while we can */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_invalid < 0xffff &&
+ record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+ /* found an invalid entry */
+ ++hdr.n_invalid;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+
+ if (hdr.n_valid || hdr.n_invalid) {
+ /* write back the header */
+ if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+ nw = nb;
+ buf = (char __user *)lbuf;
+ } else {
+ nb = nw;
+ }
+
+ /* Check if we've wrapped around the hash table */
+ if (i >= kvm->arch.hpt_npte) {
+ i = 0;
+ ctx->first_pass = 0;
+ break;
+ }
+ }
+
+ ctx->index = i;
+
+ return nb;
+}
+
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long i, j;
+ unsigned long v, r;
+ unsigned long __user *lbuf;
+ unsigned long *hptp;
+ unsigned long tmp[2];
+ ssize_t nb;
+ long int err, ret;
+ int rma_setup;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+ /* lock out vcpus from running while we're doing this */
+ mutex_lock(&kvm->lock);
+ rma_setup = kvm->arch.rma_setup_done;
+ if (rma_setup) {
+ kvm->arch.rma_setup_done = 0; /* temporarily */
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ }
+
+ err = 0;
+ for (nb = 0; nb + sizeof(hdr) <= count; ) {
+ err = -EFAULT;
+ if (__copy_from_user(&hdr, buf, sizeof(hdr)))
+ break;
+
+ err = 0;
+ if (nb + hdr.n_valid * HPTE_SIZE > count)
+ break;
+
+ nb += sizeof(hdr);
+ buf += sizeof(hdr);
+
+ err = -EINVAL;
+ i = hdr.index;
+ if (i >= kvm->arch.hpt_npte ||
+ i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+ break;
+
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ lbuf = (unsigned long __user *)buf;
+ for (j = 0; j < hdr.n_valid; ++j) {
+ err = -EFAULT;
+ if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+ goto out;
+ err = -EINVAL;
+ if (!(v & HPTE_V_VALID))
+ goto out;
+ lbuf += 2;
+ nb += HPTE_SIZE;
+
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ err = -EIO;
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
+ tmp);
+ if (ret != H_SUCCESS) {
+ pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
+ "r=%lx\n", ret, i, v, r);
+ goto out;
+ }
+ if (!rma_setup && is_vrma_hpte(v)) {
+ unsigned long psize = hpte_base_page_size(v, r);
+ unsigned long senc = slb_pgsize_encoding(psize);
+ unsigned long lpcr;
+
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ rma_setup = 1;
+ }
+ ++i;
+ hptp += 2;
+ }
+
+ for (j = 0; j < hdr.n_invalid; ++j) {
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ ++i;
+ hptp += 2;
+ }
+ err = 0;
+ }
+
+ out:
+ /* Order HPTE updates vs. rma_setup_done */
+ smp_wmb();
+ kvm->arch.rma_setup_done = rma_setup;
+ mutex_unlock(&kvm->lock);
+
+ if (err)
+ return err;
+ return nb;
+}
+
+static int kvm_htab_release(struct inode *inode, struct file *filp)
+{
+ struct kvm_htab_ctx *ctx = filp->private_data;
+
+ filp->private_data = NULL;
+ if (!(ctx->flags & KVM_GET_HTAB_WRITE))
+ atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
+ kvm_put_kvm(ctx->kvm);
+ kfree(ctx);
+ return 0;
+}
+
+static const struct file_operations kvm_htab_fops = {
+ .read = kvm_htab_read,
+ .write = kvm_htab_write,
+ .llseek = default_llseek,
+ .release = kvm_htab_release,
+};
+
+int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
+{
+ int ret;
+ struct kvm_htab_ctx *ctx;
+ int rwflag;
+
+ /* reject flags we don't recognize */
+ if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
+ return -EINVAL;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ kvm_get_kvm(kvm);
+ ctx->kvm = kvm;
+ ctx->index = ghf->start_index;
+ ctx->flags = ghf->flags;
+ ctx->first_pass = 1;
+
+ rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
+ ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
+ if (ret < 0) {
+ kvm_put_kvm(kvm);
+ return ret;
+ }
+
+ if (rwflag == O_RDONLY) {
+ mutex_lock(&kvm->slots_lock);
+ atomic_inc(&kvm->arch.hpte_mod_interest);
+ /* make sure kvmppc_do_h_enter etc. see the increment */
+ synchronize_srcu_expedited(&kvm->srcu);
+ mutex_unlock(&kvm->slots_lock);
+ }
+
+ return ret;
+}
+
+void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ vcpu->arch.slb_nr = 32; /* POWER7 */
+ else
+ vcpu->arch.slb_nr = 64;
+
+ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
+ mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
+
+ vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 04e7d3bbfe8..3589c4e3d49 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -17,26 +17,9 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
-#define SHADOW_SLB_ESID(num) (SLBSHADOW_SAVEAREA + (num * 0x10))
-#define SHADOW_SLB_VSID(num) (SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
-#define UNBOLT_SLB_ENTRY(num) \
- ld r9, SHADOW_SLB_ESID(num)(r12); \
- /* Invalid? Skip. */; \
- rldicl. r0, r9, 37, 63; \
- beq slb_entry_skip_ ## num; \
- xoris r9, r9, SLB_ESID_V@h; \
- std r9, SHADOW_SLB_ESID(num)(r12); \
- slb_entry_skip_ ## num:
-
-#define REBOLT_SLB_ENTRY(num) \
- ld r10, SHADOW_SLB_ESID(num)(r11); \
- cmpdi r10, 0; \
- beq slb_exit_skip_ ## num; \
- oris r10, r10, SLB_ESID_V@h; \
- ld r9, SHADOW_SLB_VSID(num)(r11); \
- slbmte r9, r10; \
- std r10, SHADOW_SLB_ESID(num)(r11); \
-slb_exit_skip_ ## num:
+#define SHADOW_SLB_ENTRY_LEN 0x10
+#define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x)
+#define OFFSET_VSID(x) ((SHADOW_SLB_ENTRY_LEN * x) + 8)
/******************************************************************************
* *
@@ -53,45 +36,29 @@ slb_exit_skip_ ## num:
* R1 = host R1
* R2 = host R2
* R3 = shadow vcpu
- * all other volatile GPRS = free
+ * all other volatile GPRS = free except R4, R6
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
* SVCPU[CTR] = guest CTR
* SVCPU[LR] = guest LR
*/
- /* Remove LPAR shadow entries */
+BEGIN_FW_FTR_SECTION
-#if SLB_NUM_BOLTED == 3
+ /* Declare SLB shadow as 0 entries big */
- ld r12, PACA_SLBSHADOWPTR(r13)
-
- /* Save off the first entry so we can slbie it later */
- ld r10, SHADOW_SLB_ESID(0)(r12)
- ld r11, SHADOW_SLB_VSID(0)(r12)
+ ld r11, PACA_SLBSHADOWPTR(r13)
+ li r8, 0
+ stb r8, 3(r11)
- /* Remove bolted entries */
- UNBOLT_SLB_ENTRY(0)
- UNBOLT_SLB_ENTRY(1)
- UNBOLT_SLB_ENTRY(2)
-
-#else
-#error unknown number of bolted entries
-#endif
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
/* Flush SLB */
+ li r10, 0
+ slbmte r10, r10
slbia
- /* r0 = esid & ESID_MASK */
- rldicr r10, r10, 0, 35
- /* r0 |= CLASS_BIT(VSID) */
- rldic r12, r11, 56 - 36, 36
- or r10, r10, r12
- slbie r10
-
- isync
-
/* Fill SLB with our shadow */
lbz r12, SVCPU_SLB_MAX(r3)
@@ -107,7 +74,7 @@ slb_loop_enter:
ld r10, 0(r11)
- rldicl. r0, r10, 37, 63
+ andis. r9, r10, SLB_ESID_V@h
beq slb_loop_enter_skip
ld r9, 8(r11)
@@ -144,23 +111,42 @@ slb_do_enter:
*
*/
- /* Restore bolted entries from the shadow and fix it along the way */
+ /* Remove all SLB entries that are in use. */
- /* We don't store anything in entry 0, so we don't need to take care of it */
+ li r0, r0
+ slbmte r0, r0
slbia
- isync
-#if SLB_NUM_BOLTED == 3
+ /* Restore bolted entries from the shadow */
ld r11, PACA_SLBSHADOWPTR(r13)
- REBOLT_SLB_ENTRY(0)
- REBOLT_SLB_ENTRY(1)
- REBOLT_SLB_ENTRY(2)
-
-#else
-#error unknown number of bolted entries
-#endif
+BEGIN_FW_FTR_SECTION
+
+ /* Declare SLB shadow as SLB_NUM_BOLTED entries big */
+
+ li r8, SLB_NUM_BOLTED
+ stb r8, 3(r11)
+
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
+
+ /* Manually load all entries from shadow SLB */
+
+ li r8, SLBSHADOW_SAVEAREA
+ li r7, SLBSHADOW_SAVEAREA + 8
+
+ .rept SLB_NUM_BOLTED
+ LDX_BE r10, r11, r8
+ cmpdi r10, 0
+ beq 1f
+ LDX_BE r9, r11, r7
+ slbmte r9, r10
+1: addi r7, r7, SHADOW_SLB_ENTRY_LEN
+ addi r8, r8, SHADOW_SLB_ENTRY_LEN
+ .endr
+
+ isync
+ sync
slb_do_exit:
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
new file mode 100644
index 00000000000..54cf9bc94da
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -0,0 +1,150 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+ return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+ * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+{
+ struct kvm *kvm = stt->kvm;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ list_del(&stt->list);
+ for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ __free_page(stt->pages[i]);
+ kfree(stt);
+ mutex_unlock(&kvm->lock);
+
+ kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ return VM_FAULT_SIGBUS;
+
+ page = stt->pages[vmf->pgoff];
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+ .fault = kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+ struct kvmppc_spapr_tce_table *stt = filp->private_data;
+
+ release_spapr_tce_table(stt);
+ return 0;
+}
+
+static const struct file_operations kvm_spapr_tce_fops = {
+ .mmap = kvm_spapr_tce_mmap,
+ .release = kvm_spapr_tce_release,
+};
+
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce *args)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ long npages;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Check this LIOBN hasn't been previously allocated */
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == args->liobn)
+ return -EBUSY;
+ }
+
+ npages = kvmppc_stt_npages(args->window_size);
+
+ stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!stt)
+ goto fail;
+
+ stt->liobn = args->liobn;
+ stt->window_size = args->window_size;
+ stt->kvm = kvm;
+
+ for (i = 0; i < npages; i++) {
+ stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!stt->pages[i])
+ goto fail;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+ list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ mutex_unlock(&kvm->lock);
+
+ return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR | O_CLOEXEC);
+
+fail:
+ if (stt) {
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
+
+ kfree(stt);
+ }
+ return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
new file mode 100644
index 00000000000..89e96b3e003
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -0,0 +1,105 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+/* WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_spapr_tce_table *stt;
+
+ /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
+ /* liobn, ioba, tce); */
+
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == liobn) {
+ unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
+ struct page *page;
+ u64 *tbl;
+
+ /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
+ /* liobn, stt, stt->window_size); */
+ if (ioba >= stt->window_size)
+ return H_PARAMETER;
+
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ /* FIXME: Need to validate the TCE itself */
+ /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
+ tbl[idx % TCES_PER_PAGE] = tce;
+ return H_SUCCESS;
+ }
+ }
+
+ /* Didn't find the liobn, punt it to userspace */
+ return H_TOO_HARD;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_spapr_tce_table *stt;
+
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == liobn) {
+ unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
+ struct page *page;
+ u64 *tbl;
+
+ if (ioba >= stt->window_size)
+ return H_PARAMETER;
+
+ page = stt->pages[idx / TCES_PER_PAGE];
+ tbl = (u64 *)page_address(page);
+
+ vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+ return H_SUCCESS;
+ }
+ }
+
+ /* Didn't find the liobn, punt it to userspace */
+ return H_TOO_HARD;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 46684655708..3f295269af3 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -21,6 +21,8 @@
#include <asm/disassemble.h>
#include <asm/kvm_book3s.h>
#include <asm/reg.h>
+#include <asm/switch_to.h>
+#include <asm/time.h>
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@@ -32,6 +34,8 @@
#define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274
#define OP_31_XOP_TLBIE 306
+/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
+#define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402
#define OP_31_XOP_SLBIE 434
#define OP_31_XOP_SLBIA 498
@@ -63,18 +67,58 @@
* function pointers, so let's just disable the define. */
#undef mfsrin
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+enum priv_level {
+ PRIV_PROBLEM = 0,
+ PRIV_SUPER = 1,
+ PRIV_HYPER = 2,
+};
+
+static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
+{
+ /* PAPR VMs only access supervisor SPRs */
+ if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
+ return false;
+
+ /* Limit user space to its own small SPR set */
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)
+ return false;
+
+ return true;
+}
+
+int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
+ int rt = get_rt(inst);
+ int rs = get_rs(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ u32 inst_sc = 0x44000002;
switch (get_op(inst)) {
+ case 0:
+ emulated = EMULATE_FAIL;
+ if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
+ (inst == swab32(inst_sc))) {
+ /*
+ * This is the byte reversed syscall instruction of our
+ * hypercall handler. Early versions of LE Linux didn't
+ * swap the instructions correctly and ended up in
+ * illegal instructions.
+ * Just always fail hypercalls on these broken systems.
+ */
+ kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ emulated = EMULATE_DONE;
+ }
+ break;
case 19:
switch (get_xop(inst)) {
case OP_19_XOP_RFID:
case OP_19_XOP_RFI:
- kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
- kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+ kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
+ kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
*advance = 0;
break;
@@ -86,21 +130,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- kvmppc_set_gpr(vcpu, get_rt(inst),
- vcpu->arch.shared->msr);
+ kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));
break;
case OP_31_XOP_MTMSRD:
{
- ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
+ ulong rs_val = kvmppc_get_gpr(vcpu, rs);
if (inst & 0x10000) {
- vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
- vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
+ ulong new_msr = kvmppc_get_msr(vcpu);
+ new_msr &= ~(MSR_RI | MSR_EE);
+ new_msr |= rs_val & (MSR_RI | MSR_EE);
+ kvmppc_set_msr_fast(vcpu, new_msr);
} else
- kvmppc_set_msr(vcpu, rs);
+ kvmppc_set_msr(vcpu, rs_val);
break;
}
case OP_31_XOP_MTMSR:
- kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MFSR:
{
@@ -110,7 +155,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
@@ -118,32 +163,60 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int srnum;
- srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
+ srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf;
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
case OP_31_XOP_MTSR:
vcpu->arch.mmu.mtsrin(vcpu,
(inst >> 16) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MTSRIN:
vcpu->arch.mmu.mtsrin(vcpu,
- (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf,
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_TLBIE:
case OP_31_XOP_TLBIEL:
{
bool large = (inst & 0x00200000) ? true : false;
- ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
+ ulong addr = kvmppc_get_gpr(vcpu, rb);
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
+#ifdef CONFIG_PPC_BOOK3S_64
+ case OP_31_XOP_FAKE_SC1:
+ {
+ /* SC 1 papr hypercalls */
+ ulong cmd = kvmppc_get_gpr(vcpu, 3);
+ int i;
+
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) ||
+ !vcpu->arch.papr_enabled) {
+ emulated = EMULATE_FAIL;
+ break;
+ }
+
+ if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
+ break;
+
+ run->papr_hcall.nr = cmd;
+ for (i = 0; i < 9; ++i) {
+ ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+ run->papr_hcall.args[i] = gpr;
+ }
+
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ emulated = EMULATE_EXIT_USER;
+ break;
+ }
+#endif
case OP_31_XOP_EIOIO:
break;
case OP_31_XOP_SLBMTE:
@@ -151,15 +224,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_FAIL;
vcpu->arch.mmu.slbmte(vcpu,
- kvmppc_get_gpr(vcpu, get_rs(inst)),
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rs),
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIE:
if (!vcpu->arch.mmu.slbie)
return EMULATE_FAIL;
vcpu->arch.mmu.slbie(vcpu,
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIA:
if (!vcpu->arch.mmu.slbia)
@@ -171,22 +244,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!vcpu->arch.mmu.slbmfee) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfee(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfee(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_SLBMFEV:
if (!vcpu->arch.mmu.slbmfev) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfev(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfev(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_DCBA:
@@ -194,26 +267,26 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case OP_31_XOP_DCBZ:
{
- ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- ulong ra = 0;
+ ulong rb_val = kvmppc_get_gpr(vcpu, rb);
+ ulong ra_val = 0;
ulong addr, vaddr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
u32 dsisr;
int r;
- if (get_ra(inst))
- ra = kvmppc_get_gpr(vcpu, get_ra(inst));
+ if (ra)
+ ra_val = kvmppc_get_gpr(vcpu, ra);
- addr = (ra + rb) & ~31ULL;
- if (!(vcpu->arch.shared->msr & MSR_SF))
+ addr = (ra_val + rb_val) & ~31ULL;
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
addr &= 0xffffffff;
vaddr = addr;
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
if ((r == -ENOENT) || (r == -EPERM)) {
*advance = 0;
- vcpu->arch.shared->dar = vaddr;
- to_svcpu(vcpu)->fault_dar = vaddr;
+ kvmppc_set_dar(vcpu, vaddr);
+ vcpu->arch.fault_dar = vaddr;
dsisr = DSISR_ISSTORE;
if (r == -ENOENT)
@@ -221,8 +294,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
else if (r == -EPERM)
dsisr |= DSISR_PROTFAULT;
- vcpu->arch.shared->dsisr = dsisr;
- to_svcpu(vcpu)->fault_dsisr = dsisr;
+ kvmppc_set_dsisr(vcpu, dsisr);
+ vcpu->arch.fault_dsisr = dsisr;
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -289,20 +362,21 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
return bat;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SDR1:
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
- vcpu->arch.shared->dsisr = spr_val;
+ kvmppc_set_dsisr(vcpu, spr_val);
break;
case SPRN_DAR:
- vcpu->arch.shared->dar = spr_val;
+ kvmppc_set_dar(vcpu, spr_val);
break;
case SPRN_HIOR:
to_book3s(vcpu)->hior = spr_val;
@@ -365,6 +439,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
(mfmsr() & MSR_HV))
vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
break;
+ case SPRN_PURR:
+ to_book3s(vcpu)->purr_offset = spr_val - get_tb();
+ break;
+ case SPRN_SPURR:
+ to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
@@ -375,6 +455,31 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_GQR7:
to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
break;
+ case SPRN_FSCR:
+ vcpu->arch.fscr = spr_val;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_BESCR:
+ vcpu->arch.bescr = spr_val;
+ break;
+ case SPRN_EBBHR:
+ vcpu->arch.ebbhr = spr_val;
+ break;
+ case SPRN_EBBRR:
+ vcpu->arch.ebbrr = spr_val;
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case SPRN_TFHAR:
+ vcpu->arch.tfhar = spr_val;
+ break;
+ case SPRN_TEXASR:
+ vcpu->arch.texasr = spr_val;
+ break;
+ case SPRN_TFIAR:
+ vcpu->arch.tfiar = spr_val;
+ break;
+#endif
+#endif
case SPRN_ICTC:
case SPRN_THRM1:
case SPRN_THRM2:
@@ -382,6 +487,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_CTRLF:
case SPRN_CTRLT:
case SPRN_L2CR:
+ case SPRN_DSCR:
case SPRN_MMCR0_GEKKO:
case SPRN_MMCR1_GEKKO:
case SPRN_PMC1_GEKKO:
@@ -389,7 +495,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_PMC3_GEKKO:
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
+ case SPRN_MSSSR0:
+ case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_MMCRS:
+ case SPRN_MMCRA:
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
+#endif
break;
+unprivileged:
default:
printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
#ifndef DEBUG_SPR
@@ -401,7 +517,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
@@ -414,40 +530,52 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
if (sprn % 2)
- kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
+ *spr_val = bat->raw >> 32;
else
- kvmppc_set_gpr(vcpu, rt, bat->raw);
+ *spr_val = bat->raw;
break;
}
case SPRN_SDR1:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
+ if (!spr_allowed(vcpu, PRIV_HYPER))
+ goto unprivileged;
+ *spr_val = to_book3s(vcpu)->sdr1;
break;
case SPRN_DSISR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
+ *spr_val = kvmppc_get_dsisr(vcpu);
break;
case SPRN_DAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
+ *spr_val = kvmppc_get_dar(vcpu);
break;
case SPRN_HIOR:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
+ *spr_val = to_book3s(vcpu)->hior;
break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
+ *spr_val = to_book3s(vcpu)->hid[0];
break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
+ *spr_val = to_book3s(vcpu)->hid[1];
break;
case SPRN_HID2:
case SPRN_HID2_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
+ *spr_val = to_book3s(vcpu)->hid[2];
break;
case SPRN_HID4:
case SPRN_HID4_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
+ *spr_val = to_book3s(vcpu)->hid[4];
break;
case SPRN_HID5:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
+ *spr_val = to_book3s(vcpu)->hid[5];
+ break;
+ case SPRN_CFAR:
+ case SPRN_DSCR:
+ *spr_val = 0;
+ break;
+ case SPRN_PURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+ break;
+ case SPRN_SPURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
break;
case SPRN_GQR0:
case SPRN_GQR1:
@@ -457,9 +585,33 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_GQR5:
case SPRN_GQR6:
case SPRN_GQR7:
- kvmppc_set_gpr(vcpu, rt,
- to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
+ *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
+ break;
+ case SPRN_FSCR:
+ *spr_val = vcpu->arch.fscr;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_BESCR:
+ *spr_val = vcpu->arch.bescr;
+ break;
+ case SPRN_EBBHR:
+ *spr_val = vcpu->arch.ebbhr;
+ break;
+ case SPRN_EBBRR:
+ *spr_val = vcpu->arch.ebbrr;
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case SPRN_TFHAR:
+ *spr_val = vcpu->arch.tfhar;
+ break;
+ case SPRN_TEXASR:
+ *spr_val = vcpu->arch.texasr;
+ break;
+ case SPRN_TFIAR:
+ *spr_val = vcpu->arch.tfiar;
break;
+#endif
+#endif
case SPRN_THRM1:
case SPRN_THRM2:
case SPRN_THRM3:
@@ -473,9 +625,20 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_PMC3_GEKKO:
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
- kvmppc_set_gpr(vcpu, rt, 0);
+ case SPRN_MSSSR0:
+ case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+ case SPRN_MMCRS:
+ case SPRN_MMCRA:
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
+ case SPRN_TIR:
+#endif
+ *spr_val = 0;
break;
default:
+unprivileged:
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
#ifndef DEBUG_SPR
emulated = EMULATE_FAIL;
@@ -488,66 +651,34 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
{
- u32 dsisr = 0;
-
- /*
- * This is what the spec says about DSISR bits (not mentioned = 0):
- *
- * 12:13 [DS] Set to bits 30:31
- * 15:16 [X] Set to bits 29:30
- * 17 [X] Set to bit 25
- * [D/DS] Set to bit 5
- * 18:21 [X] Set to bits 21:24
- * [D/DS] Set to bits 1:4
- * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS)
- * 27:31 Set to bits 11:15 (RA)
- */
-
- switch (get_op(inst)) {
- /* D-form */
- case OP_LFS:
- case OP_LFD:
- case OP_STFD:
- case OP_STFS:
- dsisr |= (inst >> 12) & 0x4000; /* bit 17 */
- dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */
- break;
- /* X-form */
- case 31:
- dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */
- dsisr |= (inst << 8) & 0x04000; /* bit 17 */
- dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */
- break;
- default:
- printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
- break;
- }
-
- dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */
-
- return dsisr;
+ return make_dsisr(inst);
}
ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Linux's fix_alignment() assumes that DAR is valid, so can we
+ */
+ return vcpu->arch.fault_dar;
+#else
ulong dar = 0;
- ulong ra;
+ ulong ra = get_ra(inst);
+ ulong rb = get_rb(inst);
switch (get_op(inst)) {
case OP_LFS:
case OP_LFD:
case OP_STFD:
case OP_STFS:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
dar += (s32)((s16)inst);
break;
case 31:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
- dar += kvmppc_get_gpr(vcpu, get_rb(inst));
+ dar += kvmppc_get_gpr(vcpu, rb);
break;
default:
printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
@@ -555,4 +686,5 @@ ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
}
return dar;
+#endif
}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 1dd5a1ddfd0..0d013fbc2e1 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -17,16 +17,14 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
-#include <linux/module.h>
+#include <linux/export.h>
+#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
-EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
-EXPORT_SYMBOL_GPL(kvmppc_rmcall);
-EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
#endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
#endif
+
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
new file mode 100644
index 00000000000..7a12edbb61e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -0,0 +1,2482 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Paul Mackerras <paulus@au1.ibm.com>
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description: KVM functions specific to running on Book 3S
+ * processors in hypervisor mode (specifically POWER7 and later).
+ *
+ * This file is derived from arch/powerpc/kvm/book3s.c,
+ * by Alexander Graf <agraf@suse.de>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
+#include <linux/srcu.h>
+#include <linux/miscdevice.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/lppaca.h>
+#include <asm/processor.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/switch_to.h>
+#include <asm/smp.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+
+#include "book3s.h"
+
+/* #define EXIT_DEBUG */
+/* #define EXIT_DEBUG_SIMPLE */
+/* #define EXIT_DEBUG_INT */
+
+/* Used to indicate that a guest page fault needs to be handled */
+#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
+
+/* Used as a "null" value for timebase values */
+#define TB_NIL (~(u64)0)
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+ wait_queue_head_t *wqp;
+
+ wqp = kvm_arch_vcpu_wq(vcpu);
+ if (waitqueue_active(wqp)) {
+ wake_up_interruptible(wqp);
+ ++vcpu->stat.halt_wakeup;
+ }
+
+ me = get_cpu();
+
+ /* CPU points to the first thread of the core */
+ if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+#ifdef CONFIG_PPC_ICP_NATIVE
+ int real_cpu = cpu + vcpu->arch.ptid;
+ if (paca[real_cpu].kvm_hstate.xics_phys)
+ xics_wake_cpu(real_cpu);
+ else
+#endif
+ if (cpu_online(cpu))
+ smp_send_reschedule(cpu);
+ }
+ put_cpu();
+}
+
+/*
+ * We use the vcpu_load/put functions to measure stolen time.
+ * Stolen time is counted as time when either the vcpu is able to
+ * run as part of a virtual core, but the task running the vcore
+ * is preempted or sleeping, or when the vcpu needs something done
+ * in the kernel by the task running the vcpu, but that task is
+ * preempted or sleeping. Those two things have to be counted
+ * separately, since one of the vcpu tasks will take on the job
+ * of running the core, and the other vcpu tasks in the vcore will
+ * sleep waiting for it to do that, but that sleep shouldn't count
+ * as stolen time.
+ *
+ * Hence we accumulate stolen time when the vcpu can run as part of
+ * a vcore using vc->stolen_tb, and the stolen time when the vcpu
+ * needs its task to do other things in the kernel (for example,
+ * service a page fault) in busy_stolen. We don't accumulate
+ * stolen time for a vcore when it is inactive, or for a vcpu
+ * when it is in state RUNNING or NOTREADY. NOTREADY is a bit of
+ * a misnomer; it means that the vcpu task is not executing in
+ * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
+ * the kernel. We don't have any way of dividing up that time
+ * between time that the vcpu is genuinely stopped, time that
+ * the task is actively working on behalf of the vcpu, and time
+ * that the task is preempted, so we don't count any of it as
+ * stolen.
+ *
+ * Updates to busy_stolen are protected by arch.tbacct_lock;
+ * updates to vc->stolen_tb are protected by the arch.tbacct_lock
+ * of the vcpu that has taken responsibility for running the vcore
+ * (i.e. vc->runner). The stolen times are measured in units of
+ * timebase ticks. (Note that the != TB_NIL checks below are
+ * purely defensive; they should never fail.)
+ */
+
+static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
+ vc->preempt_tb != TB_NIL) {
+ vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
+ vcpu->arch.busy_preempt != TB_NIL) {
+ vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_preempt = TB_NIL;
+ }
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+}
+
+static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->preempt_tb = mftb();
+ if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+ vcpu->arch.busy_preempt = mftb();
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+}
+
+static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+ vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
+}
+
+void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ vcpu->arch.pvr = pvr;
+}
+
+int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+{
+ unsigned long pcr = 0;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (arch_compat) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return -EINVAL; /* 970 has no compat mode support */
+
+ switch (arch_compat) {
+ case PVR_ARCH_205:
+ /*
+ * If an arch bit is set in PCR, all the defined
+ * higher-order arch bits also have to be set.
+ */
+ pcr = PCR_ARCH_206 | PCR_ARCH_205;
+ break;
+ case PVR_ARCH_206:
+ case PVR_ARCH_206p:
+ pcr = PCR_ARCH_206;
+ break;
+ case PVR_ARCH_207:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ /* POWER7 can't emulate POWER8 */
+ if (!(pcr & PCR_ARCH_206))
+ return -EINVAL;
+ pcr &= ~PCR_ARCH_206;
+ }
+ }
+
+ spin_lock(&vc->lock);
+ vc->arch_compat = arch_compat;
+ vc->pcr = pcr;
+ spin_unlock(&vc->lock);
+
+ return 0;
+}
+
+void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
+ pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
+ vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
+ for (r = 0; r < 16; ++r)
+ pr_err("r%2d = %.16lx r%d = %.16lx\n",
+ r, kvmppc_get_gpr(vcpu, r),
+ r+16, kvmppc_get_gpr(vcpu, r+16));
+ pr_err("ctr = %.16lx lr = %.16lx\n",
+ vcpu->arch.ctr, vcpu->arch.lr);
+ pr_err("srr0 = %.16llx srr1 = %.16llx\n",
+ vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
+ pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
+ vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
+ pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
+ vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
+ pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
+ vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
+ pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
+ pr_err("fault dar = %.16lx dsisr = %.8x\n",
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
+ for (r = 0; r < vcpu->arch.slb_max; ++r)
+ pr_err(" ESID = %.16llx VSID = %.16llx\n",
+ vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
+ pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
+ vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
+ vcpu->arch.last_inst);
+}
+
+struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+{
+ int r;
+ struct kvm_vcpu *v, *ret = NULL;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(r, v, kvm) {
+ if (v->vcpu_id == id) {
+ ret = v;
+ break;
+ }
+ }
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
+{
+ vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
+ vpa->yield_count = 1;
+}
+
+static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
+ unsigned long addr, unsigned long len)
+{
+ /* check address is cacheline aligned */
+ if (addr & (L1_CACHE_BYTES - 1))
+ return -EINVAL;
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (v->next_gpa != addr || v->len != len) {
+ v->next_gpa = addr;
+ v->len = addr ? len : 0;
+ v->update_pending = 1;
+ }
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ return 0;
+}
+
+/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+struct reg_vpa {
+ u32 dummy;
+ union {
+ u16 hword;
+ u32 word;
+ } length;
+};
+
+static int vpa_is_registered(struct kvmppc_vpa *vpap)
+{
+ if (vpap->update_pending)
+ return vpap->next_gpa != 0;
+ return vpap->pinned_addr != NULL;
+}
+
+static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
+ unsigned long flags,
+ unsigned long vcpuid, unsigned long vpa)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long len, nb;
+ void *va;
+ struct kvm_vcpu *tvcpu;
+ int err;
+ int subfunc;
+ struct kvmppc_vpa *vpap;
+
+ tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
+ if (!tvcpu)
+ return H_PARAMETER;
+
+ subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+ if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+ subfunc == H_VPA_REG_SLB) {
+ /* Registering new area - address must be cache-line aligned */
+ if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
+ return H_PARAMETER;
+
+ /* convert logical addr to kernel addr and read length */
+ va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+ if (va == NULL)
+ return H_PARAMETER;
+ if (subfunc == H_VPA_REG_VPA)
+ len = ((struct reg_vpa *)va)->length.hword;
+ else
+ len = ((struct reg_vpa *)va)->length.word;
+ kvmppc_unpin_guest_page(kvm, va, vpa, false);
+
+ /* Check length */
+ if (len > nb || len < sizeof(struct reg_vpa))
+ return H_PARAMETER;
+ } else {
+ vpa = 0;
+ len = 0;
+ }
+
+ err = H_PARAMETER;
+ vpap = NULL;
+ spin_lock(&tvcpu->arch.vpa_update_lock);
+
+ switch (subfunc) {
+ case H_VPA_REG_VPA: /* register VPA */
+ if (len < sizeof(struct lppaca))
+ break;
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_REG_DTL: /* register DTL */
+ if (len < sizeof(struct dtl_entry))
+ break;
+ len -= len % sizeof(struct dtl_entry);
+
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
+ break;
+
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_REG_SLB: /* register SLB shadow buffer */
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
+ break;
+
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_VPA: /* deregister VPA */
+ /* Check they don't still have a DTL or SLB buf registered */
+ err = H_RESOURCE;
+ if (vpa_is_registered(&tvcpu->arch.dtl) ||
+ vpa_is_registered(&tvcpu->arch.slb_shadow))
+ break;
+
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_DTL: /* deregister DTL */
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+ }
+
+ if (vpap) {
+ vpap->next_gpa = vpa;
+ vpap->len = len;
+ vpap->update_pending = 1;
+ }
+
+ spin_unlock(&tvcpu->arch.vpa_update_lock);
+
+ return err;
+}
+
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+{
+ struct kvm *kvm = vcpu->kvm;
+ void *va;
+ unsigned long nb;
+ unsigned long gpa;
+
+ /*
+ * We need to pin the page pointed to by vpap->next_gpa,
+ * but we can't call kvmppc_pin_guest_page under the lock
+ * as it does get_user_pages() and down_read(). So we
+ * have to drop the lock, pin the page, then get the lock
+ * again and check that a new area didn't get registered
+ * in the meantime.
+ */
+ for (;;) {
+ gpa = vpap->next_gpa;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ va = NULL;
+ nb = 0;
+ if (gpa)
+ va = kvmppc_pin_guest_page(kvm, gpa, &nb);
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (gpa == vpap->next_gpa)
+ break;
+ /* sigh... unpin that one and try again */
+ if (va)
+ kvmppc_unpin_guest_page(kvm, va, gpa, false);
+ }
+
+ vpap->update_pending = 0;
+ if (va && nb < vpap->len) {
+ /*
+ * If it's now too short, it must be that userspace
+ * has changed the mappings underlying guest memory,
+ * so unregister the region.
+ */
+ kvmppc_unpin_guest_page(kvm, va, gpa, false);
+ va = NULL;
+ }
+ if (vpap->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+ vpap->dirty);
+ vpap->gpa = gpa;
+ vpap->pinned_addr = va;
+ vpap->dirty = false;
+ if (va)
+ vpap->pinned_end = va + vpap->len;
+}
+
+static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending))
+ return;
+
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.vpa.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
+ if (vcpu->arch.vpa.pinned_addr)
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ }
+ if (vcpu->arch.dtl.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
+ vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_index = 0;
+ }
+ if (vcpu->arch.slb_shadow.update_pending)
+ kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
+/*
+ * Return the accumulated stolen time for the vcore up until `now'.
+ * The caller should hold the vcore lock.
+ */
+static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
+{
+ u64 p;
+
+ /*
+ * If we are the task running the vcore, then since we hold
+ * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
+ * can't be updated, so we don't need the tbacct_lock.
+ * If the vcore is inactive, it can't become active (since we
+ * hold the vcore lock), so the vcpu load/put functions won't
+ * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
+ */
+ if (vc->vcore_state != VCORE_INACTIVE &&
+ vc->runner->arch.run_task != current) {
+ spin_lock_irq(&vc->runner->arch.tbacct_lock);
+ p = vc->stolen_tb;
+ if (vc->preempt_tb != TB_NIL)
+ p += now - vc->preempt_tb;
+ spin_unlock_irq(&vc->runner->arch.tbacct_lock);
+ } else {
+ p = vc->stolen_tb;
+ }
+ return p;
+}
+
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+{
+ struct dtl_entry *dt;
+ struct lppaca *vpa;
+ unsigned long stolen;
+ unsigned long core_stolen;
+ u64 now;
+
+ dt = vcpu->arch.dtl_ptr;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ now = mftb();
+ core_stolen = vcore_stolen_time(vc, now);
+ stolen = core_stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = core_stolen;
+ spin_lock_irq(&vcpu->arch.tbacct_lock);
+ stolen += vcpu->arch.busy_stolen;
+ vcpu->arch.busy_stolen = 0;
+ spin_unlock_irq(&vcpu->arch.tbacct_lock);
+ if (!dt || !vpa)
+ return;
+ memset(dt, 0, sizeof(struct dtl_entry));
+ dt->dispatch_reason = 7;
+ dt->processor_id = vc->pcpu + vcpu->arch.ptid;
+ dt->timebase = now + vc->tb_offset;
+ dt->enqueue_to_dispatch_time = stolen;
+ dt->srr0 = kvmppc_get_pc(vcpu);
+ dt->srr1 = vcpu->arch.shregs.msr;
+ ++dt;
+ if (dt == vcpu->arch.dtl.pinned_end)
+ dt = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_ptr = dt;
+ /* order writing *dt vs. writing vpa->dtl_idx */
+ smp_wmb();
+ vpa->dtl_idx = ++vcpu->arch.dtl_index;
+ vcpu->arch.dtl.dirty = true;
+}
+
+int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+ unsigned long target, ret = H_SUCCESS;
+ struct kvm_vcpu *tvcpu;
+ int idx, rc;
+
+ switch (req) {
+ case H_ENTER:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ break;
+ case H_CEDE:
+ break;
+ case H_PROD:
+ target = kvmppc_get_gpr(vcpu, 4);
+ tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ if (!tvcpu) {
+ ret = H_PARAMETER;
+ break;
+ }
+ tvcpu->arch.prodded = 1;
+ smp_mb();
+ if (vcpu->arch.ceded) {
+ if (waitqueue_active(&vcpu->wq)) {
+ wake_up_interruptible(&vcpu->wq);
+ vcpu->stat.halt_wakeup++;
+ }
+ }
+ break;
+ case H_CONFER:
+ target = kvmppc_get_gpr(vcpu, 4);
+ if (target == -1)
+ break;
+ tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ if (!tvcpu) {
+ ret = H_PARAMETER;
+ break;
+ }
+ kvm_vcpu_yield_to(tvcpu);
+ break;
+ case H_REGISTER_VPA:
+ ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_RTAS:
+ if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ return RESUME_HOST;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ rc = kvmppc_rtas_hcall(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (rc == -ENOENT)
+ return RESUME_HOST;
+ else if (rc == 0)
+ break;
+
+ /* Send the error out to userspace via KVM_RUN */
+ return rc;
+
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ case H_IPOLL:
+ case H_XIRR_X:
+ if (kvmppc_xics_enabled(vcpu)) {
+ ret = kvmppc_xics_hcall(vcpu, req);
+ break;
+ } /* fallthrough */
+ default:
+ return RESUME_HOST;
+ }
+ kvmppc_set_gpr(vcpu, 3, ret);
+ vcpu->arch.hcall_needed = 0;
+ return RESUME_GUEST;
+}
+
+static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ struct task_struct *tsk)
+{
+ int r = RESUME_HOST;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+ switch (vcpu->arch.trap) {
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ /*
+ * Deliver a machine check interrupt to the guest.
+ * We have to do this, even if the host has handled the
+ * machine check, because machine checks use SRR0/1 and
+ * the interrupt might have trashed guest state in them.
+ */
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_MACHINE_CHECK);
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ {
+ ulong flags;
+ /*
+ * Normally program interrupts are delivered directly
+ * to the guest by the hardware, but we can get here
+ * as a result of a hypervisor emulation interrupt
+ * (e40) getting turned into a 700 by BML RTAS.
+ */
+ flags = vcpu->arch.shregs.msr & 0x1f0000ull;
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ /* hcall - punt to userspace */
+ int i;
+
+ /* hypercall with MSR_PR has already been handled in rmode,
+ * and never reaches here.
+ */
+
+ run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
+ for (i = 0; i < 9; ++i)
+ run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ break;
+ }
+ /*
+ * We get these next two if the guest accesses a page which it thinks
+ * it has mapped but which is not actually present, either because
+ * it is for an emulated I/O device or because the corresonding
+ * host page has been paged out. Any other HDSI/HISI interrupts
+ * have been handled already.
+ */
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ r = RESUME_PAGE_FAULT;
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+ vcpu->arch.fault_dsisr = 0;
+ r = RESUME_PAGE_FAULT;
+ break;
+ /*
+ * This occurs if the guest executes an illegal instruction.
+ * We just generate a program interrupt to the guest, since
+ * we don't emulate any guest instructions at this stage.
+ */
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ break;
+ /*
+ * This occurs if the guest (kernel or userspace), does something that
+ * is prohibited by HFSCR. We just generate a program interrupt to
+ * the guest.
+ */
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ break;
+ default:
+ kvmppc_dump_regs(vcpu);
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ r = RESUME_HOST;
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int i;
+
+ memset(sregs, 0, sizeof(struct kvm_sregs));
+ sregs->pvr = vcpu->arch.pvr;
+ for (i = 0; i < vcpu->arch.slb_max; i++) {
+ sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
+ sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
+ }
+
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int i, j;
+
+ kvmppc_set_pvr_hv(vcpu, sregs->pvr);
+
+ j = 0;
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
+ vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
+ vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
+ ++j;
+ }
+ }
+ vcpu->arch.slb_max = j;
+
+ return 0;
+}
+
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 mask;
+
+ spin_lock(&vc->lock);
+ /*
+ * If ILE (interrupt little-endian) has changed, update the
+ * MSR_LE bit in the intr_msr for each vcpu in this vcore.
+ */
+ if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.vcore != vc)
+ continue;
+ if (new_lpcr & LPCR_ILE)
+ vcpu->arch.intr_msr |= MSR_LE;
+ else
+ vcpu->arch.intr_msr &= ~MSR_LE;
+ }
+ mutex_unlock(&kvm->lock);
+ }
+
+ /*
+ * Userspace can only modify DPFD (default prefetch depth),
+ * ILE (interrupt little-endian) and TC (translation control).
+ * On POWER8 userspace can also modify AIL (alt. interrupt loc.)
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mask |= LPCR_AIL;
+ vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
+ spin_unlock(&vc->lock);
+}
+
+static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+
+ switch (id) {
+ case KVM_REG_PPC_HIOR:
+ *val = get_reg_val(id, 0);
+ break;
+ case KVM_REG_PPC_DABR:
+ *val = get_reg_val(id, vcpu->arch.dabr);
+ break;
+ case KVM_REG_PPC_DABRX:
+ *val = get_reg_val(id, vcpu->arch.dabrx);
+ break;
+ case KVM_REG_PPC_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr);
+ break;
+ case KVM_REG_PPC_PURR:
+ *val = get_reg_val(id, vcpu->arch.purr);
+ break;
+ case KVM_REG_PPC_SPURR:
+ *val = get_reg_val(id, vcpu->arch.spurr);
+ break;
+ case KVM_REG_PPC_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ *val = get_reg_val(id, vcpu->arch.uamor);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ i = id - KVM_REG_PPC_MMCR0;
+ *val = get_reg_val(id, vcpu->arch.mmcr[i]);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ *val = get_reg_val(id, vcpu->arch.pmc[i]);
+ break;
+ case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+ i = id - KVM_REG_PPC_SPMC1;
+ *val = get_reg_val(id, vcpu->arch.spmc[i]);
+ break;
+ case KVM_REG_PPC_SIAR:
+ *val = get_reg_val(id, vcpu->arch.siar);
+ break;
+ case KVM_REG_PPC_SDAR:
+ *val = get_reg_val(id, vcpu->arch.sdar);
+ break;
+ case KVM_REG_PPC_SIER:
+ *val = get_reg_val(id, vcpu->arch.sier);
+ break;
+ case KVM_REG_PPC_IAMR:
+ *val = get_reg_val(id, vcpu->arch.iamr);
+ break;
+ case KVM_REG_PPC_PSPB:
+ *val = get_reg_val(id, vcpu->arch.pspb);
+ break;
+ case KVM_REG_PPC_DPDES:
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ break;
+ case KVM_REG_PPC_DAWR:
+ *val = get_reg_val(id, vcpu->arch.dawr);
+ break;
+ case KVM_REG_PPC_DAWRX:
+ *val = get_reg_val(id, vcpu->arch.dawrx);
+ break;
+ case KVM_REG_PPC_CIABR:
+ *val = get_reg_val(id, vcpu->arch.ciabr);
+ break;
+ case KVM_REG_PPC_IC:
+ *val = get_reg_val(id, vcpu->arch.ic);
+ break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, vcpu->arch.vtb);
+ break;
+ case KVM_REG_PPC_CSIGR:
+ *val = get_reg_val(id, vcpu->arch.csigr);
+ break;
+ case KVM_REG_PPC_TACR:
+ *val = get_reg_val(id, vcpu->arch.tacr);
+ break;
+ case KVM_REG_PPC_TCSCR:
+ *val = get_reg_val(id, vcpu->arch.tcscr);
+ break;
+ case KVM_REG_PPC_PID:
+ *val = get_reg_val(id, vcpu->arch.pid);
+ break;
+ case KVM_REG_PPC_ACOP:
+ *val = get_reg_val(id, vcpu->arch.acop);
+ break;
+ case KVM_REG_PPC_WORT:
+ *val = get_reg_val(id, vcpu->arch.wort);
+ break;
+ case KVM_REG_PPC_VPA_ADDR:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
+ val->vpaval.length = vcpu->arch.slb_shadow.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.dtl.next_gpa;
+ val->vpaval.length = vcpu->arch.dtl.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_TB_OFFSET:
+ *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+ break;
+ case KVM_REG_PPC_LPCR:
+ *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+ break;
+ case KVM_REG_PPC_PPR:
+ *val = get_reg_val(id, vcpu->arch.ppr);
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ *val = get_reg_val(id, vcpu->arch.tfhar);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ *val = get_reg_val(id, vcpu->arch.tfiar);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ *val = get_reg_val(id, vcpu->arch.texasr);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ i = id - KVM_REG_PPC_TM_GPR0;
+ *val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int j;
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+ else {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ val->vval = vcpu->arch.vr_tm.vr[i-32];
+ else
+ r = -ENXIO;
+ }
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ *val = get_reg_val(id, vcpu->arch.cr_tm);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ *val = get_reg_val(id, vcpu->arch.lr_tm);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ *val = get_reg_val(id, vcpu->arch.ctr_tm);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr_tm);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ *val = get_reg_val(id, vcpu->arch.ppr_tm);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ *val = get_reg_val(id, vcpu->arch.vrsave_tm);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+ else
+ r = -ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr_tm);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ *val = get_reg_val(id, vcpu->arch.tar_tm);
+ break;
+#endif
+ case KVM_REG_PPC_ARCH_COMPAT:
+ *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+ unsigned long addr, len;
+
+ switch (id) {
+ case KVM_REG_PPC_HIOR:
+ /* Only allow this to be set to zero */
+ if (set_reg_val(id, *val))
+ r = -EINVAL;
+ break;
+ case KVM_REG_PPC_DABR:
+ vcpu->arch.dabr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DABRX:
+ vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
+ break;
+ case KVM_REG_PPC_DSCR:
+ vcpu->arch.dscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PURR:
+ vcpu->arch.purr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SPURR:
+ vcpu->arch.spurr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_AMR:
+ vcpu->arch.amr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ vcpu->arch.uamor = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ i = id - KVM_REG_PPC_MMCR0;
+ vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ vcpu->arch.pmc[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+ i = id - KVM_REG_PPC_SPMC1;
+ vcpu->arch.spmc[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SIAR:
+ vcpu->arch.siar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SDAR:
+ vcpu->arch.sdar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SIER:
+ vcpu->arch.sier = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_IAMR:
+ vcpu->arch.iamr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PSPB:
+ vcpu->arch.pspb = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DPDES:
+ vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DAWR:
+ vcpu->arch.dawr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DAWRX:
+ vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+ break;
+ case KVM_REG_PPC_CIABR:
+ vcpu->arch.ciabr = set_reg_val(id, *val);
+ /* Don't allow setting breakpoints in hypervisor code */
+ if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */
+ break;
+ case KVM_REG_PPC_IC:
+ vcpu->arch.ic = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_VTB:
+ vcpu->arch.vtb = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_CSIGR:
+ vcpu->arch.csigr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TACR:
+ vcpu->arch.tacr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TCSCR:
+ vcpu->arch.tcscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PID:
+ vcpu->arch.pid = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_ACOP:
+ vcpu->arch.acop = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_WORT:
+ vcpu->arch.wort = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_VPA_ADDR:
+ addr = set_reg_val(id, *val);
+ r = -EINVAL;
+ if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
+ vcpu->arch.dtl.next_gpa))
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (addr && !vcpu->arch.vpa.next_gpa)
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (addr && (len < sizeof(struct dtl_entry) ||
+ !vcpu->arch.vpa.next_gpa))
+ break;
+ len -= len % sizeof(struct dtl_entry);
+ r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
+ break;
+ case KVM_REG_PPC_TB_OFFSET:
+ /* round up to multiple of 2^24 */
+ vcpu->arch.vcore->tb_offset =
+ ALIGN(set_reg_val(id, *val), 1UL << 24);
+ break;
+ case KVM_REG_PPC_LPCR:
+ kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_PPR:
+ vcpu->arch.ppr = set_reg_val(id, *val);
+ break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ vcpu->arch.tfhar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ vcpu->arch.tfiar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ vcpu->arch.texasr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ i = id - KVM_REG_PPC_TM_GPR0;
+ vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int j;
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+ else
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr_tm.vr[i-32] = val->vval;
+ else
+ r = -ENXIO;
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ vcpu->arch.cr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ vcpu->arch.lr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ vcpu->arch.ctr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ vcpu->arch.amr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ vcpu->arch.ppr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
+ else
+ r = - ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ vcpu->arch.dscr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ vcpu->arch.tar_tm = set_reg_val(id, *val);
+ break;
+#endif
+ case KVM_REG_PPC_ARCH_COMPAT:
+ r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvm_vcpu *vcpu;
+ int err = -EINVAL;
+ int core;
+ struct kvmppc_vcore *vcore;
+
+ core = id / threads_per_subcore;
+ if (core >= KVM_MAX_VCORES)
+ goto out;
+
+ err = -ENOMEM;
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+ goto out;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ vcpu->arch.shared = &vcpu->arch.shregs;
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ /*
+ * The shared struct is never shared on HV,
+ * so we can always use host endianness
+ */
+#ifdef __BIG_ENDIAN__
+ vcpu->arch.shared_big_endian = true;
+#else
+ vcpu->arch.shared_big_endian = false;
+#endif
+#endif
+ vcpu->arch.mmcr[0] = MMCR0_FC;
+ vcpu->arch.ctrl = CTRL_RUNLATCH;
+ /* default to host PVR, since we can't spoof it */
+ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
+ spin_lock_init(&vcpu->arch.vpa_update_lock);
+ spin_lock_init(&vcpu->arch.tbacct_lock);
+ vcpu->arch.busy_preempt = TB_NIL;
+ vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+
+ kvmppc_mmu_book3s_hv_init(vcpu);
+
+ vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+
+ init_waitqueue_head(&vcpu->arch.cpu_run);
+
+ mutex_lock(&kvm->lock);
+ vcore = kvm->arch.vcores[core];
+ if (!vcore) {
+ vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+ if (vcore) {
+ INIT_LIST_HEAD(&vcore->runnable_threads);
+ spin_lock_init(&vcore->lock);
+ init_waitqueue_head(&vcore->wq);
+ vcore->preempt_tb = TB_NIL;
+ vcore->lpcr = kvm->arch.lpcr;
+ vcore->first_vcpuid = core * threads_per_subcore;
+ vcore->kvm = kvm;
+ }
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
+ mutex_unlock(&kvm->lock);
+
+ if (!vcore)
+ goto free_vcpu;
+
+ spin_lock(&vcore->lock);
+ ++vcore->num_threads;
+ spin_unlock(&vcore->lock);
+ vcpu->arch.vcore = vcore;
+ vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+
+ vcpu->arch.cpu_type = KVM_CPU_3S_64;
+ kvmppc_sanity_check(vcpu);
+
+ return vcpu;
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+ if (vpa->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
+ vpa->dirty);
+}
+
+static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+ unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
+{
+ /* Indicate we want to get back into the guest */
+ return 1;
+}
+
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long dec_nsec, now;
+
+ now = get_tb();
+ if (now > vcpu->arch.dec_expires) {
+ /* decrementer has already gone negative */
+ kvmppc_core_queue_dec(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
+ return;
+ }
+ dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
+ / tb_ticks_per_sec;
+ hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+ HRTIMER_MODE_REL);
+ vcpu->arch.timer_running = 1;
+}
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+}
+
+extern void __kvmppc_vcore_entry(void);
+
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+ struct kvm_vcpu *vcpu)
+{
+ u64 now;
+
+ if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+ return;
+ spin_lock_irq(&vcpu->arch.tbacct_lock);
+ now = mftb();
+ vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
+ vcpu->arch.stolen_logged;
+ vcpu->arch.busy_preempt = now;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ spin_unlock_irq(&vcpu->arch.tbacct_lock);
+ --vc->n_runnable;
+ list_del(&vcpu->arch.run_list);
+}
+
+static int kvmppc_grab_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+ long timeout = 1000;
+
+ tpaca = &paca[cpu];
+
+ /* Ensure the thread won't go into the kernel if it wakes */
+ tpaca->kvm_hstate.hwthread_req = 1;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
+
+ /*
+ * If the thread is already executing in the kernel (e.g. handling
+ * a stray interrupt), wait for it to get back to nap mode.
+ * The smp_mb() is to ensure that our setting of hwthread_req
+ * is visible before we look at hwthread_state, so if this
+ * races with the code at system_reset_pSeries and the thread
+ * misses our setting of hwthread_req, we are sure to see its
+ * setting of hwthread_state, and vice versa.
+ */
+ smp_mb();
+ while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+ if (--timeout <= 0) {
+ pr_err("KVM: couldn't grab cpu %d\n", cpu);
+ return -EBUSY;
+ }
+ udelay(1);
+ }
+ return 0;
+}
+
+static void kvmppc_release_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct paca_struct *tpaca;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+ cpu = vc->pcpu + vcpu->arch.ptid;
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.kvm_vcpu = vcpu;
+ tpaca->kvm_hstate.kvm_vcore = vc;
+ tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
+ vcpu->cpu = vc->pcpu;
+ smp_wmb();
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+ if (cpu != smp_processor_id()) {
+ xics_wake_cpu(cpu);
+ if (vcpu->arch.ptid)
+ ++vc->n_woken;
+ }
+#endif
+}
+
+static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+{
+ int i;
+
+ HMT_low();
+ i = 0;
+ while (vc->nap_count < vc->n_woken) {
+ if (++i >= 1000000) {
+ pr_err("kvmppc_wait_for_nap timeout %d %d\n",
+ vc->nap_count, vc->n_woken);
+ break;
+ }
+ cpu_relax();
+ }
+ HMT_medium();
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line. Then grab the threads so they can't
+ * enter the kernel.
+ */
+static int on_primary_thread(void)
+{
+ int cpu = smp_processor_id();
+ int thr;
+
+ /* Are we on a primary subcore? */
+ if (cpu_thread_in_subcore(cpu))
+ return 0;
+
+ thr = 0;
+ while (++thr < threads_per_subcore)
+ if (cpu_online(cpu + thr))
+ return 0;
+
+ /* Grab all hw threads so they can't go into the kernel */
+ for (thr = 1; thr < threads_per_subcore; ++thr) {
+ if (kvmppc_grab_hwthread(cpu + thr)) {
+ /* Couldn't grab one; let the others go */
+ do {
+ kvmppc_release_hwthread(cpu + thr);
+ } while (--thr > 0);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static void kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu, *vnext;
+ long ret;
+ u64 now;
+ int i, need_vpa_update;
+ int srcu_idx;
+ struct kvm_vcpu *vcpus_to_update[threads_per_core];
+
+ /* don't start if any threads have a signal pending */
+ need_vpa_update = 0;
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ if (signal_pending(vcpu->arch.run_task))
+ return;
+ if (vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending)
+ vcpus_to_update[need_vpa_update++] = vcpu;
+ }
+
+ /*
+ * Initialize *vc, in particular vc->vcore_state, so we can
+ * drop the vcore lock if necessary.
+ */
+ vc->n_woken = 0;
+ vc->nap_count = 0;
+ vc->entry_exit_count = 0;
+ vc->vcore_state = VCORE_STARTING;
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+
+ /*
+ * Updating any of the vpas requires calling kvmppc_pin_guest_page,
+ * which can't be called with any spinlocks held.
+ */
+ if (need_vpa_update) {
+ spin_unlock(&vc->lock);
+ for (i = 0; i < need_vpa_update; ++i)
+ kvmppc_update_vpas(vcpus_to_update[i]);
+ spin_lock(&vc->lock);
+ }
+
+ /*
+ * Make sure we are running on primary threads, and that secondary
+ * threads are offline. Also check if the number of threads in this
+ * guest are greater than the current system threads per guest.
+ */
+ if ((threads_per_core > 1) &&
+ ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->arch.ret = -EBUSY;
+ goto out;
+ }
+
+
+ vc->pcpu = smp_processor_id();
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ kvmppc_start_thread(vcpu);
+ kvmppc_create_dtl_entry(vcpu, vc);
+ }
+
+ /* Set this explicitly in case thread 0 doesn't have a vcpu */
+ get_paca()->kvm_hstate.kvm_vcore = vc;
+ get_paca()->kvm_hstate.ptid = 0;
+
+ vc->vcore_state = VCORE_RUNNING;
+ preempt_disable();
+ spin_unlock(&vc->lock);
+
+ kvm_guest_enter();
+
+ srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+
+ __kvmppc_vcore_entry();
+
+ spin_lock(&vc->lock);
+ /* disable sending of IPIs on virtual external irqs */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->cpu = -1;
+ /* wait for secondary threads to finish writing their state to memory */
+ if (vc->nap_count < vc->n_woken)
+ kvmppc_wait_for_nap(vc);
+ for (i = 0; i < threads_per_subcore; ++i)
+ kvmppc_release_hwthread(vc->pcpu + i);
+ /* prevent other vcpu threads from doing kvmppc_start_thread() now */
+ vc->vcore_state = VCORE_EXITING;
+ spin_unlock(&vc->lock);
+
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+ /* make sure updates to secondary vcpu structs are visible now */
+ smp_mb();
+ kvm_guest_exit();
+
+ preempt_enable();
+ cond_resched();
+
+ spin_lock(&vc->lock);
+ now = get_tb();
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ /* cancel pending dec exception if dec is positive */
+ if (now < vcpu->arch.dec_expires &&
+ kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_dequeue_dec(vcpu);
+
+ ret = RESUME_GUEST;
+ if (vcpu->arch.trap)
+ ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+ vcpu->arch.run_task);
+
+ vcpu->arch.ret = ret;
+ vcpu->arch.trap = 0;
+
+ if (vcpu->arch.ceded) {
+ if (!is_kvmppc_resume_guest(ret))
+ kvmppc_end_cede(vcpu);
+ else
+ kvmppc_set_timer(vcpu);
+ }
+ }
+
+ out:
+ vc->vcore_state = VCORE_INACTIVE;
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
+ if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ kvmppc_remove_runnable(vc, vcpu);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+}
+
+/*
+ * Wait for some other vcpu thread to execute us, and
+ * wake us up when we need to handle something in the host.
+ */
+static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ schedule();
+ finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus. vc->lock is held.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ vc->vcore_state = VCORE_SLEEPING;
+ spin_unlock(&vc->lock);
+ schedule();
+ finish_wait(&vc->wq, &wait);
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+}
+
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int n_ceded;
+ struct kvmppc_vcore *vc;
+ struct kvm_vcpu *v, *vn;
+
+ kvm_run->exit_reason = 0;
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+ kvmppc_update_vpas(vcpu);
+
+ /*
+ * Synchronize with other threads in this virtual core
+ */
+ vc = vcpu->arch.vcore;
+ spin_lock(&vc->lock);
+ vcpu->arch.ceded = 0;
+ vcpu->arch.run_task = current;
+ vcpu->arch.kvm_run = kvm_run;
+ vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+ vcpu->arch.busy_preempt = TB_NIL;
+ list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ ++vc->n_runnable;
+
+ /*
+ * This happens the first time this is called for a vcpu.
+ * If the vcore is already running, we may be able to start
+ * this thread straight away and have it join in.
+ */
+ if (!signal_pending(current)) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ VCORE_EXIT_COUNT(vc) == 0) {
+ kvmppc_create_dtl_entry(vcpu, vc);
+ kvmppc_start_thread(vcpu);
+ } else if (vc->vcore_state == VCORE_SLEEPING) {
+ wake_up(&vc->wq);
+ }
+
+ }
+
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ !signal_pending(current)) {
+ if (vc->vcore_state != VCORE_INACTIVE) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ continue;
+ }
+ list_for_each_entry_safe(v, vn, &vc->runnable_threads,
+ arch.run_list) {
+ kvmppc_core_prepare_to_enter(v);
+ if (signal_pending(v->arch.run_task)) {
+ kvmppc_remove_runnable(vc, v);
+ v->stat.signal_exits++;
+ v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ v->arch.ret = -EINTR;
+ wake_up(&v->arch.cpu_run);
+ }
+ }
+ if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+ break;
+ vc->runner = vcpu;
+ n_ceded = 0;
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+ if (!v->arch.pending_exceptions)
+ n_ceded += v->arch.ceded;
+ else
+ v->arch.ceded = 0;
+ }
+ if (n_ceded == vc->n_runnable)
+ kvmppc_vcore_blocked(vc);
+ else
+ kvmppc_run_core(vc);
+ vc->runner = NULL;
+ }
+
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ (vc->vcore_state == VCORE_RUNNING ||
+ vc->vcore_state == VCORE_EXITING)) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ }
+
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ kvmppc_remove_runnable(vc, vcpu);
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ }
+
+ if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
+ /* Wake up some vcpu to run the core */
+ v = list_first_entry(&vc->runnable_threads,
+ struct kvm_vcpu, arch.run_list);
+ wake_up(&v->arch.cpu_run);
+ }
+
+ spin_unlock(&vc->lock);
+ return vcpu->arch.ret;
+}
+
+static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int r;
+ int srcu_idx;
+
+ if (!vcpu->arch.sane) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /* No need to go into the guest when all we'll do is come back out */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ atomic_inc(&vcpu->kvm->arch.vcpus_running);
+ /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+ smp_mb();
+
+ /* On the first time here, set up HTAB and VRMA or RMA */
+ if (!vcpu->kvm->arch.rma_setup_done) {
+ r = kvmppc_hv_setup_htab_rma(vcpu);
+ if (r)
+ goto out;
+ }
+
+ flush_fp_to_thread(current);
+ flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
+ vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
+ do {
+ r = kvmppc_run_vcpu(run, vcpu);
+
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ r = kvmppc_pseries_do_hcall(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
+ } else if (r == RESUME_PAGE_FAULT) {
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ }
+ } while (is_kvmppc_resume_guest(r));
+
+ out:
+ vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+ atomic_dec(&vcpu->kvm->arch.vcpus_running);
+ return r;
+}
+
+
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+ Assumes POWER7 or PPC970. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+ switch (rma_size) {
+ case 32ul << 20: /* 32 MB */
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return 8; /* only supported on POWER7 */
+ return -1;
+ case 64ul << 20: /* 64 MB */
+ return 3;
+ case 128ul << 20: /* 128 MB */
+ return 7;
+ case 256ul << 20: /* 256 MB */
+ return 4;
+ case 1ul << 30: /* 1 GB */
+ return 2;
+ case 16ul << 30: /* 16 GB */
+ return 1;
+ case 256ul << 30: /* 256 GB */
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *page;
+ struct kvm_rma_info *ri = vma->vm_file->private_data;
+
+ if (vmf->pgoff >= kvm_rma_pages)
+ return VM_FAULT_SIGBUS;
+
+ page = pfn_to_page(ri->base_pfn + vmf->pgoff);
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_rma_vm_ops = {
+ .fault = kvm_rma_fault,
+};
+
+static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &kvm_rma_vm_ops;
+ return 0;
+}
+
+static int kvm_rma_release(struct inode *inode, struct file *filp)
+{
+ struct kvm_rma_info *ri = filp->private_data;
+
+ kvm_release_rma(ri);
+ return 0;
+}
+
+static const struct file_operations kvm_rma_fops = {
+ .mmap = kvm_rma_mmap,
+ .release = kvm_rma_release,
+};
+
+static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
+ struct kvm_allocate_rma *ret)
+{
+ long fd;
+ struct kvm_rma_info *ri;
+ /*
+ * Only do this on PPC970 in HV mode
+ */
+ if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+ !cpu_has_feature(CPU_FTR_ARCH_201))
+ return -EINVAL;
+
+ if (!kvm_rma_pages)
+ return -EINVAL;
+
+ ri = kvm_alloc_rma();
+ if (!ri)
+ return -ENOMEM;
+
+ fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ kvm_release_rma(ri);
+
+ ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
+ return fd;
+}
+
+static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+ int linux_psize)
+{
+ struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+ if (!def->shift)
+ return;
+ (*sps)->page_shift = def->shift;
+ (*sps)->slb_enc = def->sllp;
+ (*sps)->enc[0].page_shift = def->shift;
+ /*
+ * Only return base page encoding. We don't want to return
+ * all the supporting pte_enc, because our H_ENTER doesn't
+ * support MPSS yet. Once they do, we can start passing all
+ * support pte_enc here
+ */
+ (*sps)->enc[0].pte_enc = def->penc[linux_psize];
+ /*
+ * Add 16MB MPSS support if host supports it
+ */
+ if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
+ (*sps)->enc[1].page_shift = 24;
+ (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+ }
+ (*sps)++;
+}
+
+static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
+{
+ struct kvm_ppc_one_seg_page_size *sps;
+
+ info->flags = KVM_PPC_PAGE_SIZES_REAL;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ info->flags |= KVM_PPC_1T_SEGMENTS;
+ info->slb_size = mmu_slb_size;
+
+ /* We only support these sizes for now, and no muti-size segments */
+ sps = &info->sps[0];
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+
+ return 0;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ struct kvm_memory_slot *memslot;
+ int r;
+ unsigned long n;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_USER_MEM_SLOTS)
+ goto out;
+
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+
+ r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
+ if (r)
+ goto out;
+
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+ goto out;
+
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+static void unpin_slot(struct kvm_memory_slot *memslot)
+{
+ unsigned long *physp;
+ unsigned long j, npages, pfn;
+ struct page *page;
+
+ physp = memslot->arch.slot_phys;
+ npages = memslot->npages;
+ if (!physp)
+ return;
+ for (j = 0; j < npages; j++) {
+ if (!(physp[j] & KVMPPC_GOT_PAGE))
+ continue;
+ pfn = physp[j] >> PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+ SetPageDirty(page);
+ put_page(page);
+ }
+}
+
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ if (!dont || free->arch.rmap != dont->arch.rmap) {
+ vfree(free->arch.rmap);
+ free->arch.rmap = NULL;
+ }
+ if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
+ unpin_slot(free);
+ vfree(free->arch.slot_phys);
+ free->arch.slot_phys = NULL;
+ }
+}
+
+static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ slot->arch.slot_phys = NULL;
+
+ return 0;
+}
+
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
+ unsigned long *phys;
+
+ /* Allocate a slot_phys array if needed */
+ phys = memslot->arch.slot_phys;
+ if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
+ phys = vzalloc(memslot->npages * sizeof(unsigned long));
+ if (!phys)
+ return -ENOMEM;
+ memslot->arch.slot_phys = phys;
+ }
+
+ return 0;
+}
+
+static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
+{
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ if (npages && old->npages) {
+ /*
+ * If modifying a memslot, reset all the rmap dirty bits.
+ * If this is a new memslot, we don't need to do anything
+ * since the rmap array starts out as all zeroes,
+ * i.e. no pages are dirty.
+ */
+ memslot = id_to_memslot(kvm->memslots, mem->slot);
+ kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+ }
+}
+
+/*
+ * Update LPCR values in kvm->arch and in vcores.
+ * Caller must hold kvm->lock.
+ */
+void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
+{
+ long int i;
+ u32 cores_done = 0;
+
+ if ((kvm->arch.lpcr & mask) == lpcr)
+ return;
+
+ kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
+
+ for (i = 0; i < KVM_MAX_VCORES; ++i) {
+ struct kvmppc_vcore *vc = kvm->arch.vcores[i];
+ if (!vc)
+ continue;
+ spin_lock(&vc->lock);
+ vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+ spin_unlock(&vc->lock);
+ if (++cores_done >= kvm->arch.online_vcores)
+ break;
+ }
+}
+
+static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
+{
+ return;
+}
+
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
+{
+ int err = 0;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_rma_info *ri = NULL;
+ unsigned long hva;
+ struct kvm_memory_slot *memslot;
+ struct vm_area_struct *vma;
+ unsigned long lpcr = 0, senc;
+ unsigned long lpcr_mask = 0;
+ unsigned long psize, porder;
+ unsigned long rma_size;
+ unsigned long rmls;
+ unsigned long *physp;
+ unsigned long i, npages;
+ int srcu_idx;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.rma_setup_done)
+ goto out; /* another vcpu beat us to it */
+
+ /* Allocate hashed page table (if not done already) and reset it */
+ if (!kvm->arch.hpt_virt) {
+ err = kvmppc_alloc_hpt(kvm, NULL);
+ if (err) {
+ pr_err("KVM: Couldn't alloc HPT\n");
+ goto out;
+ }
+ }
+
+ /* Look up the memslot for guest physical address 0 */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ memslot = gfn_to_memslot(kvm, 0);
+
+ /* We must have some memory at 0 by now */
+ err = -EINVAL;
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ goto out_srcu;
+
+ /* Look up the VMA for the start of this memory slot */
+ hva = memslot->userspace_addr;
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ goto up_out;
+
+ psize = vma_kernel_pagesize(vma);
+ porder = __ilog2(psize);
+
+ /* Is this one of our preallocated RMAs? */
+ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
+ hva == vma->vm_start)
+ ri = vma->vm_file->private_data;
+
+ up_read(&current->mm->mmap_sem);
+
+ if (!ri) {
+ /* On POWER7, use VRMA; on PPC970, give up */
+ err = -EPERM;
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ pr_err("KVM: CPU requires an RMO\n");
+ goto out_srcu;
+ }
+
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize == 0x1000 || psize == 0x10000 ||
+ psize == 0x1000000))
+ goto out_srcu;
+
+ /* Update VRMASD field in the LPCR */
+ senc = slb_pgsize_encoding(psize);
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ lpcr_mask = LPCR_VRMASD;
+ /* the -4 is to account for senc values starting at 0x10 */
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+
+ /* Create HPTEs in the hash page table for the VRMA */
+ kvmppc_map_vrma(vcpu, memslot, porder);
+
+ } else {
+ /* Set up to use an RMO region */
+ rma_size = kvm_rma_pages;
+ if (rma_size > memslot->npages)
+ rma_size = memslot->npages;
+ rma_size <<= PAGE_SHIFT;
+ rmls = lpcr_rmls(rma_size);
+ err = -EINVAL;
+ if ((long)rmls < 0) {
+ pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
+ goto out_srcu;
+ }
+ atomic_inc(&ri->use_count);
+ kvm->arch.rma = ri;
+
+ /* Update LPCR and RMOR */
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970; insert RMLS value (split field) in HID4 */
+ lpcr_mask = (1ul << HID4_RMLS0_SH) |
+ (3ul << HID4_RMLS2_SH) | HID4_RMOR;
+ lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
+ ((rmls & 3) << HID4_RMLS2_SH);
+ /* RMOR is also in HID4 */
+ lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
+ << HID4_RMOR_SH;
+ } else {
+ /* POWER7 */
+ lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
+ lpcr = rmls << LPCR_RMLS_SH;
+ kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
+ }
+ pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
+ ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
+
+ /* Initialize phys addrs of pages in RMO */
+ npages = kvm_rma_pages;
+ porder = __ilog2(npages);
+ physp = memslot->arch.slot_phys;
+ if (physp) {
+ if (npages > memslot->npages)
+ npages = memslot->npages;
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i)
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
+ porder;
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ }
+ }
+
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
+ /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+ smp_wmb();
+ kvm->arch.rma_setup_done = 1;
+ err = 0;
+ out_srcu:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ out:
+ mutex_unlock(&kvm->lock);
+ return err;
+
+ up_out:
+ up_read(&current->mm->mmap_sem);
+ goto out_srcu;
+}
+
+static int kvmppc_core_init_vm_hv(struct kvm *kvm)
+{
+ unsigned long lpcr, lpid;
+
+ /* Allocate the guest's logical partition ID */
+
+ lpid = kvmppc_alloc_lpid();
+ if ((long)lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
+
+ /*
+ * Since we don't flush the TLB when tearing down a VM,
+ * and this lpid might have previously been used,
+ * make sure we flush on each core before running the new VM.
+ */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
+ kvm->arch.rma = NULL;
+
+ kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970; HID4 is effectively the LPCR */
+ kvm->arch.host_lpid = 0;
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
+ lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
+ lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
+ ((lpid & 0xf) << HID4_LPID5_SH);
+ } else {
+ /* POWER7; init LPCR for virtual RMA mode */
+ kvm->arch.host_lpid = mfspr(SPRN_LPID);
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+ lpcr &= LPCR_PECE | LPCR_LPES;
+ lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+ LPCR_VPM0 | LPCR_VPM1;
+ kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* On POWER8 turn on online bit to enable PURR/SPURR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr |= LPCR_ONL;
+ }
+ kvm->arch.lpcr = lpcr;
+
+ kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
+ spin_lock_init(&kvm->arch.slot_phys_lock);
+
+ /*
+ * Track that we now have a HV mode VM active. This blocks secondary
+ * CPU threads from coming online.
+ */
+ kvm_hv_vm_activated();
+
+ return 0;
+}
+
+static void kvmppc_free_vcores(struct kvm *kvm)
+{
+ long int i;
+
+ for (i = 0; i < KVM_MAX_VCORES; ++i)
+ kfree(kvm->arch.vcores[i]);
+ kvm->arch.online_vcores = 0;
+}
+
+static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
+{
+ kvm_hv_vm_deactivated();
+
+ kvmppc_free_vcores(kvm);
+ if (kvm->arch.rma) {
+ kvm_release_rma(kvm->arch.rma);
+ kvm->arch.rma = NULL;
+ }
+
+ kvmppc_free_hpt(kvm);
+}
+
+/* We don't need to emulate any privileged instructions or dcbz */
+static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_core_check_processor_compat_hv(void)
+{
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EIO;
+ return 0;
+}
+
+static long kvm_arch_vm_ioctl_hv(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm __maybe_unused = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ switch (ioctl) {
+
+ case KVM_ALLOCATE_RMA: {
+ struct kvm_allocate_rma rma;
+ struct kvm *kvm = filp->private_data;
+
+ r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+ if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+ r = -EFAULT;
+ break;
+ }
+
+ case KVM_PPC_ALLOCATE_HTAB: {
+ u32 htab_order;
+
+ r = -EFAULT;
+ if (get_user(htab_order, (u32 __user *)argp))
+ break;
+ r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ if (r)
+ break;
+ r = -EFAULT;
+ if (put_user(htab_order, (u32 __user *)argp))
+ break;
+ r = 0;
+ break;
+ }
+
+ case KVM_PPC_GET_HTAB_FD: {
+ struct kvm_get_htab_fd ghf;
+
+ r = -EFAULT;
+ if (copy_from_user(&ghf, argp, sizeof(ghf)))
+ break;
+ r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+ break;
+ }
+
+ default:
+ r = -ENOTTY;
+ }
+
+ return r;
+}
+
+static struct kvmppc_ops kvm_ops_hv = {
+ .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
+ .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
+ .get_one_reg = kvmppc_get_one_reg_hv,
+ .set_one_reg = kvmppc_set_one_reg_hv,
+ .vcpu_load = kvmppc_core_vcpu_load_hv,
+ .vcpu_put = kvmppc_core_vcpu_put_hv,
+ .set_msr = kvmppc_set_msr_hv,
+ .vcpu_run = kvmppc_vcpu_run_hv,
+ .vcpu_create = kvmppc_core_vcpu_create_hv,
+ .vcpu_free = kvmppc_core_vcpu_free_hv,
+ .check_requests = kvmppc_core_check_requests_hv,
+ .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
+ .flush_memslot = kvmppc_core_flush_memslot_hv,
+ .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
+ .commit_memory_region = kvmppc_core_commit_memory_region_hv,
+ .unmap_hva = kvm_unmap_hva_hv,
+ .unmap_hva_range = kvm_unmap_hva_range_hv,
+ .age_hva = kvm_age_hva_hv,
+ .test_age_hva = kvm_test_age_hva_hv,
+ .set_spte_hva = kvm_set_spte_hva_hv,
+ .mmu_destroy = kvmppc_mmu_destroy_hv,
+ .free_memslot = kvmppc_core_free_memslot_hv,
+ .create_memslot = kvmppc_core_create_memslot_hv,
+ .init_vm = kvmppc_core_init_vm_hv,
+ .destroy_vm = kvmppc_core_destroy_vm_hv,
+ .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
+ .emulate_op = kvmppc_core_emulate_op_hv,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
+ .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
+ .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
+};
+
+static int kvmppc_book3s_init_hv(void)
+{
+ int r;
+ /*
+ * FIXME!! Do we need to check on all cpus ?
+ */
+ r = kvmppc_core_check_processor_compat_hv();
+ if (r < 0)
+ return -ENODEV;
+
+ kvm_ops_hv.owner = THIS_MODULE;
+ kvmppc_hv_ops = &kvm_ops_hv;
+
+ r = kvmppc_mmu_hv_init();
+ return r;
+}
+
+static void kvmppc_book3s_exit_hv(void)
+{
+ kvmppc_hv_ops = NULL;
+}
+
+module_init(kvmppc_book3s_init_hv);
+module_exit(kvmppc_book3s_exit_hv);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 00000000000..7cde8a66520
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm_host.h>
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
+
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+#include "book3s_hv_cma.h"
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
+/*
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
+ * Each RMA has to be physically contiguous and of a size that the
+ * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
+ * and other larger sizes. Since we are unlikely to be allocate that
+ * much physically contiguous memory after the system is up and running,
+ * we preallocate a set of RMAs in early boot using CMA.
+ * should be power of 2.
+ */
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
+
+/* Work out RMLS (real mode limit selector) field value for a given RMA size.
+ Assumes POWER7 or PPC970. */
+static inline int lpcr_rmls(unsigned long rma_size)
+{
+ switch (rma_size) {
+ case 32ul << 20: /* 32 MB */
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return 8; /* only supported on POWER7 */
+ return -1;
+ case 64ul << 20: /* 64 MB */
+ return 3;
+ case 128ul << 20: /* 128 MB */
+ return 7;
+ case 256ul << 20: /* 256 MB */
+ return 4;
+ case 1ul << 30: /* 1 GB */
+ return 2;
+ case 16ul << 30: /* 16 GB */
+ return 1;
+ case 256ul << 30: /* 256 GB */
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static int __init early_parse_rma_size(char *p)
+{
+ unsigned long kvm_rma_size;
+
+ pr_debug("%s(%s)\n", __func__, p);
+ if (!p)
+ return -EINVAL;
+ kvm_rma_size = memparse(p, &p);
+ /*
+ * Check that the requested size is one supported in hardware
+ */
+ if (lpcr_rmls(kvm_rma_size) < 0) {
+ pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+ return -EINVAL;
+ }
+ kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
+ return 0;
+}
+early_param("kvm_rma_size", early_parse_rma_size);
+
+struct kvm_rma_info *kvm_alloc_rma()
+{
+ struct page *page;
+ struct kvm_rma_info *ri;
+
+ ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+ if (!ri)
+ return NULL;
+ page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+ if (!page)
+ goto err_out;
+ atomic_set(&ri->use_count, 1);
+ ri->base_pfn = page_to_pfn(page);
+ return ri;
+err_out:
+ kfree(ri);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_rma);
+
+void kvm_release_rma(struct kvm_rma_info *ri)
+{
+ if (atomic_dec_and_test(&ri->use_count)) {
+ kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+ kfree(ri);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_release_rma);
+
+static int __init early_parse_kvm_cma_resv(char *p)
+{
+ pr_debug("%s(%s)\n", __func__, p);
+ if (!p)
+ return -EINVAL;
+ return kstrtoul(p, 0, &kvm_cma_resv_ratio);
+}
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
+
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
+{
+ unsigned long align_pages = HPT_ALIGN_PAGES;
+
+ /* Old CPUs require HPT aligned on a multiple of its size */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ align_pages = nr_pages;
+ return kvm_alloc_cma(nr_pages, align_pages);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
+{
+ kvm_release_cma(page, nr_pages);
+}
+EXPORT_SYMBOL_GPL(kvm_release_hpt);
+
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init kvm_cma_reserve(void)
+{
+ unsigned long align_size;
+ struct memblock_region *reg;
+ phys_addr_t selected_size = 0;
+ /*
+ * We cannot use memblock_phys_mem_size() here, because
+ * memblock_analyze() has not been called yet.
+ */
+ for_each_memblock(memory, reg)
+ selected_size += memblock_region_memory_end_pfn(reg) -
+ memblock_region_memory_base_pfn(reg);
+
+ selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+ if (selected_size) {
+ pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+ (unsigned long)selected_size / SZ_1M);
+ /*
+ * Old CPUs require HPT aligned on a multiple of its size. So for them
+ * make the alignment as max size we could request.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ align_size = __rounddown_pow_of_two(selected_size);
+ else
+ align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+ align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+ kvm_cma_declare_contiguous(selected_size, align_size);
+ }
+}
+
+/*
+ * When running HV mode KVM we need to block certain operations while KVM VMs
+ * exist in the system. We use a counter of VMs to track this.
+ *
+ * One of the operations we need to block is onlining of secondaries, so we
+ * protect hv_vm_count with get/put_online_cpus().
+ */
+static atomic_t hv_vm_count;
+
+void kvm_hv_vm_activated(void)
+{
+ get_online_cpus();
+ atomic_inc(&hv_vm_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
+
+void kvm_hv_vm_deactivated(void)
+{
+ get_online_cpus();
+ atomic_dec(&hv_vm_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
+
+bool kvm_hv_mode_active(void)
+{
+ return atomic_read(&hv_vm_count) != 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 00000000000..d9d3d8553d5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,240 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+# define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include "book3s_hv_cma.h"
+
+struct kvm_cma {
+ unsigned long base_pfn;
+ unsigned long count;
+ unsigned long *bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ * for kvm hash pagetable
+ * @size: Size of the reserved memory.
+ * @alignment: Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+ long base_pfn;
+ phys_addr_t addr;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+ if (!size)
+ return -EINVAL;
+ /*
+ * Sanitise input arguments.
+ * We should be pageblock aligned for CMA.
+ */
+ alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+ size = ALIGN(size, alignment);
+ /*
+ * Reserve memory
+ * Use __memblock_alloc_base() since
+ * memblock_alloc_base() panic()s.
+ */
+ addr = __memblock_alloc_base(size, alignment, 0);
+ if (!addr) {
+ base_pfn = -ENOMEM;
+ goto err;
+ } else
+ base_pfn = PFN_DOWN(addr);
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma->base_pfn = base_pfn;
+ cma->count = size >> PAGE_SHIFT;
+ pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+ return 0;
+err:
+ pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+ return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+ int ret;
+ struct page *page = NULL;
+ struct kvm_cma *cma = &kvm_cma_area;
+ unsigned long chunk_count, nr_chunk;
+ unsigned long mask, pfn, pageno, start = 0;
+
+
+ if (!cma || !cma->count)
+ return NULL;
+
+ pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+ (void *)cma, nr_pages, align_pages);
+
+ if (!nr_pages)
+ return NULL;
+ /*
+ * align mask with chunk size. The bit tracks pages in chunk size
+ */
+ VM_BUG_ON(!is_power_of_2(align_pages));
+ mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+ BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+ chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+ mutex_lock(&kvm_cma_mutex);
+ for (;;) {
+ pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+ start, nr_chunk, mask);
+ if (pageno >= chunk_count)
+ break;
+
+ pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
+ ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+ if (ret == 0) {
+ bitmap_set(cma->bitmap, pageno, nr_chunk);
+ page = pfn_to_page(pfn);
+ memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+ break;
+ } else if (ret != -EBUSY) {
+ break;
+ }
+ pr_debug("%s(): memory range at %p is busy, retrying\n",
+ __func__, pfn_to_page(pfn));
+ /* try again with a bit different memory target */
+ start = pageno + mask + 1;
+ }
+ mutex_unlock(&kvm_cma_mutex);
+ pr_debug("%s(): returned %p\n", __func__, page);
+ return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+ unsigned long pfn;
+ unsigned long nr_chunk;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ if (!cma || !pages)
+ return false;
+
+ pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+ pfn = page_to_pfn(pages);
+
+ if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+ return false;
+
+ VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+ nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+ mutex_lock(&kvm_cma_mutex);
+ bitmap_clear(cma->bitmap,
+ (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+ nr_chunk);
+ free_contig_range(pfn, nr_pages);
+ mutex_unlock(&kvm_cma_mutex);
+
+ return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+ unsigned long count)
+{
+ unsigned long pfn = base_pfn;
+ unsigned i = count >> pageblock_order;
+ struct zone *zone;
+
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+ do {
+ unsigned j;
+ base_pfn = pfn;
+ for (j = pageblock_nr_pages; j; --j, pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ /*
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA resv range
+ * to be in the same zone.
+ */
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ return -EINVAL;
+ }
+ init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+ } while (--i);
+ return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+ int bitmap_size, ret;
+ unsigned long chunk_count;
+ struct kvm_cma *cma = &kvm_cma_area;
+
+ pr_debug("%s()\n", __func__);
+ if (!cma->count)
+ return 0;
+ chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!cma->bitmap)
+ return -ENOMEM;
+
+ ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+ if (ret)
+ goto error;
+ return 0;
+
+error:
+ kfree(cma->bitmap);
+ return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 00000000000..655144f75fa
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,27 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and Hash page allocation will be multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER 18
+
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+ unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+ phys_addr_t alignment) __init;
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
new file mode 100644
index 00000000000..731be7478b2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -0,0 +1,195 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_interrupts.S, which is:
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/ppc-opcode.h>
+
+/*****************************************************************************
+ * *
+ * Guest entry / exit code that is in kernel module memory (vmalloc) *
+ * *
+ ****************************************************************************/
+
+/* Registers:
+ * none
+ */
+_GLOBAL(__kvmppc_vcore_entry)
+
+ /* Write correct stack frame */
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+
+ /* Save host state to the stack */
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ /* Save non-volatile registers (r14 - r31) and CR */
+ SAVE_NVGPRS(r1)
+ mfcr r3
+ std r3, _CCR(r1)
+
+ /* Save host DSCR */
+BEGIN_FTR_SECTION
+ mfspr r3, SPRN_DSCR
+ std r3, HSTATE_DSCR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ /* Save host DABR */
+ mfspr r3, SPRN_DABR
+ std r3, HSTATE_DABR(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+ /* Hard-disable interrupts */
+ mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
+ rldicl r10,r10,48,1
+ rotldi r10,r10,16
+ mtmsrd r10,1
+
+ /* Save host PMU registers */
+BEGIN_FTR_SECTION
+ /* Work around P8 PMAE bug */
+ li r3, -1
+ clrrdi r3, r3, 10
+ mfspr r8, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r7, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r5, 0
+ mtspr SPRN_MMCRA, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ isync
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r5, LPPACA_PMCINUSE(r3)
+ cmpwi r5, 0
+ beq 31f /* skip if not */
+ mfspr r5, SPRN_MMCR1
+ mfspr r9, SPRN_SIAR
+ mfspr r10, SPRN_SDAR
+ std r7, HSTATE_MMCR(r13)
+ std r5, HSTATE_MMCR + 8(r13)
+ std r6, HSTATE_MMCR + 16(r13)
+ std r9, HSTATE_MMCR + 24(r13)
+ std r10, HSTATE_MMCR + 32(r13)
+BEGIN_FTR_SECTION
+ mfspr r9, SPRN_SIER
+ std r8, HSTATE_MMCR + 40(r13)
+ std r9, HSTATE_MMCR + 48(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mfspr r3, SPRN_PMC1
+ mfspr r5, SPRN_PMC2
+ mfspr r6, SPRN_PMC3
+ mfspr r7, SPRN_PMC4
+ mfspr r8, SPRN_PMC5
+ mfspr r9, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, HSTATE_PMC(r13)
+ stw r5, HSTATE_PMC + 4(r13)
+ stw r6, HSTATE_PMC + 8(r13)
+ stw r7, HSTATE_PMC + 12(r13)
+ stw r8, HSTATE_PMC + 16(r13)
+ stw r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ stw r10, HSTATE_PMC + 24(r13)
+ stw r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+31:
+
+ /*
+ * Put whatever is in the decrementer into the
+ * hypervisor decrementer.
+ */
+ mfspr r8,SPRN_DEC
+ mftb r7
+ mtspr SPRN_HDEC,r8
+ extsw r8,r8
+ add r8,r8,r7
+ std r8,HSTATE_DECEXP(r13)
+
+#ifdef CONFIG_SMP
+ /*
+ * On PPC970, if the guest vcpu has an external interrupt pending,
+ * send ourselves an IPI so as to interrupt the guest once it
+ * enables interrupts. (It must have interrupts disabled,
+ * otherwise we would already have delivered the interrupt.)
+ *
+ * XXX If this is a UP build, smp_send_reschedule is not available,
+ * so the interrupt will be delayed until the next time the vcpu
+ * enters the guest with interrupts enabled.
+ */
+BEGIN_FTR_SECTION
+ ld r4, HSTATE_KVM_VCPU(r13)
+ ld r0, VCPU_PENDING_EXC(r4)
+ li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
+ oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+ and. r0, r0, r7
+ beq 32f
+ lhz r3, PACAPACAINDEX(r13)
+ bl smp_send_reschedule
+ nop
+32:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+#endif /* CONFIG_SMP */
+
+ /* Jump to partition switch code */
+ bl kvmppc_hv_entry_trampoline
+ nop
+
+/*
+ * We return here in virtual mode after the guest exits
+ * with something that we can't handle in real mode.
+ * Interrupts are enabled again at this point.
+ */
+
+ /*
+ * Register usage at this point:
+ *
+ * R1 = host R1
+ * R2 = host R2
+ * R12 = exit handler id
+ * R13 = PACA
+ */
+
+ /* Restore non-volatile host registers (r14 - r31) and CR */
+ REST_NVGPRS(r1)
+ ld r4, _CCR(r1)
+ mtcr r4
+
+ addi r1, r1, SWITCH_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 00000000000..3a5c568b1e8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,145 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <asm/opal.h>
+#include <asm/mce.h>
+
+/* SRR1 bits for machine check on POWER7 */
+#define SRR1_MC_LDSTERR (1ul << (63-42))
+#define SRR1_MC_IFETCH_SH (63-45)
+#define SRR1_MC_IFETCH_MASK 0x7
+#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
+#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
+#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
+#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
+
+/* DSISR bits for machine check on POWER7 */
+#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
+#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
+#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
+#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
+#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
+
+/* POWER7 SLB flush and reload */
+static void reload_slb(struct kvm_vcpu *vcpu)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* First clear out SLB */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+ /* Do they have an SLB shadow buffer registered? */
+ slb = vcpu->arch.slb_shadow.pinned_addr;
+ if (!slb)
+ return;
+
+ /* Sanity check */
+ n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+ if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
+ return;
+
+ /* Load up the SLB from that */
+ for (i = 0; i < n; ++i) {
+ unsigned long rb = slb->save_area[i].esid;
+ unsigned long rs = slb->save_area[i].vsid;
+
+ rb = (rb & ~0xFFFul) | i; /* insert entry number */
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+/*
+ * On POWER7, see if we can handle a machine check that occurred inside
+ * the guest in real mode, without switching to the host partition.
+ *
+ * Returns: 0 => exit guest, 1 => deliver machine check to guest
+ */
+static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+{
+ unsigned long srr1 = vcpu->arch.shregs.msr;
+ struct machine_check_event mce_evt;
+ long handled = 1;
+
+ if (srr1 & SRR1_MC_LDSTERR) {
+ /* error on load/store */
+ unsigned long dsisr = vcpu->arch.shregs.dsisr;
+
+ if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
+ /* flush and reload SLB; flushes D-ERAT too */
+ reload_slb(vcpu);
+ dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
+ }
+ if (dsisr & DSISR_MC_TLB_MULTI) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+ dsisr &= ~DSISR_MC_TLB_MULTI;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+ }
+
+ switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
+ case 0:
+ break;
+ case SRR1_MC_IFETCH_SLBPAR:
+ case SRR1_MC_IFETCH_SLBMULTI:
+ case SRR1_MC_IFETCH_SLBPARMULTI:
+ reload_slb(vcpu);
+ break;
+ case SRR1_MC_IFETCH_TLBMULTI:
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+ break;
+ default:
+ handled = 0;
+ }
+
+ /*
+ * See if we have already handled the condition in the linux host.
+ * We assume that if the condition is recovered then linux host
+ * will have generated an error log event that we will pick
+ * up and log later.
+ * Don't release mce event now. We will queue up the event so that
+ * we can log the MCE event info on host console.
+ */
+ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
+ goto out;
+
+ if (mce_evt.version == MCE_V1 &&
+ (mce_evt.severity == MCE_SEV_NO_ERROR ||
+ mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
+ handled = 1;
+
+out:
+ /*
+ * We are now going enter guest either through machine check
+ * interrupt (for unhandled errors) or will continue from
+ * current HSRR0 (for handled errors) in guest. Hence
+ * queue up the event so that we can log it from host console later.
+ */
+ machine_check_queue_event();
+
+ return handled;
+}
+
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return kvmppc_realmode_mc_power7(vcpu);
+
+ return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
new file mode 100644
index 00000000000..5a24d3c2b6b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -0,0 +1,922 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+/* Translate address of a vmalloc'd thing to a linear map address */
+static void *real_vmalloc_addr(void *x)
+{
+ unsigned long addr = (unsigned long) x;
+ pte_t *p;
+
+ p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+ if (!p || !pte_present(*p))
+ return NULL;
+ /* assume we don't have huge pages in vmalloc space... */
+ addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
+ return __va(addr);
+}
+
+/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
+static int global_invalidates(struct kvm *kvm, unsigned long flags)
+{
+ int global;
+
+ /*
+ * If there is only one vcore, and it's currently running,
+ * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
+ * we can use tlbiel as long as we mark all other physical
+ * cores as potentially having stale TLB entries for this lpid.
+ * If we're not using MMU notifiers, we never take pages away
+ * from the guest, so we can use tlbiel if requested.
+ * Otherwise, don't use tlbiel.
+ */
+ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
+ global = 0;
+ else if (kvm->arch.using_mmu_notifiers)
+ global = 1;
+ else
+ global = !(flags & H_LOCAL);
+
+ if (!global) {
+ /* any other core might now have stale TLB entries... */
+ smp_wmb();
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+ cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
+ &kvm->arch.need_tlb_flush);
+ }
+
+ return global;
+}
+
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+ unsigned long *rmap, long pte_index, int realmode)
+{
+ struct revmap_entry *head, *tail;
+ unsigned long i;
+
+ if (*rmap & KVMPPC_RMAP_PRESENT) {
+ i = *rmap & KVMPPC_RMAP_INDEX;
+ head = &kvm->arch.revmap[i];
+ if (realmode)
+ head = real_vmalloc_addr(head);
+ tail = &kvm->arch.revmap[head->back];
+ if (realmode)
+ tail = real_vmalloc_addr(tail);
+ rev->forw = i;
+ rev->back = head->back;
+ tail->forw = pte_index;
+ head->back = pte_index;
+ } else {
+ rev->forw = rev->back = pte_index;
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+ pte_index | KVMPPC_RMAP_PRESENT;
+ }
+ unlock_rmap(rmap);
+}
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+
+/* Remove this HPTE from the chain for a real page */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+ struct revmap_entry *rev,
+ unsigned long hpte_v, unsigned long hpte_r)
+{
+ struct revmap_entry *next, *prev;
+ unsigned long gfn, ptel, head;
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ unsigned long rcbits;
+
+ rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+ ptel = rev->guest_rpte |= rcbits;
+ gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return;
+
+ rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+ lock_rmap(rmap);
+
+ head = *rmap & KVMPPC_RMAP_INDEX;
+ next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
+ prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+ next->back = rev->back;
+ prev->forw = rev->forw;
+ if (head == pte_index) {
+ head = rev->forw;
+ if (head == pte_index)
+ *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ else
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
+ }
+ *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ unlock_rmap(rmap);
+}
+
+static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
+ int writing, unsigned long *pte_sizep)
+{
+ pte_t *ptep;
+ unsigned long ps = *pte_sizep;
+ unsigned int hugepage_shift;
+
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
+ if (!ptep)
+ return __pte(0);
+ if (hugepage_shift)
+ *pte_sizep = 1ul << hugepage_shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+ if (ps > *pte_sizep)
+ return __pte(0);
+ return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
+}
+
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hpte[0] = hpte_v;
+}
+
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
+{
+ unsigned long i, pa, gpa, gfn, psize;
+ unsigned long slot_fn, hva;
+ unsigned long *hpte;
+ struct revmap_entry *rev;
+ unsigned long g_ptel;
+ struct kvm_memory_slot *memslot;
+ unsigned long *physp, pte_size;
+ unsigned long is_io;
+ unsigned long *rmap;
+ pte_t pte;
+ unsigned int writing;
+ unsigned long mmu_seq;
+ unsigned long rcbits;
+
+ psize = hpte_page_size(pteh, ptel);
+ if (!psize)
+ return H_PARAMETER;
+ writing = hpte_is_writable(ptel);
+ pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+ ptel &= ~HPTE_GR_RESERVED;
+ g_ptel = ptel;
+
+ /* used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /* Find the memslot (if any) for this address */
+ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ pa = 0;
+ is_io = ~0ul;
+ rmap = NULL;
+ if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
+ /* PPC970 can't do emulated MMIO */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return H_PARAMETER;
+ /* Emulated MMIO - mark this with key=31 */
+ pteh |= HPTE_V_ABSENT;
+ ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+ goto do_insert;
+ }
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ slot_fn = gfn - memslot->base_gfn;
+ rmap = &memslot->arch.rmap[slot_fn];
+
+ if (!kvm->arch.using_mmu_notifiers) {
+ physp = memslot->arch.slot_phys;
+ if (!physp)
+ return H_PARAMETER;
+ physp += slot_fn;
+ if (realmode)
+ physp = real_vmalloc_addr(physp);
+ pa = *physp;
+ if (!pa)
+ return H_TOO_HARD;
+ is_io = pa & (HPTE_R_I | HPTE_R_W);
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
+ pa &= PAGE_MASK;
+ pa |= gpa & ~PAGE_MASK;
+ } else {
+ /* Translate to host virtual address */
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+
+ /* Look up the Linux PTE for the backing page */
+ pte_size = psize;
+ pte = lookup_linux_pte_and_update(pgdir, hva, writing,
+ &pte_size);
+ if (pte_present(pte) && !pte_numa(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ pa |= hva & (pte_size - 1);
+ pa |= gpa & ~PAGE_MASK;
+ }
+ }
+
+ if (pte_size < psize)
+ return H_PARAMETER;
+
+ ptel &= ~(HPTE_R_PP0 - psize);
+ ptel |= pa;
+
+ if (pa)
+ pteh |= HPTE_V_VALID;
+ else
+ pteh |= HPTE_V_ABSENT;
+
+ /* Check WIMG */
+ if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
+ if (is_io)
+ return H_PARAMETER;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
+ ptel |= HPTE_R_M;
+ }
+
+ /* Find and lock the HPTEG slot to use */
+ do_insert:
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+ if (likely((flags & H_EXACT) == 0)) {
+ pte_index &= ~7UL;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ for (i = 0; i < 8; ++i) {
+ if ((*hpte & HPTE_V_VALID) == 0 &&
+ try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT))
+ break;
+ hpte += 2;
+ }
+ if (i == 8) {
+ /*
+ * Since try_lock_hpte doesn't retry (not even stdcx.
+ * failures), it could be that there is a free slot
+ * but we transiently failed to lock it. Try again,
+ * actually locking each slot and checking it.
+ */
+ hpte -= 16;
+ for (i = 0; i < 8; ++i) {
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ break;
+ *hpte &= ~HPTE_V_HVLOCK;
+ hpte += 2;
+ }
+ if (i == 8)
+ return H_PTEG_FULL;
+ }
+ pte_index += i;
+ } else {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT)) {
+ /* Lock the slot and check again */
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ *hpte &= ~HPTE_V_HVLOCK;
+ return H_PTEG_FULL;
+ }
+ }
+ }
+
+ /* Save away the guest's idea of the second HPTE dword */
+ rev = &kvm->arch.revmap[pte_index];
+ if (realmode)
+ rev = real_vmalloc_addr(rev);
+ if (rev) {
+ rev->guest_rpte = g_ptel;
+ note_hpte_modification(kvm, rev);
+ }
+
+ /* Link HPTE into reverse-map chain */
+ if (pteh & HPTE_V_VALID) {
+ if (realmode)
+ rmap = real_vmalloc_addr(rmap);
+ lock_rmap(rmap);
+ /* Check for pending invalidations under the rmap chain lock */
+ if (kvm->arch.using_mmu_notifiers &&
+ mmu_notifier_retry(kvm, mmu_seq)) {
+ /* inval in progress, write a non-present HPTE */
+ pteh |= HPTE_V_ABSENT;
+ pteh &= ~HPTE_V_VALID;
+ unlock_rmap(rmap);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+ realmode);
+ /* Only set R/C in real HPTE if already set in *rmap */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+ }
+ }
+
+ hpte[1] = ptel;
+
+ /* Write the first HPTE dword, unlocking the HPTE and making it valid */
+ eieio();
+ hpte[0] = pteh;
+ asm volatile("ptesync" : : : "memory");
+
+ *pte_idx_ret = pte_index;
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+ vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
+
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
+#endif
+
+static inline int try_lock_tlbie(unsigned int *lock)
+{
+ unsigned int tmp, old;
+ unsigned int token = LOCK_TOKEN;
+
+ asm volatile("1:lwarx %1,0,%2\n"
+ " cmpwi cr0,%1,0\n"
+ " bne 2f\n"
+ " stwcx. %3,0,%2\n"
+ " bne- 1b\n"
+ " isync\n"
+ "2:"
+ : "=&r" (tmp), "=&r" (old)
+ : "r" (lock), "r" (token)
+ : "cc", "memory");
+ return old == 0;
+}
+
+/*
+ * tlbie/tlbiel is a bit different on the PPC970 compared to later
+ * processors such as POWER7; the large page bit is in the instruction
+ * not RB, and the top 16 bits and the bottom 12 bits of the VA
+ * in RB must be 0.
+ */
+static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+{
+ long i;
+
+ if (global) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i) {
+ unsigned long rb = rbvalues[i];
+
+ if (rb & 1) /* large page */
+ asm volatile("tlbie %0,1" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ else
+ asm volatile("tlbie %0,0" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i) {
+ unsigned long rb = rbvalues[i];
+
+ if (rb & 1) /* large page */
+ asm volatile("tlbiel %0,1" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ else
+ asm volatile("tlbiel %0,0" : :
+ "r" (rb & 0x0000fffffffff000ul));
+ }
+ asm volatile("ptesync" : : : "memory");
+ }
+}
+
+static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+{
+ long i;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ /* PPC970 tlbie instruction is a bit different */
+ do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
+ return;
+ }
+ if (global) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i)
+ asm volatile(PPC_TLBIE(%1,%0) : :
+ "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ if (need_sync)
+ asm volatile("ptesync" : : : "memory");
+ for (i = 0; i < npages; ++i)
+ asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+ asm volatile("ptesync" : : : "memory");
+ }
+}
+
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret)
+{
+ unsigned long *hpte;
+ unsigned long v, r, rb;
+ struct revmap_entry *rev;
+
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+ ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
+ ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
+ hpte[0] &= ~HPTE_V_HVLOCK;
+ return H_NOT_FOUND;
+ }
+
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ if (v & HPTE_V_VALID) {
+ hpte[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(v, hpte[1], pte_index);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+ /* Read PTE low word after tlbie to get final R/C values */
+ remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
+ }
+ r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+ note_hpte_modification(kvm, rev);
+ unlock_hpte(hpte, 0);
+
+ hpret[0] = v;
+ hpret[1] = r;
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn)
+{
+ return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+ &vcpu->arch.gpr[4]);
+}
+
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *args = &vcpu->arch.gpr[4];
+ unsigned long *hp, *hptes[4], tlbrb[4];
+ long int i, j, k, n, found, indexes[4];
+ unsigned long flags, req, pte_index, rcbits;
+ int global;
+ long int ret = H_SUCCESS;
+ struct revmap_entry *rev, *revs[4];
+
+ global = global_invalidates(kvm, 0);
+ for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+ n = 0;
+ for (; i < 4; ++i) {
+ j = i * 2;
+ pte_index = args[j];
+ flags = pte_index >> 56;
+ pte_index &= ((1ul << 56) - 1);
+ req = flags >> 6;
+ flags &= 3;
+ if (req == 3) { /* no more requests */
+ i = 4;
+ break;
+ }
+ if (req != 1 || flags == 3 ||
+ pte_index >= kvm->arch.hpt_npte) {
+ /* parameter error */
+ args[j] = ((0xa0 | flags) << 56) + pte_index;
+ ret = H_PARAMETER;
+ break;
+ }
+ hp = (unsigned long *)
+ (kvm->arch.hpt_virt + (pte_index << 4));
+ /* to avoid deadlock, don't spin except for first */
+ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+ if (n)
+ break;
+ while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+ cpu_relax();
+ }
+ found = 0;
+ if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+ switch (flags & 3) {
+ case 0: /* absolute */
+ found = 1;
+ break;
+ case 1: /* andcond */
+ if (!(hp[0] & args[j + 1]))
+ found = 1;
+ break;
+ case 2: /* AVPN */
+ if ((hp[0] & ~0x7fUL) == args[j + 1])
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ hp[0] &= ~HPTE_V_HVLOCK;
+ args[j] = ((0x90 | flags) << 56) + pte_index;
+ continue;
+ }
+
+ args[j] = ((0x80 | flags) << 56) + pte_index;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ note_hpte_modification(kvm, rev);
+
+ if (!(hp[0] & HPTE_V_VALID)) {
+ /* insert R and C bits from PTE */
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ hp[0] = 0;
+ continue;
+ }
+
+ hp[0] &= ~HPTE_V_VALID; /* leave it locked */
+ tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+ indexes[n] = j;
+ hptes[n] = hp;
+ revs[n] = rev;
+ ++n;
+ }
+
+ if (!n)
+ break;
+
+ /* Now that we've collected a batch, do the tlbies */
+ do_tlbies(kvm, tlbrb, n, global, true);
+
+ /* Read PTE low words after tlbie to get final R/C values */
+ for (k = 0; k < n; ++k) {
+ j = indexes[k];
+ pte_index = args[j] & ((1ul << 56) - 1);
+ hp = hptes[k];
+ rev = revs[k];
+ remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ hp[0] = 0;
+ }
+ }
+
+ return ret;
+}
+
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long va)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hpte;
+ struct revmap_entry *rev;
+ unsigned long v, r, rb, mask, bits;
+
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+ ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
+ hpte[0] &= ~HPTE_V_HVLOCK;
+ return H_NOT_FOUND;
+ }
+
+ v = hpte[0];
+ bits = (flags << 55) & HPTE_R_PP0;
+ bits |= (flags << 48) & HPTE_R_KEY_HI;
+ bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+ /* Update guest view of 2nd HPTE dword */
+ mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+ HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ if (rev) {
+ r = (rev->guest_rpte & ~mask) | bits;
+ rev->guest_rpte = r;
+ note_hpte_modification(kvm, rev);
+ }
+ r = (hpte[1] & ~mask) | bits;
+
+ /* Update HPTE */
+ if (v & HPTE_V_VALID) {
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = v & ~HPTE_V_VALID;
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+ /*
+ * If the host has this page as readonly but the guest
+ * wants to make it read/write, reduce the permissions.
+ * Checking the host permissions involves finding the
+ * memslot and then the Linux PTE for the page.
+ */
+ if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
+ unsigned long psize, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ pgd_t *pgdir = vcpu->arch.pgdir;
+ pte_t pte;
+
+ psize = hpte_page_size(v, r);
+ gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (memslot) {
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+ pte = lookup_linux_pte_and_update(pgdir, hva,
+ 1, &psize);
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
+ }
+ }
+ hpte[1] = r;
+ eieio();
+ hpte[0] = v & ~HPTE_V_HVLOCK;
+ asm volatile("ptesync" : : : "memory");
+ return H_SUCCESS;
+}
+
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hpte, v, r;
+ int i, n = 1;
+ struct revmap_entry *rev = NULL;
+
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+ if (flags & H_READ_4) {
+ pte_index &= ~3;
+ n = 4;
+ }
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ for (i = 0; i < n; ++i, ++pte_index) {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ r = hpte[1];
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ }
+ if (v & HPTE_V_VALID) {
+ r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ r &= ~HPTE_GR_RESERVED;
+ }
+ vcpu->arch.gpr[4 + i * 2] = v;
+ vcpu->arch.gpr[5 + i * 2] = r;
+ }
+ return H_SUCCESS;
+}
+
+void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{
+ unsigned long rb;
+
+ hptep[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ do_tlbies(kvm, &rb, 1, 1, true);
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
+void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{
+ unsigned long rb;
+ unsigned char rbyte;
+
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
+ /* modify only the second-last byte, which contains the ref bit */
+ *((char *)hptep + 14) = rbyte;
+ do_tlbies(kvm, &rb, 1, 1, false);
+}
+EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
+
+static int slb_base_page_shift[4] = {
+ 24, /* 16M */
+ 16, /* 64k */
+ 34, /* 16G */
+ 20, /* 1M, unsupported */
+};
+
+/* When called from virtmode, this func should be protected by
+ * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
+ * can trigger deadlock issue.
+ */
+long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
+ unsigned long valid)
+{
+ unsigned int i;
+ unsigned int pshift;
+ unsigned long somask;
+ unsigned long vsid, hash;
+ unsigned long avpn;
+ unsigned long *hpte;
+ unsigned long mask, val;
+ unsigned long v, r;
+
+ /* Get page shift, work out hash and AVPN etc. */
+ mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
+ val = 0;
+ pshift = 12;
+ if (slb_v & SLB_VSID_L) {
+ mask |= HPTE_V_LARGE;
+ val |= HPTE_V_LARGE;
+ pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
+ }
+ if (slb_v & SLB_VSID_B_1T) {
+ somask = (1UL << 40) - 1;
+ vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
+ vsid ^= vsid << 25;
+ } else {
+ somask = (1UL << 28) - 1;
+ vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
+ }
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
+ avpn = slb_v & ~(somask >> 16); /* also includes B */
+ avpn |= (eaddr & somask) >> 16;
+
+ if (pshift >= 24)
+ avpn &= ~((1UL << (pshift - 16)) - 1);
+ else
+ avpn &= ~0x7fUL;
+ val |= avpn;
+
+ for (;;) {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
+
+ for (i = 0; i < 16; i += 2) {
+ /* Read the PTE racily */
+ v = hpte[i] & ~HPTE_V_HVLOCK;
+
+ /* Check valid/absent, hash, segment size and AVPN */
+ if (!(v & valid) || (v & mask) != val)
+ continue;
+
+ /* Lock the PTE and read it under the lock */
+ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hpte[i] & ~HPTE_V_HVLOCK;
+ r = hpte[i+1];
+
+ /*
+ * Check the HPTE again, including base page size
+ */
+ if ((v & valid) && (v & mask) == val &&
+ hpte_base_page_size(v, r) == (1ul << pshift))
+ /* Return with the HPTE still locked */
+ return (hash << 3) + (i >> 1);
+
+ /* Unlock and move on */
+ hpte[i] = v;
+ }
+
+ if (val & HPTE_V_SECONDARY)
+ break;
+ val |= HPTE_V_SECONDARY;
+ hash = hash ^ kvm->arch.hpt_mask;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
+
+/*
+ * Called in real mode to check whether an HPTE not found fault
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
+ * Returns a possibly modified status (DSISR) value if not
+ * (i.e. pass the interrupt to the guest),
+ * -1 to pass the fault up to host kernel mode code, -2 to do that
+ * and also load the instruction word (for MMIO emulation),
+ * or 0 if we should make the guest retry the access.
+ */
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ long int index;
+ unsigned long v, r, gr;
+ unsigned long *hpte;
+ unsigned long valid;
+ struct revmap_entry *rev;
+ unsigned long pp, key;
+
+ /* For protection fault, expect to find a valid HPTE */
+ valid = HPTE_V_VALID;
+ if (status & DSISR_NOHPTE)
+ valid |= HPTE_V_ABSENT;
+
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ r = hpte[1];
+ rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ gr = rev->guest_rpte;
+
+ unlock_hpte(hpte, v);
+
+ /* For not found, if the HPTE is valid by now, retry the instruction */
+ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
+ return 0;
+
+ /* Check access permissions to the page */
+ pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+ key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
+ if (!data) {
+ if (gr & (HPTE_R_N | HPTE_R_G))
+ return status | SRR1_ISI_N_OR_G;
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | SRR1_ISI_PROT;
+ } else if (status & DSISR_ISSTORE) {
+ /* check write permission */
+ if (!hpte_write_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ } else {
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ }
+
+ /* Check storage key, if applicable */
+ if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
+ unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
+ if (status & DSISR_ISSTORE)
+ perm >>= 1;
+ if (perm & 1)
+ return status | DSISR_KEYFAULT;
+ }
+
+ /* Save HPTE info for virtual-mode handler */
+ vcpu->arch.pgfault_addr = addr;
+ vcpu->arch.pgfault_index = index;
+ vcpu->arch.pgfault_hpte[0] = v;
+ vcpu->arch.pgfault_hpte[1] = r;
+
+ /* Check the storage key to see if it is possibly emulated MMIO */
+ if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
+ return -2; /* MMIO emulation - load instr word */
+
+ return -1; /* send fault up to host kernel mode */
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 00000000000..b4b0082f761
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+ __asm__ __volatile__("sync; stbcix %0,0,%1"
+ : : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *this_vcpu)
+{
+ struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+ unsigned long xics_phys;
+ int cpu;
+
+ /* Mark the target VCPU as having an interrupt pending */
+ vcpu->stat.queue_intr++;
+ set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+ /* Kick self ? Just set MER and return */
+ if (vcpu == this_vcpu) {
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+ return;
+ }
+
+ /* Check if the core is loaded, if not, too hard */
+ cpu = vcpu->cpu;
+ if (cpu < 0 || cpu >= nr_cpu_ids) {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ return;
+ }
+ /* In SMT cpu will always point to thread 0, we adjust it */
+ cpu += vcpu->arch.ptid;
+
+ /* Not too hard, then poke the target */
+ xics_phys = paca[cpu].kvm_hstate.xics_phys;
+ rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+ /* Note: Only called on self ! */
+ clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+ &vcpu->arch.pending_exceptions);
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+ union kvmppc_icp_state old,
+ union kvmppc_icp_state new)
+{
+ struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ bool success;
+
+ /* Calculate new output value */
+ new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+ /* Attempt atomic update */
+ success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+ if (!success)
+ goto bail;
+
+ /*
+ * Check for output state update
+ *
+ * Note that this is racy since another processor could be updating
+ * the state already. This is why we never clear the interrupt output
+ * here, we only ever set it. The clear only happens prior to doing
+ * an update and only by the processor itself. Currently we do it
+ * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+ *
+ * We also do not try to figure out whether the EE state has changed,
+ * we unconditionally set it if the new state calls for it. The reason
+ * for that is that we opportunistically remove the pending interrupt
+ * flag when raising CPPR, so we need to set it back here if an
+ * interrupt is still pending.
+ */
+ if (new.out_ee)
+ icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+ /* Expose the state change for debug purposes */
+ this_vcpu->arch.icp->rm_dbgstate = new;
+ this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+ return success;
+}
+
+static inline int check_too_hard(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * a H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (ie bus transactions) so we can handle them
+ * separately here as well.
+ */
+ if (resend)
+ icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /* First clear the interrupt */
+ icp_rm_clr_vcpu_irq(icp->vcpu);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Return the result in GPR4 */
+ vcpu->arch.gpr[4] = xirr;
+
+ return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ local = this_icp->server_num == server;
+ if (local)
+ icp = this_icp;
+ else
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be done as there can be no XISR to
+ * reject.
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ this_icp->rm_action |= XICS_RM_REJECT;
+ this_icp->rm_reject = reject;
+ }
+
+ /* Pass resends to virtual mode */
+ if (resend)
+ this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+ return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr) {
+ icp_rm_down_cppr(xics, icp, cppr);
+ goto bail;
+ } else if (cppr == icp->state.cppr)
+ return H_SUCCESS;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ icp_rm_clr_vcpu_irq(icp->vcpu);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ icp->rm_action |= XICS_RM_REJECT;
+ icp->rm_reject = reject;
+ }
+ bail:
+ return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR sepcifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ goto bail;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
+ *
+ * "Message" interrupts will never have "asserted" set
+ */
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ goto bail;
+ state = &ics->irq_state[src];
+
+ /* Still asserted, resend it, we make it look like a reject */
+ if (state->asserted) {
+ icp->rm_action |= XICS_RM_REJECT;
+ icp->rm_reject = irq;
+ }
+ bail:
+ return check_too_hard(xics, icp);
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
new file mode 100644
index 00000000000..558a67df812
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -0,0 +1,2501 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_rmhandlers.S and other files, which are:
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu-hash64.h>
+#include <asm/tm.h>
+
+#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+
+#ifdef __LITTLE_ENDIAN__
+#error Need to fix lppaca and SLB shadow accesses in little endian mode
+#endif
+
+/* Values in HSTATE_NAPPING(r13) */
+#define NAPPING_CEDE 1
+#define NAPPING_NOVCPU 2
+
+/*
+ * Call kvmppc_hv_entry in real mode.
+ * Must be called with interrupts hard-disabled.
+ *
+ * Input Registers:
+ *
+ * LR = return address to continue at after eventually re-enabling MMU
+ */
+_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
+ mfmsr r10
+ LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
+ li r0,MSR_RI
+ andc r0,r10,r0
+ li r6,MSR_IR | MSR_DR
+ andc r6,r10,r6
+ mtmsrd r0,1 /* clear RI in MSR */
+ mtsrr0 r5
+ mtsrr1 r6
+ RFI
+
+kvmppc_call_hv_entry:
+ ld r4, HSTATE_KVM_VCPU(r13)
+ bl kvmppc_hv_entry
+
+ /* Back from guest - restore host state and return to caller */
+
+BEGIN_FTR_SECTION
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+ /* Restore SPRG3 */
+ ld r3,PACA_SPRG_VDSO(r13)
+ mtspr SPRN_SPRG_VDSO_WRITE,r3
+
+ /* Reload the host's PMU registers */
+ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
+ lbz r4, LPPACA_PMCINUSE(r3)
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+BEGIN_FTR_SECTION
+ ld r3, HSTATE_MMCR(r13)
+ andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r4, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, HSTATE_PMC(r13)
+ lwz r4, HSTATE_PMC + 4(r13)
+ lwz r5, HSTATE_PMC + 8(r13)
+ lwz r6, HSTATE_PMC + 12(r13)
+ lwz r8, HSTATE_PMC + 16(r13)
+ lwz r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+ lwz r10, HSTATE_PMC + 24(r13)
+ lwz r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, HSTATE_MMCR(r13)
+ ld r4, HSTATE_MMCR + 8(r13)
+ ld r5, HSTATE_MMCR + 16(r13)
+ ld r6, HSTATE_MMCR + 24(r13)
+ ld r7, HSTATE_MMCR + 32(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_SIAR, r6
+ mtspr SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+ ld r8, HSTATE_MMCR + 40(r13)
+ ld r9, HSTATE_MMCR + 48(r13)
+ mtspr SPRN_MMCR2, r8
+ mtspr SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+23:
+
+ /*
+ * Reload DEC. HDEC interrupts were disabled when
+ * we reloaded the host's LPCR value.
+ */
+ ld r3, HSTATE_DECEXP(r13)
+ mftb r4
+ subf r4, r4, r3
+ mtspr SPRN_DEC, r4
+
+ /*
+ * For external and machine check interrupts, we need
+ * to call the Linux handler to process the interrupt.
+ * We do that by jumping to absolute address 0x500 for
+ * external interrupts, or the machine_check_fwnmi label
+ * for machine checks (since firmware might have patched
+ * the vector area at 0x200). The [h]rfid at the end of the
+ * handler will return to the book3s_hv_interrupts.S code.
+ * For other interrupts we do the rfid to get back
+ * to the book3s_hv_interrupts.S code here.
+ */
+ ld r8, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ ld r7, HSTATE_HOST_MSR(r13)
+
+ cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
+ beq 11f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* RFI into the highmem handler, or branch to interrupt handler */
+ mfmsr r6
+ li r0, MSR_RI
+ andc r6, r6, r0
+ mtmsrd r6, 1 /* Clear RI in MSR */
+ mtsrr0 r8
+ mtsrr1 r7
+ beqa 0x500 /* external interrupt (PPC970) */
+ beq cr1, 13f /* machine check */
+ RFI
+
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+11: mtspr SPRN_HSRR0, r8
+ mtspr SPRN_HSRR1, r7
+ ba 0x500
+
+13: b machine_check_fwnmi
+
+kvmppc_primary_no_guest:
+ /* We handle this much like a ceded vcpu */
+ /* set our bit in napping_threads */
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lbz r7, HSTATE_PTID(r13)
+ li r0, 1
+ sld r0, r0, r7
+ addi r6, r5, VCORE_NAPPING_THREADS
+1: lwarx r3, 0, r6
+ or r3, r3, r0
+ stwcx. r3, 0, r6
+ bne 1b
+ /* order napping_threads update vs testing entry_exit_count */
+ isync
+ li r12, 0
+ lwz r7, VCORE_ENTRY_EXIT(r5)
+ cmpwi r7, 0x100
+ bge kvm_novcpu_exit /* another thread already exiting */
+ li r3, NAPPING_NOVCPU
+ stb r3, HSTATE_NAPPING(r13)
+ li r3, 1
+ stb r3, HSTATE_HWTHREAD_REQ(r13)
+
+ b kvm_do_nap
+
+kvm_novcpu_wakeup:
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r5, HSTATE_KVM_VCORE(r13)
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+ stb r0, HSTATE_HWTHREAD_REQ(r13)
+
+ /* check the wake reason */
+ bl kvmppc_check_wake_reason
+
+ /* see if any other thread is already exiting */
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ bge kvm_novcpu_exit
+
+ /* clear our bit in napping_threads */
+ lbz r7, HSTATE_PTID(r13)
+ li r0, 1
+ sld r0, r0, r7
+ addi r6, r5, VCORE_NAPPING_THREADS
+4: lwarx r7, 0, r6
+ andc r7, r7, r0
+ stwcx. r7, 0, r6
+ bne 4b
+
+ /* See if the wake reason means we need to exit */
+ cmpdi r3, 0
+ bge kvm_novcpu_exit
+
+ /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ bne kvmppc_got_guest
+
+kvm_novcpu_exit:
+ b hdec_soon
+
+/*
+ * We come in here when wakened from nap mode.
+ * Relocation is off and most register values are lost.
+ * r13 points to the PACA.
+ */
+ .globl kvm_start_guest
+kvm_start_guest:
+
+ /* Set runlatch bit the minute you wake up from nap */
+ mfspr r1, SPRN_CTRLF
+ ori r1, r1, 1
+ mtspr SPRN_CTRLT, r1
+
+ ld r2,PACATOC(r13)
+
+ li r0,KVM_HWTHREAD_IN_KVM
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+
+ /* NV GPR values from power7_idle() will no longer be valid */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0,NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
+ /*
+ * We weren't napping due to cede, so this must be a secondary
+ * thread being woken up to run a guest, or being woken up due
+ * to a stray IPI. (Or due to some machine check or hypervisor
+ * maintenance interrupt while the core is in KVM.)
+ */
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ bl kvmppc_check_wake_reason
+ cmpdi r3, 0
+ bge kvm_no_guest
+
+ /* get vcpu pointer, NULL if we have no vcpu to run */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ cmpdi r4,0
+ /* if we have no vcpu to run, go back to sleep */
+ beq kvm_no_guest
+
+ /* Set HSTATE_DSCR(r13) to something sensible */
+ ld r6, PACA_DSCR(r13)
+ std r6, HSTATE_DSCR(r13)
+
+ bl kvmppc_hv_entry
+
+ /* Back from the guest, go back to nap */
+ /* Clear our vcpu pointer so we don't come back in early */
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+ /*
+ * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
+ * the nap_count, because once the increment to nap_count is
+ * visible we could be given another vcpu.
+ */
+ lwsync
+
+ /* increment the nap count and then go to nap mode */
+ ld r4, HSTATE_KVM_VCORE(r13)
+ addi r4, r4, VCORE_NAP_COUNT
+51: lwarx r3, 0, r4
+ addi r3, r3, 1
+ stwcx. r3, 0, r4
+ bne 51b
+
+kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+kvm_do_nap:
+ /* Clear the runlatch bit before napping */
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
+ li r3, LPCR_PECE0
+ mfspr r4, SPRN_LPCR
+ rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
+ mtspr SPRN_LPCR, r4
+ isync
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+/******************************************************************************
+ * *
+ * Entry code *
+ * *
+ *****************************************************************************/
+
+.global kvmppc_hv_entry
+kvmppc_hv_entry:
+
+ /* Required state:
+ *
+ * R4 = vcpu pointer (or NULL)
+ * MSR = ~IR|DR
+ * R13 = PACA
+ * R1 = host R1
+ * all other volatile GPRS = free
+ */
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -112(r1)
+
+ /* Save R1 in the PACA */
+ std r1, HSTATE_HOST_R1(r13)
+
+ li r6, KVM_GUEST_MODE_HOST_HV
+ stb r6, HSTATE_IN_GUEST(r13)
+
+ /* Clear out SLB */
+ li r6,0
+ slbmte r6,r6
+ slbia
+ ptesync
+
+BEGIN_FTR_SECTION
+ b 30f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 host -> guest partition switch code.
+ * We don't have to lock against concurrent tlbies,
+ * but we do have to coordinate across hardware threads.
+ */
+ /* Increment entry count iff exit count is zero. */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r9,r5,VCORE_ENTRY_EXIT
+21: lwarx r3,0,r9
+ cmpwi r3,0x100 /* any threads starting to exit? */
+ bge secondary_too_late /* if so we're too late to the party */
+ addi r3,r3,1
+ stwcx. r3,0,r9
+ bne 21b
+
+ /* Primary thread switches to guest partition. */
+ ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
+ lbz r6,HSTATE_PTID(r13)
+ cmpwi r6,0
+ bne 20f
+ ld r6,KVM_SDR1(r9)
+ lwz r7,KVM_LPID(r9)
+ li r0,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r0
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+
+ /* See if we need to flush the TLB */
+ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
+ clrldi r7,r6,64-6 /* extract bit number (6 bits) */
+ srdi r6,r6,6 /* doubleword number */
+ sldi r6,r6,3 /* address offset */
+ add r6,r6,r9
+ addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
+ li r0,1
+ sld r0,r0,r7
+ ld r7,0(r6)
+ and. r7,r7,r0
+ beq 22f
+23: ldarx r7,0,r6 /* if set, clear the bit */
+ andc r7,r7,r0
+ stdcx. r7,0,r6
+ bne 23b
+ /* Flush the TLB of any entries for this LPID */
+ /* use arch 2.07S as a proxy for POWER8 */
+BEGIN_FTR_SECTION
+ li r6,512 /* POWER8 has 512 sets */
+FTR_SECTION_ELSE
+ li r6,128 /* POWER7 has 128 sets */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+ mtctr r6
+ li r7,0x800 /* IS field = 0b10 */
+ ptesync
+28: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 28b
+ ptesync
+
+ /* Add timebase offset onto timebase */
+22: ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 37f
+ mftb r6 /* current host timebase */
+ add r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 37f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Load guest PCR value to select appropriate compat mode */
+37: ld r7, VCORE_PCR(r5)
+ cmpdi r7, 0
+ beq 38f
+ mtspr SPRN_PCR, r7
+38:
+
+BEGIN_FTR_SECTION
+ /* DPDES is shared between threads */
+ ld r8, VCORE_DPDES(r5)
+ mtspr SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ li r0,1
+ stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
+ b 10f
+
+ /* Secondary threads wait for primary to have done partition switch */
+20: lbz r0,VCORE_IN_GUEST(r5)
+ cmpwi r0,0
+ beq 20b
+
+ /* Set LPCR and RMOR. */
+10: ld r8,VCORE_LPCR(r5)
+ mtspr SPRN_LPCR,r8
+ ld r8,KVM_RMOR(r9)
+ mtspr SPRN_RMOR,r8
+ isync
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,512 /* 1 microsecond */
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ blt hdec_soon
+ b 31f
+
+ /*
+ * PPC970 host -> guest partition switch code.
+ * We have to lock against concurrent tlbies,
+ * using native_tlbie_lock to lock against host tlbies
+ * and kvm->arch.tlbie_lock to lock against guest tlbies.
+ * We also have to invalidate the TLB since its
+ * entries aren't tagged with the LPID.
+ */
+30: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
+
+ /* first take native_tlbie_lock */
+ .section ".toc","aw"
+toc_tlbie_lock:
+ .tc native_tlbie_lock[TC],native_tlbie_lock
+ .previous
+ ld r3,toc_tlbie_lock@toc(2)
+#ifdef __BIG_ENDIAN__
+ lwz r8,PACA_LOCK_TOKEN(r13)
+#else
+ lwz r8,PACAPACAINDEX(r13)
+#endif
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* Take the guest's tlbie_lock */
+ addi r3,r9,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+ ld r6,KVM_SDR1(r9)
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+
+ /* Set up HID4 with the guest's LPID etc. */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+
+ /* drop the guest's tlbie_lock */
+ li r0,0
+ stw r0,0(r3)
+
+ /* Check if HDEC expires soon */
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,10
+ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ blt hdec_soon
+
+ /* Enable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,1
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+31:
+ /* Do we have a guest vcpu to run? */
+ cmpdi r4, 0
+ beq kvmppc_primary_no_guest
+kvmppc_got_guest:
+
+ /* Load up guest SLB entries */
+ lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+ /* Increment yield count if they have a VPA */
+ ld r3, VCPU_VPA(r4)
+ cmpdi r3, 0
+ beq 25f
+ lwz r5, LPPACA_YIELDCOUNT(r3)
+ addi r5, r5, 1
+ stw r5, LPPACA_YIELDCOUNT(r3)
+ li r6, 1
+ stb r6, VCPU_VPA_DIRTY(r4)
+25:
+
+BEGIN_FTR_SECTION
+ /* Save purr/spurr */
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ std r5,HSTATE_PURR(r13)
+ std r6,HSTATE_SPURR(r13)
+ ld r7,VCPU_PURR(r4)
+ ld r8,VCPU_SPURR(r4)
+ mtspr SPRN_PURR,r7
+ mtspr SPRN_SPURR,r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ /* Set partition DABR */
+ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+ lwz r5,VCPU_DABRX(r4)
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+ BEGIN_FTR_SECTION_NESTED(89)
+ isync
+ END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b skip_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+
+ /* Turn on TM/FP/VSX/VMX so we can restore them. */
+ mfmsr r5
+ li r6, MSR_TM >> 32
+ sldi r6, r6, 32
+ or r5, r5, r6
+ ori r5, r5, MSR_FP
+ oris r5, r5, (MSR_VEC | MSR_VSX)@h
+ mtmsrd r5
+
+ /*
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+ ld r5, VCPU_TFHAR(r4)
+ ld r6, VCPU_TFIAR(r4)
+ ld r7, VCPU_TEXASR(r4)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+ ld r5, VCPU_MSR(r4)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq skip_tm /* TM not active in guest */
+
+ /* Make sure the failure summary is set, otherwise we'll program check
+ * when we trechkpt. It's possible that this might have been not set
+ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+ * host.
+ */
+ oris r7, r7, (TEXASR_FS)@h
+ mtspr SPRN_TEXASR, r7
+
+ /*
+ * We need to load up the checkpointed state for the guest.
+ * We need to do this early as it will blow away any GPRs, VSRs and
+ * some SPRs.
+ */
+
+ mr r31, r4
+ addi r3, r31, VCPU_FPRS_TM
+ bl .load_fp_state
+ addi r3, r31, VCPU_VRS_TM
+ bl .load_vr_state
+ mr r4, r31
+ lwz r7, VCPU_VRSAVE_TM(r4)
+ mtspr SPRN_VRSAVE, r7
+
+ ld r5, VCPU_LR_TM(r4)
+ lwz r6, VCPU_CR_TM(r4)
+ ld r7, VCPU_CTR_TM(r4)
+ ld r8, VCPU_AMR_TM(r4)
+ ld r9, VCPU_TAR_TM(r4)
+ mtlr r5
+ mtcr r6
+ mtctr r7
+ mtspr SPRN_AMR, r8
+ mtspr SPRN_TAR, r9
+
+ /*
+ * Load up PPR and DSCR values but don't put them in the actual SPRs
+ * till the last moment to avoid running with userspace PPR and DSCR for
+ * too long.
+ */
+ ld r29, VCPU_DSCR_TM(r4)
+ ld r30, VCPU_PPR_TM(r4)
+
+ std r2, PACATMSCRATCH(r13) /* Save TOC */
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* Load GPRs r0-r28 */
+ reg = 0
+ .rept 29
+ ld reg, VCPU_GPRS_TM(reg)(r31)
+ reg = reg + 1
+ .endr
+
+ mtspr SPRN_DSCR, r29
+ mtspr SPRN_PPR, r30
+
+ /* Load final GPRs */
+ ld 29, VCPU_GPRS_TM(29)(r31)
+ ld 30, VCPU_GPRS_TM(30)(r31)
+ ld 31, VCPU_GPRS_TM(31)(r31)
+
+ /* TM checkpointed state is now setup. All GPRs are now volatile. */
+ TRECHKPT
+
+ /* Now let's get back the state we need. */
+ HMT_MEDIUM
+ GET_PACA(r13)
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+ ld r4, HSTATE_KVM_VCPU(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATMSCRATCH(r13)
+
+ /* Set the MSR RI since we have our registers back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+skip_tm:
+#endif
+
+ /* Load guest PMU registers */
+ /* R4 is live here (vcpu pointer) */
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+BEGIN_FTR_SECTION
+ ld r3, VCPU_MMCR(r4)
+ andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r5, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+BEGIN_FTR_SECTION
+ lwz r10, VCPU_PMC + 24(r4)
+ lwz r11, VCPU_PMC + 28(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+ mtspr SPRN_PMC7, r10
+ mtspr SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCR + 16(r4)
+ ld r7, VCPU_SIAR(r4)
+ ld r8, VCPU_SDAR(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_SIAR, r7
+ mtspr SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+ ld r5, VCPU_MMCR + 24(r4)
+ ld r6, VCPU_SIER(r4)
+ lwz r7, VCPU_PMC + 24(r4)
+ lwz r8, VCPU_PMC + 28(r4)
+ ld r9, VCPU_MMCR + 32(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+ mtspr SPRN_SPMC1, r7
+ mtspr SPRN_SPMC2, r8
+ mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+
+ /* Load up FP, VMX and VSX registers */
+ bl kvmppc_load_fp
+
+ ld r14, VCPU_GPR(R14)(r4)
+ ld r15, VCPU_GPR(R15)(r4)
+ ld r16, VCPU_GPR(R16)(r4)
+ ld r17, VCPU_GPR(R17)(r4)
+ ld r18, VCPU_GPR(R18)(r4)
+ ld r19, VCPU_GPR(R19)(r4)
+ ld r20, VCPU_GPR(R20)(r4)
+ ld r21, VCPU_GPR(R21)(r4)
+ ld r22, VCPU_GPR(R22)(r4)
+ ld r23, VCPU_GPR(R23)(r4)
+ ld r24, VCPU_GPR(R24)(r4)
+ ld r25, VCPU_GPR(R25)(r4)
+ ld r26, VCPU_GPR(R26)(r4)
+ ld r27, VCPU_GPR(R27)(r4)
+ ld r28, VCPU_GPR(R28)(r4)
+ ld r29, VCPU_GPR(R29)(r4)
+ ld r30, VCPU_GPR(R30)(r4)
+ ld r31, VCPU_GPR(R31)(r4)
+
+BEGIN_FTR_SECTION
+ /* Switch DSCR to guest value */
+ ld r5, VCPU_DSCR(r4)
+ mtspr SPRN_DSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ /* Skip next section on POWER7 or PPC970 */
+ b 8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+ mfmsr r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+ /* Load up POWER8-specific registers */
+ ld r5, VCPU_IAMR(r4)
+ lwz r6, VCPU_PSPB(r4)
+ ld r7, VCPU_FSCR(r4)
+ mtspr SPRN_IAMR, r5
+ mtspr SPRN_PSPB, r6
+ mtspr SPRN_FSCR, r7
+ ld r5, VCPU_DAWR(r4)
+ ld r6, VCPU_DAWRX(r4)
+ ld r7, VCPU_CIABR(r4)
+ ld r8, VCPU_TAR(r4)
+ mtspr SPRN_DAWR, r5
+ mtspr SPRN_DAWRX, r6
+ mtspr SPRN_CIABR, r7
+ mtspr SPRN_TAR, r8
+ ld r5, VCPU_IC(r4)
+ ld r6, VCPU_VTB(r4)
+ mtspr SPRN_IC, r5
+ mtspr SPRN_VTB, r6
+ ld r8, VCPU_EBBHR(r4)
+ mtspr SPRN_EBBHR, r8
+ ld r5, VCPU_EBBRR(r4)
+ ld r6, VCPU_BESCR(r4)
+ ld r7, VCPU_CSIGR(r4)
+ ld r8, VCPU_TACR(r4)
+ mtspr SPRN_EBBRR, r5
+ mtspr SPRN_BESCR, r6
+ mtspr SPRN_CSIGR, r7
+ mtspr SPRN_TACR, r8
+ ld r5, VCPU_TCSCR(r4)
+ ld r6, VCPU_ACOP(r4)
+ lwz r7, VCPU_GUEST_PID(r4)
+ ld r8, VCPU_WORT(r4)
+ mtspr SPRN_TCSCR, r5
+ mtspr SPRN_ACOP, r6
+ mtspr SPRN_PID, r7
+ mtspr SPRN_WORT, r8
+8:
+
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ /* r8 is a host timebase value here, convert to guest TB */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r6,VCORE_TB_OFFSET(r5)
+ add r8,r8,r6
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+ stw r3,VCPU_DEC(r4)
+
+ ld r5, VCPU_SPRG0(r4)
+ ld r6, VCPU_SPRG1(r4)
+ ld r7, VCPU_SPRG2(r4)
+ ld r8, VCPU_SPRG3(r4)
+ mtspr SPRN_SPRG0, r5
+ mtspr SPRN_SPRG1, r6
+ mtspr SPRN_SPRG2, r7
+ mtspr SPRN_SPRG3, r8
+
+ /* Load up DAR and DSISR */
+ ld r5, VCPU_DAR(r4)
+ lwz r6, VCPU_DSISR(r4)
+ mtspr SPRN_DAR, r5
+ mtspr SPRN_DSISR, r6
+
+BEGIN_FTR_SECTION
+ /* Restore AMR and UAMOR, set AMOR to all 1s */
+ ld r5,VCPU_AMR(r4)
+ ld r6,VCPU_UAMOR(r4)
+ li r7,-1
+ mtspr SPRN_AMR,r5
+ mtspr SPRN_UAMOR,r6
+ mtspr SPRN_AMOR,r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Restore state of CTRL run bit; assume 1 on entry */
+ lwz r5,VCPU_CTRL(r4)
+ andi. r5,r5,1
+ bne 4f
+ mfspr r6,SPRN_CTRLF
+ clrrdi r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ ld r6, VCPU_CTR(r4)
+ lwz r7, VCPU_XER(r4)
+
+ mtctr r6
+ mtxer r7
+
+kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
+ ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4)
+ ld r6, VCPU_SRR0(r4)
+ ld r7, VCPU_SRR1(r4)
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r7
+
+deliver_guest_interrupt:
+ /* r11 = vcpu->arch.msr & ~MSR_HV */
+ rldicl r11, r11, 63 - MSR_HV_LG, 1
+ rotldi r11, r11, 1 + MSR_HV_LG
+ ori r11, r11, MSR_ME
+
+ /* Check if we can deliver an external or decrementer interrupt now */
+ ld r0, VCPU_PENDING_EXC(r4)
+ rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+ cmpdi cr1, r0, 0
+ andi. r8, r11, MSR_EE
+BEGIN_FTR_SECTION
+ mfspr r8, SPRN_LPCR
+ /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+ mtspr SPRN_LPCR, r8
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ beq 5f
+ li r0, BOOK3S_INTERRUPT_EXTERNAL
+ bne cr1, 12f
+ mfspr r0, SPRN_DEC
+ cmpwi r0, 0
+ li r0, BOOK3S_INTERRUPT_DECREMENTER
+ bge 5f
+
+12: mtspr SPRN_SRR0, r10
+ mr r10,r0
+ mtspr SPRN_SRR1, r11
+ mr r9, r4
+ bl kvmppc_msr_interrupt
+5:
+
+/*
+ * Required state:
+ * R4 = vcpu
+ * R10: value for HSRR0
+ * R11: value for HSRR1
+ * R13 = PACA
+ */
+fast_guest_return:
+ li r0,0
+ stb r0,VCPU_CEDED(r4) /* cancel cede */
+ mtspr SPRN_HSRR0,r10
+ mtspr SPRN_HSRR1,r11
+
+ /* Activate guest mode, so faults get handled by KVM */
+ li r9, KVM_GUEST_MODE_GUEST_HV
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ /* Enter guest */
+
+BEGIN_FTR_SECTION
+ ld r5, VCPU_CFAR(r4)
+ mtspr SPRN_CFAR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r0, VCPU_PPR(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ ld r5, VCPU_LR(r4)
+ lwz r6, VCPU_CR(r4)
+ mtlr r5
+ mtcr r6
+
+ ld r1, VCPU_GPR(R1)(r4)
+ ld r2, VCPU_GPR(R2)(r4)
+ ld r3, VCPU_GPR(R3)(r4)
+ ld r5, VCPU_GPR(R5)(r4)
+ ld r6, VCPU_GPR(R6)(r4)
+ ld r7, VCPU_GPR(R7)(r4)
+ ld r8, VCPU_GPR(R8)(r4)
+ ld r9, VCPU_GPR(R9)(r4)
+ ld r10, VCPU_GPR(R10)(r4)
+ ld r11, VCPU_GPR(R11)(r4)
+ ld r12, VCPU_GPR(R12)(r4)
+ ld r13, VCPU_GPR(R13)(r4)
+
+BEGIN_FTR_SECTION
+ mtspr SPRN_PPR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r0, VCPU_GPR(R0)(r4)
+ ld r4, VCPU_GPR(R4)(r4)
+
+ hrfid
+ b .
+
+/******************************************************************************
+ * *
+ * Exit code *
+ * *
+ *****************************************************************************/
+
+/*
+ * We come here from the first-level interrupt handlers.
+ */
+ .globl kvmppc_interrupt_hv
+kvmppc_interrupt_hv:
+ /*
+ * Register contents:
+ * R12 = interrupt vector
+ * R13 = PACA
+ * guest CR, R12 saved in shadow VCPU SCRATCH1/0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ std r9, HSTATE_SCRATCH2(r13)
+
+ lbz r9, HSTATE_IN_GUEST(r13)
+ cmpwi r9, KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9, KVM_GUEST_MODE_GUEST
+ ld r9, HSTATE_SCRATCH2(r13)
+ beq kvmppc_interrupt_pr
+#endif
+ /* We're now back in the host but in guest MMU context */
+ li r9, KVM_GUEST_MODE_HOST_HV
+ stb r9, HSTATE_IN_GUEST(r13)
+
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Save registers */
+
+ std r0, VCPU_GPR(R0)(r9)
+ std r1, VCPU_GPR(R1)(r9)
+ std r2, VCPU_GPR(R2)(r9)
+ std r3, VCPU_GPR(R3)(r9)
+ std r4, VCPU_GPR(R4)(r9)
+ std r5, VCPU_GPR(R5)(r9)
+ std r6, VCPU_GPR(R6)(r9)
+ std r7, VCPU_GPR(R7)(r9)
+ std r8, VCPU_GPR(R8)(r9)
+ ld r0, HSTATE_SCRATCH2(r13)
+ std r0, VCPU_GPR(R9)(r9)
+ std r10, VCPU_GPR(R10)(r9)
+ std r11, VCPU_GPR(R11)(r9)
+ ld r3, HSTATE_SCRATCH0(r13)
+ lwz r4, HSTATE_SCRATCH1(r13)
+ std r3, VCPU_GPR(R12)(r9)
+ stw r4, VCPU_CR(r9)
+BEGIN_FTR_SECTION
+ ld r3, HSTATE_CFAR(r13)
+ std r3, VCPU_CFAR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4, HSTATE_PPR(r13)
+ std r4, VCPU_PPR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /* Restore R1/R2 so we can handle faults */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ mfspr r10, SPRN_SRR0
+ mfspr r11, SPRN_SRR1
+ std r10, VCPU_SRR0(r9)
+ std r11, VCPU_SRR1(r9)
+ andi. r0, r12, 2 /* need to read HSRR0/1? */
+ beq 1f
+ mfspr r10, SPRN_HSRR0
+ mfspr r11, SPRN_HSRR1
+ clrrdi r12, r12, 2
+1: std r10, VCPU_PC(r9)
+ std r11, VCPU_MSR(r9)
+
+ GET_SCRATCH0(r3)
+ mflr r4
+ std r3, VCPU_GPR(R13)(r9)
+ std r4, VCPU_LR(r9)
+
+ stw r12,VCPU_TRAP(r9)
+
+ /* Save HEIR (HV emulation assist reg) in last_inst
+ if this is an HEI (HV emulation interrupt, e40) */
+ li r3,KVM_INST_FETCH_FAILED
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ bne 11f
+ mfspr r3,SPRN_HEIR
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+11: stw r3,VCPU_LAST_INST(r9)
+
+ /* these are volatile across C function calls */
+ mfctr r3
+ mfxer r4
+ std r3, VCPU_CTR(r9)
+ stw r4, VCPU_XER(r9)
+
+BEGIN_FTR_SECTION
+ /* If this is a page table miss then see if it's theirs or ours */
+ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq kvmppc_hdsi
+ cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ beq kvmppc_hisi
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* See if this is a leftover HDEC interrupt */
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ bne 2f
+ mfspr r3,SPRN_HDEC
+ cmpwi r3,0
+ bge ignore_hdec
+2:
+ /* See if this is an hcall we can handle in real mode */
+ cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
+ beq hcall_try_real_mode
+
+ /* Only handle external interrupts here on arch 206 and later */
+BEGIN_FTR_SECTION
+ b ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+ /* External interrupt ? */
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ bne+ ext_interrupt_to_host
+
+ /* External interrupt, first check for host_ipi. If this is
+ * set, we know the host wants us out so let's do it now
+ */
+ bl kvmppc_read_intr
+ cmpdi r3, 0
+ bgt ext_interrupt_to_host
+
+ /* Check if any CPU is heading out to the host, if so head out too */
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ bge ext_interrupt_to_host
+
+ /* Return to guest after delivering any pending interrupt */
+ mr r4, r9
+ b deliver_guest_interrupt
+
+ext_interrupt_to_host:
+
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+ /* Save more register state */
+ mfdar r6
+ mfdsisr r7
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+BEGIN_FTR_SECTION
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq 6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+
+ /* Save guest CTRL register, set runlatch to 1 */
+6: mfspr r6,SPRN_CTRLF
+ stw r6,VCPU_CTRL(r9)
+ andi. r0,r6,1
+ bne 4f
+ ori r6,r6,1
+ mtspr SPRN_CTRLT,r6
+4:
+ /* Read the guest SLB and save it away */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+ li r5,0
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ stw r5,VCPU_SLB_MAX(r9)
+
+ /*
+ * Save the guest PURR/SPURR
+ */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_PURR
+ mfspr r6,SPRN_SPURR
+ ld r7,VCPU_PURR(r9)
+ ld r8,VCPU_SPURR(r9)
+ std r5,VCPU_PURR(r9)
+ std r6,VCPU_SPURR(r9)
+ subf r5,r7,r5
+ subf r6,r8,r6
+
+ /*
+ * Restore host PURR/SPURR and add guest times
+ * so that the time in the guest gets accounted.
+ */
+ ld r3,HSTATE_PURR(r13)
+ ld r4,HSTATE_SPURR(r13)
+ add r3,r3,r5
+ add r4,r4,r6
+ mtspr SPRN_PURR,r3
+ mtspr SPRN_SPURR,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+
+ /* Save DEC */
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+ add r5,r5,r6
+ /* r5 is a guest timebase value here, convert to host TB */
+ ld r3,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_TB_OFFSET(r3)
+ subf r5,r4,r5
+ std r5,VCPU_DEC_EXPIRES(r9)
+
+BEGIN_FTR_SECTION
+ b 8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ /* Save POWER8-specific registers */
+ mfspr r5, SPRN_IAMR
+ mfspr r6, SPRN_PSPB
+ mfspr r7, SPRN_FSCR
+ std r5, VCPU_IAMR(r9)
+ stw r6, VCPU_PSPB(r9)
+ std r7, VCPU_FSCR(r9)
+ mfspr r5, SPRN_IC
+ mfspr r6, SPRN_VTB
+ mfspr r7, SPRN_TAR
+ std r5, VCPU_IC(r9)
+ std r6, VCPU_VTB(r9)
+ std r7, VCPU_TAR(r9)
+ mfspr r8, SPRN_EBBHR
+ std r8, VCPU_EBBHR(r9)
+ mfspr r5, SPRN_EBBRR
+ mfspr r6, SPRN_BESCR
+ mfspr r7, SPRN_CSIGR
+ mfspr r8, SPRN_TACR
+ std r5, VCPU_EBBRR(r9)
+ std r6, VCPU_BESCR(r9)
+ std r7, VCPU_CSIGR(r9)
+ std r8, VCPU_TACR(r9)
+ mfspr r5, SPRN_TCSCR
+ mfspr r6, SPRN_ACOP
+ mfspr r7, SPRN_PID
+ mfspr r8, SPRN_WORT
+ std r5, VCPU_TCSCR(r9)
+ std r6, VCPU_ACOP(r9)
+ stw r7, VCPU_GUEST_PID(r9)
+ std r8, VCPU_WORT(r9)
+8:
+
+ /* Save and reset AMR and UAMOR before turning on the MMU */
+BEGIN_FTR_SECTION
+ mfspr r5,SPRN_AMR
+ mfspr r6,SPRN_UAMOR
+ std r5,VCPU_AMR(r9)
+ std r6,VCPU_UAMOR(r9)
+ li r6,0
+ mtspr SPRN_AMR,r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Switch DSCR back to host value */
+BEGIN_FTR_SECTION
+ mfspr r8, SPRN_DSCR
+ ld r7, HSTATE_DSCR(r13)
+ std r8, VCPU_DSCR(r9)
+ mtspr SPRN_DSCR, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(R14)(r9)
+ std r15, VCPU_GPR(R15)(r9)
+ std r16, VCPU_GPR(R16)(r9)
+ std r17, VCPU_GPR(R17)(r9)
+ std r18, VCPU_GPR(R18)(r9)
+ std r19, VCPU_GPR(R19)(r9)
+ std r20, VCPU_GPR(R20)(r9)
+ std r21, VCPU_GPR(R21)(r9)
+ std r22, VCPU_GPR(R22)(r9)
+ std r23, VCPU_GPR(R23)(r9)
+ std r24, VCPU_GPR(R24)(r9)
+ std r25, VCPU_GPR(R25)(r9)
+ std r26, VCPU_GPR(R26)(r9)
+ std r27, VCPU_GPR(R27)(r9)
+ std r28, VCPU_GPR(R28)(r9)
+ std r29, VCPU_GPR(R29)(r9)
+ std r30, VCPU_GPR(R30)(r9)
+ std r31, VCPU_GPR(R31)(r9)
+
+ /* Save SPRGs */
+ mfspr r3, SPRN_SPRG0
+ mfspr r4, SPRN_SPRG1
+ mfspr r5, SPRN_SPRG2
+ mfspr r6, SPRN_SPRG3
+ std r3, VCPU_SPRG0(r9)
+ std r4, VCPU_SPRG1(r9)
+ std r5, VCPU_SPRG2(r9)
+ std r6, VCPU_SPRG3(r9)
+
+ /* save FP state */
+ mr r3, r9
+ bl kvmppc_save_fp
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b 2f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+ /* Turn on TM. */
+ mfmsr r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+ ld r5, VCPU_MSR(r9)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq 1f /* TM not active in guest. */
+
+ li r3, TM_CAUSE_KVM_RESCHED
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* All GPRs are volatile at this point. */
+ TRECLAIM(R3)
+
+ /* Temporarily store r13 and r9 so we have some regs to play with */
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9, PACATMSCRATCH(r13)
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Get a few more GPRs free. */
+ std r29, VCPU_GPRS_TM(29)(r9)
+ std r30, VCPU_GPRS_TM(30)(r9)
+ std r31, VCPU_GPRS_TM(31)(r9)
+
+ /* Save away PPR and DSCR soon so don't run with user values. */
+ mfspr r31, SPRN_PPR
+ HMT_MEDIUM
+ mfspr r30, SPRN_DSCR
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+
+ /* Save all but r9, r13 & r29-r31 */
+ reg = 0
+ .rept 29
+ .if (reg != 9) && (reg != 13)
+ std reg, VCPU_GPRS_TM(reg)(r9)
+ .endif
+ reg = reg + 1
+ .endr
+ /* ... now save r13 */
+ GET_SCRATCH0(r4)
+ std r4, VCPU_GPRS_TM(13)(r9)
+ /* ... and save r9 */
+ ld r4, PACATMSCRATCH(r13)
+ std r4, VCPU_GPRS_TM(9)(r9)
+
+ /* Reload stack pointer and TOC. */
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATOC(r13)
+
+ /* Set MSR RI now we have r1 and r13 back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+
+ /* Save away checkpinted SPRs. */
+ std r31, VCPU_PPR_TM(r9)
+ std r30, VCPU_DSCR_TM(r9)
+ mflr r5
+ mfcr r6
+ mfctr r7
+ mfspr r8, SPRN_AMR
+ mfspr r10, SPRN_TAR
+ std r5, VCPU_LR_TM(r9)
+ stw r6, VCPU_CR_TM(r9)
+ std r7, VCPU_CTR_TM(r9)
+ std r8, VCPU_AMR_TM(r9)
+ std r10, VCPU_TAR_TM(r9)
+
+ /* Restore r12 as trap number. */
+ lwz r12, VCPU_TRAP(r9)
+
+ /* Save FP/VSX. */
+ addi r3, r9, VCPU_FPRS_TM
+ bl .store_fp_state
+ addi r3, r9, VCPU_VRS_TM
+ bl .store_vr_state
+ mfspr r6, SPRN_VRSAVE
+ stw r6, VCPU_VRSAVE_TM(r9)
+1:
+ /*
+ * We need to save these SPRs after the treclaim so that the software
+ * error code is recorded correctly in the TEXASR. Also the user may
+ * change these outside of a transaction, so they must always be
+ * context switched.
+ */
+ mfspr r5, SPRN_TFHAR
+ mfspr r6, SPRN_TFIAR
+ mfspr r7, SPRN_TEXASR
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+ std r7, VCPU_TEXASR(r9)
+2:
+#endif
+
+ /* Increment yield count if they have a VPA */
+ ld r8, VCPU_VPA(r9) /* do they have a VPA? */
+ cmpdi r8, 0
+ beq 25f
+ lwz r3, LPPACA_YIELDCOUNT(r8)
+ addi r3, r3, 1
+ stw r3, LPPACA_YIELDCOUNT(r8)
+ li r3, 1
+ stb r3, VCPU_VPA_DIRTY(r9)
+25:
+ /* Save PMU registers if requested */
+ /* r8 and cr0.eq are live here */
+BEGIN_FTR_SECTION
+ /*
+ * POWER8 seems to have a hardware bug where setting
+ * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+ * when some counters are already negative doesn't seem
+ * to cause a performance monitor alert (and hence interrupt).
+ * The effect of this is that when saving the PMU state,
+ * if there is no PMU alert pending when we read MMCR0
+ * before freezing the counters, but one becomes pending
+ * before we read the counters, we lose it.
+ * To work around this, we need a way to freeze the counters
+ * before reading MMCR0. Normally, freezing the counters
+ * is done by writing MMCR0 (to set MMCR0[FC]) which
+ * unavoidably writes MMCR0[PMA0] as well. On POWER8,
+ * we can also freeze the counters using MMCR2, by writing
+ * 1s to all the counter freeze condition bits (there are
+ * 9 bits each for 6 counters).
+ */
+ li r3, -1 /* set all freeze bits */
+ clrrdi r3, r3, 10
+ mfspr r10, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ isync
+ beq 21f /* if no VPA, save PMU stuff anyway */
+ lbz r7, LPPACA_PMCINUSE(r8)
+ cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r7, SPRN_SIAR
+ mfspr r8, SPRN_SDAR
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCR + 16(r9)
+BEGIN_FTR_SECTION
+ std r10, VCPU_MMCR + 24(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ std r7, VCPU_SIAR(r9)
+ std r8, VCPU_SDAR(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+BEGIN_FTR_SECTION
+ mfspr r10, SPRN_PMC7
+ mfspr r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ stw r10, VCPU_PMC + 24(r9)
+ stw r11, VCPU_PMC + 28(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_SIER
+ mfspr r6, SPRN_SPMC1
+ mfspr r7, SPRN_SPMC2
+ mfspr r8, SPRN_MMCRS
+ std r5, VCPU_SIER(r9)
+ stw r6, VCPU_PMC + 24(r9)
+ stw r7, VCPU_PMC + 28(r9)
+ std r8, VCPU_MMCR + 32(r9)
+ lis r4, 0x8000
+ mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22:
+ /* Clear out SLB */
+ li r5,0
+ slbmte r5,r5
+ slbia
+ ptesync
+
+hdec_soon: /* r12 = trap, r13 = paca */
+BEGIN_FTR_SECTION
+ b 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ /*
+ * POWER7 guest -> host partition switch code.
+ * We don't have to lock against tlbies but we do
+ * have to coordinate the hardware threads.
+ */
+ /* Increment the threads-exiting-guest count in the 0xff00
+ bits of vcore->entry_exit_count */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ addi r6,r5,VCORE_ENTRY_EXIT
+41: lwarx r3,0,r6
+ addi r0,r3,0x100
+ stwcx. r0,0,r6
+ bne 41b
+ isync /* order stwcx. vs. reading napping_threads */
+
+ /*
+ * At this point we have an interrupt that we have to pass
+ * up to the kernel or qemu; we can't handle it in real mode.
+ * Thus we have to do a partition switch, so we have to
+ * collect the other threads, if we are the first thread
+ * to take an interrupt. To do this, we set the HDEC to 0,
+ * which causes an HDEC interrupt in all threads within 2ns
+ * because the HDEC register is shared between all 4 threads.
+ * However, we don't need to bother if this is an HDEC
+ * interrupt, since the other threads will already be on their
+ * way here in that case.
+ */
+ cmpwi r3,0x100 /* Are we the first here? */
+ bge 43f
+ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+ beq 40f
+ li r0,0
+ mtspr SPRN_HDEC,r0
+40:
+ /*
+ * Send an IPI to any napping threads, since an HDEC interrupt
+ * doesn't wake CPUs up from nap.
+ */
+ lwz r3,VCORE_NAPPING_THREADS(r5)
+ lbz r4,HSTATE_PTID(r13)
+ li r0,1
+ sld r0,r0,r4
+ andc. r3,r3,r0 /* no sense IPI'ing ourselves */
+ beq 43f
+ /* Order entry/exit update vs. IPIs */
+ sync
+ mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
+ subf r6,r4,r13
+42: andi. r0,r3,1
+ beq 44f
+ ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
+ li r0,IPI_PRIORITY
+ li r7,XICS_MFRR
+ stbcix r0,r7,r8 /* trigger the IPI */
+44: srdi. r3,r3,1
+ addi r6,r6,PACA_SIZE
+ bne 42b
+
+secondary_too_late:
+ /* Secondary threads wait for primary to do partition switch */
+43: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
+ lbz r3,HSTATE_PTID(r13)
+ cmpwi r3,0
+ beq 15f
+ HMT_LOW
+13: lbz r3,VCORE_IN_GUEST(r5)
+ cmpwi r3,0
+ bne 13b
+ HMT_MEDIUM
+ b 16f
+
+ /* Primary thread waits for all the secondaries to exit guest */
+15: lwz r3,VCORE_ENTRY_EXIT(r5)
+ srwi r0,r3,8
+ clrldi r3,r3,56
+ cmpw r3,r0
+ bne 15b
+ isync
+
+ /* Primary thread switches back to host partition */
+ ld r6,KVM_HOST_SDR1(r4)
+ lwz r7,KVM_HOST_LPID(r4)
+ li r8,LPID_RSVD /* switch to reserved LPID */
+ mtspr SPRN_LPID,r8
+ ptesync
+ mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_LPID,r7
+ isync
+
+BEGIN_FTR_SECTION
+ /* DPDES is shared between threads */
+ mfspr r7, SPRN_DPDES
+ std r7, VCORE_DPDES(r5)
+ /* clear DPDES so we don't get guest doorbells in the host */
+ li r8, 0
+ mtspr SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ /* Subtract timebase offset from timebase */
+ ld r8,VCORE_TB_OFFSET(r5)
+ cmpdi r8,0
+ beq 17f
+ mftb r6 /* current guest timebase */
+ subf r8,r8,r6
+ mtspr SPRN_TBU40,r8 /* update upper 40 bits */
+ mftb r7 /* check if lower 24 bits overflowed */
+ clrldi r6,r6,40
+ clrldi r7,r7,40
+ cmpld r7,r6
+ bge 17f
+ addis r8,r8,0x100 /* if so, increment upper 40 bits */
+ mtspr SPRN_TBU40,r8
+
+ /* Reset PCR */
+17: ld r0, VCORE_PCR(r5)
+ cmpdi r0, 0
+ beq 18f
+ li r0, 0
+ mtspr SPRN_PCR, r0
+18:
+ /* Signal secondary CPUs to continue */
+ stb r0,VCORE_IN_GUEST(r5)
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+16: ld r8,KVM_HOST_LPCR(r4)
+ mtspr SPRN_LPCR,r8
+ isync
+ b 33f
+
+ /*
+ * PPC970 guest -> host partition switch code.
+ * We have to lock against concurrent tlbies, and
+ * we have to flush the whole TLB.
+ */
+32: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
+
+ /* Take the guest's tlbie_lock */
+#ifdef __BIG_ENDIAN__
+ lwz r8,PACA_LOCK_TOKEN(r13)
+#else
+ lwz r8,PACAPACAINDEX(r13)
+#endif
+ addi r3,r4,KVM_TLBIE_LOCK
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
+ li r0,0x18f
+ rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
+ or r0,r7,r0
+ ptesync
+ sync
+ mtspr SPRN_HID4,r0 /* switch to reserved LPID */
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop guest tlbie_lock */
+
+ /* invalidate the whole TLB */
+ li r0,256
+ mtctr r0
+ li r6,0
+25: tlbiel r6
+ addi r6,r6,0x1000
+ bdnz 25b
+ ptesync
+
+ /* take native_tlbie_lock */
+ ld r3,toc_tlbie_lock@toc(2)
+24: lwarx r0,0,r3
+ cmpwi r0,0
+ bne 24b
+ stwcx. r8,0,r3
+ bne 24b
+ isync
+
+ ld r6,KVM_HOST_SDR1(r4)
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
+
+ /* Set up host HID4 value */
+ sync
+ mtspr SPRN_HID4,r7
+ isync
+ li r0,0
+ stw r0,0(r3) /* drop native_tlbie_lock */
+
+ lis r8,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC,r8
+
+ /* Disable HDEC interrupts */
+ mfspr r0,SPRN_HID0
+ li r3,0
+ rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
+ sync
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+
+ /* load host SLB entries */
+33: ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ ld r5,SLBSHADOW_SAVEAREA(r8)
+ ld r6,SLBSHADOW_SAVEAREA+8(r8)
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ ld r0, 112+PPC_LR_STKOFF(r1)
+ addi r1, r1, 112
+ mtlr r0
+ blr
+
+/*
+ * Check whether an HDSI is an HPTE not found fault or something else.
+ * If it is an HPTE not found fault that is due to the guest accessing
+ * a page that they have mapped but which we have paged out, then
+ * we continue on with the guest exit path. In all other cases,
+ * reflect the HDSI to the guest as a DSI.
+ */
+kvmppc_hdsi:
+ mfspr r4, SPRN_HDAR
+ mfspr r6, SPRN_HDSISR
+ /* HPTE not found fault or protection fault? */
+ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
+ beq 1f /* if not, send it to the guest */
+ andi. r0, r11, MSR_DR /* data relocation enabled? */
+ beq 3f
+ clrrdi r0, r4, 28
+ PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4: std r4, VCPU_FAULT_DAR(r9)
+ stw r6, VCPU_FAULT_DSISR(r9)
+
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ li r7, 1 /* data fault */
+ bl kvmppc_hpte_hv_fault
+ ld r9, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq 6f
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq guest_exit_cont
+ cmpdi r3, -2 /* MMIO emulation; need instr word */
+ beq 2f
+
+ /* Synthesize a DSI for the guest */
+ ld r4, VCPU_FAULT_DAR(r9)
+ mr r6, r3
+1: mtspr SPRN_DAR, r4
+ mtspr SPRN_DSISR, r6
+ mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_DATA_STORAGE
+ bl kvmppc_msr_interrupt
+fast_interrupt_c_return:
+6: ld r7, VCPU_CTR(r9)
+ lwz r8, VCPU_XER(r9)
+ mtctr r7
+ mtxer r8
+ mr r4, r9
+ b fast_guest_return
+
+3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r5)
+ b 4b
+
+ /* If this is for emulated MMIO, load the instruction word */
+2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
+
+ /* Set guest mode to 'jump over instruction' so if lwz faults
+ * we'll just continue at the next IP. */
+ li r0, KVM_GUEST_MODE_SKIP
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ /* Do the access with MSR:DR enabled */
+ mfmsr r3
+ ori r4, r3, MSR_DR /* Enable paging for data */
+ mtmsrd r4
+ lwz r8, 0(r10)
+ mtmsrd r3
+
+ /* Store the result */
+ stw r8, VCPU_LAST_INST(r9)
+
+ /* Unset guest mode. */
+ li r0, KVM_GUEST_MODE_HOST_HV
+ stb r0, HSTATE_IN_GUEST(r13)
+ b guest_exit_cont
+
+/*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi:
+ andis. r0, r11, SRR1_ISI_NOPT@h
+ beq 1f
+ andi. r0, r11, MSR_IR /* instruction relocation enabled? */
+ beq 3f
+ clrrdi r0, r10, 28
+ PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4:
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ mr r4, r10
+ mr r6, r11
+ li r7, 0 /* instruction fault */
+ bl kvmppc_hpte_hv_fault
+ ld r9, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq fast_interrupt_c_return
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq guest_exit_cont
+
+ /* Synthesize an ISI for the guest */
+ mr r11, r3
+1: mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_INST_STORAGE
+ bl kvmppc_msr_interrupt
+ b fast_interrupt_c_return
+
+3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r6)
+ b 4b
+
+/*
+ * Try to handle an hcall in real mode.
+ * Returns to the guest if we handle it, or continues on up to
+ * the kernel if we can't (i.e. if we don't have a handler for
+ * it, or if the handler returns H_TOO_HARD).
+ */
+ .globl hcall_try_real_mode
+hcall_try_real_mode:
+ ld r3,VCPU_GPR(R3)(r9)
+ andi. r0,r11,MSR_PR
+ /* sc 1 from userspace - reflect to guest syscall */
+ bne sc_1_fast_return
+ clrrdi r3,r3,2
+ cmpldi r3,hcall_real_table_end - hcall_real_table
+ bge guest_exit_cont
+ LOAD_REG_ADDR(r4, hcall_real_table)
+ lwax r3,r3,r4
+ cmpwi r3,0
+ beq guest_exit_cont
+ add r3,r3,r4
+ mtctr r3
+ mr r3,r9 /* get vcpu pointer */
+ ld r4,VCPU_GPR(R4)(r9)
+ bctrl
+ cmpdi r3,H_TOO_HARD
+ beq hcall_real_fallback
+ ld r4,HSTATE_KVM_VCPU(r13)
+ std r3,VCPU_GPR(R3)(r4)
+ ld r10,VCPU_PC(r4)
+ ld r11,VCPU_MSR(r4)
+ b fast_guest_return
+
+sc_1_fast_return:
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ li r10, BOOK3S_INTERRUPT_SYSCALL
+ bl kvmppc_msr_interrupt
+ mr r4,r9
+ b fast_guest_return
+
+ /* We've attempted a real mode hcall, but it's punted it back
+ * to userspace. We need to restore some clobbered volatiles
+ * before resuming the pass-it-to-qemu path */
+hcall_real_fallback:
+ li r12,BOOK3S_INTERRUPT_SYSCALL
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ b guest_exit_cont
+
+ .globl hcall_real_table
+hcall_real_table:
+ .long 0 /* 0 - unused */
+ .long DOTSYM(kvmppc_h_remove) - hcall_real_table
+ .long DOTSYM(kvmppc_h_enter) - hcall_real_table
+ .long DOTSYM(kvmppc_h_read) - hcall_real_table
+ .long 0 /* 0x10 - H_CLEAR_MOD */
+ .long 0 /* 0x14 - H_CLEAR_REF */
+ .long DOTSYM(kvmppc_h_protect) - hcall_real_table
+ .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
+ .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
+ .long 0 /* 0x24 - H_SET_SPRG0 */
+ .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
+ .long 0 /* 0x2c */
+ .long 0 /* 0x30 */
+ .long 0 /* 0x34 */
+ .long 0 /* 0x38 */
+ .long 0 /* 0x3c */
+ .long 0 /* 0x40 */
+ .long 0 /* 0x44 */
+ .long 0 /* 0x48 */
+ .long 0 /* 0x4c */
+ .long 0 /* 0x50 */
+ .long 0 /* 0x54 */
+ .long 0 /* 0x58 */
+ .long 0 /* 0x5c */
+ .long 0 /* 0x60 */
+#ifdef CONFIG_KVM_XICS
+ .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
+ .long 0 /* 0x70 - H_IPOLL */
+ .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
+#else
+ .long 0 /* 0x64 - H_EOI */
+ .long 0 /* 0x68 - H_CPPR */
+ .long 0 /* 0x6c - H_IPI */
+ .long 0 /* 0x70 - H_IPOLL */
+ .long 0 /* 0x74 - H_XIRR */
+#endif
+ .long 0 /* 0x78 */
+ .long 0 /* 0x7c */
+ .long 0 /* 0x80 */
+ .long 0 /* 0x84 */
+ .long 0 /* 0x88 */
+ .long 0 /* 0x8c */
+ .long 0 /* 0x90 */
+ .long 0 /* 0x94 */
+ .long 0 /* 0x98 */
+ .long 0 /* 0x9c */
+ .long 0 /* 0xa0 */
+ .long 0 /* 0xa4 */
+ .long 0 /* 0xa8 */
+ .long 0 /* 0xac */
+ .long 0 /* 0xb0 */
+ .long 0 /* 0xb4 */
+ .long 0 /* 0xb8 */
+ .long 0 /* 0xbc */
+ .long 0 /* 0xc0 */
+ .long 0 /* 0xc4 */
+ .long 0 /* 0xc8 */
+ .long 0 /* 0xcc */
+ .long 0 /* 0xd0 */
+ .long 0 /* 0xd4 */
+ .long 0 /* 0xd8 */
+ .long 0 /* 0xdc */
+ .long DOTSYM(kvmppc_h_cede) - hcall_real_table
+ .long 0 /* 0xe4 */
+ .long 0 /* 0xe8 */
+ .long 0 /* 0xec */
+ .long 0 /* 0xf0 */
+ .long 0 /* 0xf4 */
+ .long 0 /* 0xf8 */
+ .long 0 /* 0xfc */
+ .long 0 /* 0x100 */
+ .long 0 /* 0x104 */
+ .long 0 /* 0x108 */
+ .long 0 /* 0x10c */
+ .long 0 /* 0x110 */
+ .long 0 /* 0x114 */
+ .long 0 /* 0x118 */
+ .long 0 /* 0x11c */
+ .long 0 /* 0x120 */
+ .long DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
+ .long 0 /* 0x128 */
+ .long 0 /* 0x12c */
+ .long 0 /* 0x130 */
+ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+hcall_real_table_end:
+
+ignore_hdec:
+ mr r4,r9
+ b fast_guest_return
+
+_GLOBAL(kvmppc_h_set_xdabr)
+ andi. r0, r5, DABRX_USER | DABRX_KERNEL
+ beq 6f
+ li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
+ andc. r0, r5, r0
+ beq 3f
+6: li r3, H_PARAMETER
+ blr
+
+_GLOBAL(kvmppc_h_set_dabr)
+ li r5, DABRX_USER | DABRX_KERNEL
+3:
+BEGIN_FTR_SECTION
+ b 2f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ std r4,VCPU_DABR(r3)
+ stw r5, VCPU_DABRX(r3)
+ mtspr SPRN_DABRX, r5
+ /* Work around P7 bug where DABR can get corrupted on mtspr */
+1: mtspr SPRN_DABR,r4
+ mfspr r5, SPRN_DABR
+ cmpd r4, r5
+ bne 1b
+ isync
+ li r3,0
+ blr
+
+ /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
+2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+ rlwimi r5, r4, 1, DAWRX_WT
+ clrrdi r4, r4, 3
+ std r4, VCPU_DAWR(r3)
+ std r5, VCPU_DAWRX(r3)
+ mtspr SPRN_DAWR, r4
+ mtspr SPRN_DAWRX, r5
+ li r3, 0
+ blr
+
+_GLOBAL(kvmppc_h_cede)
+ ori r11,r11,MSR_EE
+ std r11,VCPU_MSR(r3)
+ li r0,1
+ stb r0,VCPU_CEDED(r3)
+ sync /* order setting ceded vs. testing prodded */
+ lbz r5,VCPU_PRODDED(r3)
+ cmpwi r5,0
+ bne kvm_cede_prodded
+ li r0,0 /* set trap to 0 to say hcall is handled */
+ stw r0,VCPU_TRAP(r3)
+ li r0,H_SUCCESS
+ std r0,VCPU_GPR(R3)(r3)
+BEGIN_FTR_SECTION
+ b kvm_cede_exit /* just send it up to host on 970 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+ /*
+ * Set our bit in the bitmask of napping threads unless all the
+ * other threads are already napping, in which case we send this
+ * up to the host.
+ */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ lbz r6,HSTATE_PTID(r13)
+ lwz r8,VCORE_ENTRY_EXIT(r5)
+ clrldi r8,r8,56
+ li r0,1
+ sld r0,r0,r6
+ addi r6,r5,VCORE_NAPPING_THREADS
+31: lwarx r4,0,r6
+ or r4,r4,r0
+ PPC_POPCNTW(R7,R4)
+ cmpw r7,r8
+ bge kvm_cede_exit
+ stwcx. r4,0,r6
+ bne 31b
+ /* order napping_threads update vs testing entry_exit_count */
+ isync
+ li r0,NAPPING_CEDE
+ stb r0,HSTATE_NAPPING(r13)
+ lwz r7,VCORE_ENTRY_EXIT(r5)
+ cmpwi r7,0x100
+ bge 33f /* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+ /* Save non-volatile GPRs */
+ std r14, VCPU_GPR(R14)(r3)
+ std r15, VCPU_GPR(R15)(r3)
+ std r16, VCPU_GPR(R16)(r3)
+ std r17, VCPU_GPR(R17)(r3)
+ std r18, VCPU_GPR(R18)(r3)
+ std r19, VCPU_GPR(R19)(r3)
+ std r20, VCPU_GPR(R20)(r3)
+ std r21, VCPU_GPR(R21)(r3)
+ std r22, VCPU_GPR(R22)(r3)
+ std r23, VCPU_GPR(R23)(r3)
+ std r24, VCPU_GPR(R24)(r3)
+ std r25, VCPU_GPR(R25)(r3)
+ std r26, VCPU_GPR(R26)(r3)
+ std r27, VCPU_GPR(R27)(r3)
+ std r28, VCPU_GPR(R28)(r3)
+ std r29, VCPU_GPR(R29)(r3)
+ std r30, VCPU_GPR(R30)(r3)
+ std r31, VCPU_GPR(R31)(r3)
+
+ /* save FP state */
+ bl kvmppc_save_fp
+
+ /*
+ * Take a nap until a decrementer or external or doobell interrupt
+ * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
+ * runlatch bit before napping.
+ */
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
+ li r0,1
+ stb r0,HSTATE_HWTHREAD_REQ(r13)
+ mfspr r5,SPRN_LPCR
+ ori r5,r5,LPCR_PECE0 | LPCR_PECE1
+BEGIN_FTR_SECTION
+ oris r5,r5,LPCR_PECEDP@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_LPCR,r5
+ isync
+ li r0, 0
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+33: mr r4, r3
+ li r3, 0
+ li r12, 0
+ b 34f
+
+kvm_end_cede:
+ /* get vcpu pointer */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
+ /* Woken by external or decrementer interrupt */
+ ld r1, HSTATE_HOST_R1(r13)
+
+ /* load up FP state */
+ bl kvmppc_load_fp
+
+ /* Load NV GPRS */
+ ld r14, VCPU_GPR(R14)(r4)
+ ld r15, VCPU_GPR(R15)(r4)
+ ld r16, VCPU_GPR(R16)(r4)
+ ld r17, VCPU_GPR(R17)(r4)
+ ld r18, VCPU_GPR(R18)(r4)
+ ld r19, VCPU_GPR(R19)(r4)
+ ld r20, VCPU_GPR(R20)(r4)
+ ld r21, VCPU_GPR(R21)(r4)
+ ld r22, VCPU_GPR(R22)(r4)
+ ld r23, VCPU_GPR(R23)(r4)
+ ld r24, VCPU_GPR(R24)(r4)
+ ld r25, VCPU_GPR(R25)(r4)
+ ld r26, VCPU_GPR(R26)(r4)
+ ld r27, VCPU_GPR(R27)(r4)
+ ld r28, VCPU_GPR(R28)(r4)
+ ld r29, VCPU_GPR(R29)(r4)
+ ld r30, VCPU_GPR(R30)(r4)
+ ld r31, VCPU_GPR(R31)(r4)
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ bl kvmppc_check_wake_reason
+
+ /* clear our bit in vcore->napping_threads */
+34: ld r5,HSTATE_KVM_VCORE(r13)
+ lbz r7,HSTATE_PTID(r13)
+ li r0,1
+ sld r0,r0,r7
+ addi r6,r5,VCORE_NAPPING_THREADS
+32: lwarx r7,0,r6
+ andc r7,r7,r0
+ stwcx. r7,0,r6
+ bne 32b
+ li r0,0
+ stb r0,HSTATE_NAPPING(r13)
+
+ /* See if the wake reason means we need to exit */
+ stw r12, VCPU_TRAP(r4)
+ mr r9, r4
+ cmpdi r3, 0
+ bgt guest_exit_cont
+
+ /* see if any other thread is already exiting */
+ lwz r0,VCORE_ENTRY_EXIT(r5)
+ cmpwi r0,0x100
+ bge guest_exit_cont
+
+ b kvmppc_cede_reentry /* if not go back to guest */
+
+ /* cede when already previously prodded case */
+kvm_cede_prodded:
+ li r0,0
+ stb r0,VCPU_PRODDED(r3)
+ sync /* order testing prodded vs. clearing ceded */
+ stb r0,VCPU_CEDED(r3)
+ li r3,H_SUCCESS
+ blr
+
+ /* we've ceded but we want to give control to the host */
+kvm_cede_exit:
+ b hcall_real_fallback
+
+ /* Try to handle a machine check in real mode */
+machine_check_realmode:
+ mr r3, r9 /* get vcpu pointer */
+ bl kvmppc_realmode_machine_check
+ nop
+ cmpdi r3, 0 /* Did we handle MCE ? */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ /*
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
+ * machine check interrupt (set HSRR0 to 0x200). And for handled
+ * errors (no-fatal), just go back to guest execution with current
+ * HSRR0 instead of exiting guest. This new approach will inject
+ * machine check to guest for fatal error causing guest to crash.
+ *
+ * The old code used to return to host for unhandled errors which
+ * was causing guest to hang with soft lockups inside guest and
+ * makes it difficult to recover guest instance.
+ */
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ bne 2f /* Continue guest execution. */
+ /* If not, deliver a machine check. SRR0/1 are already set */
+ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
+ ld r11, VCPU_MSR(r9)
+ bl kvmppc_msr_interrupt
+2: b fast_interrupt_c_return
+
+/*
+ * Check the reason we woke from nap, and take appropriate action.
+ * Returns:
+ * 0 if nothing needs to be done
+ * 1 if something happened that needs to be handled by the host
+ * -1 if there was a guest wakeup (IPI)
+ *
+ * Also sets r12 to the interrupt vector for any interrupt that needs
+ * to be handled now by the host (0x500 for external interrupt), or zero.
+ */
+kvmppc_check_wake_reason:
+ mfspr r6, SPRN_SRR1
+BEGIN_FTR_SECTION
+ rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */
+FTR_SECTION_ELSE
+ rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+ cmpwi r6, 8 /* was it an external interrupt? */
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+ beq kvmppc_read_intr /* if so, see what it was */
+ li r3, 0
+ li r12, 0
+ cmpwi r6, 6 /* was it the decrementer? */
+ beq 0f
+BEGIN_FTR_SECTION
+ cmpwi r6, 5 /* privileged doorbell? */
+ beq 0f
+ cmpwi r6, 3 /* hypervisor doorbell? */
+ beq 3f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1 /* anything else, return 1 */
+0: blr
+
+ /* hypervisor doorbell */
+3: li r12, BOOK3S_INTERRUPT_H_DOORBELL
+ li r3, 1
+ blr
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ */
+kvmppc_read_intr:
+ /* see if a host IPI is pending */
+ li r3, 1
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne 1f
+
+ /* Now read the interrupt from the ICP */
+ ld r6, HSTATE_XICS_PHYS(r13)
+ li r7, XICS_XIRR
+ cmpdi r6, 0
+ beq- 1f
+ lwzcix r0, r6, r7
+ rlwinm. r3, r0, 0, 0xffffff
+ sync
+ beq 1f /* if nothing pending in the ICP */
+
+ /* We found something in the ICP...
+ *
+ * If it's not an IPI, stash it in the PACA and return to
+ * the host, we don't (yet) handle directing real external
+ * interrupts directly to the guest
+ */
+ cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
+ bne 42f
+
+ /* It's an IPI, clear the MFRR and EOI it */
+ li r3, 0xff
+ li r8, XICS_MFRR
+ stbcix r3, r6, r8 /* clear the IPI */
+ stwcix r0, r6, r7 /* EOI it */
+ sync
+
+ /* We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ bne- 43f
+
+ /* OK, it's an IPI for us */
+ li r3, -1
+1: blr
+
+42: /* It's not an IPI and it's for the host, stash it in the PACA
+ * before exit, it will be picked up by the host ICP driver
+ */
+ stw r0, HSTATE_SAVED_XIRR(r13)
+ li r3, 1
+ b 1b
+
+43: /* We raced with the host, we need to resend that IPI, bummer */
+ li r0, IPI_PRIORITY
+ stbcix r0, r6, r8 /* set the IPI */
+ sync
+ li r3, 1
+ b 1b
+
+/*
+ * Save away FP, VMX and VSX registers.
+ * r3 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
+ */
+kvmppc_save_fp:
+ mflr r30
+ mr r31,r3
+ mfmsr r5
+ ori r8,r5,MSR_FP
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+ addi r3,r3,VCPU_FPRS
+ bl .store_fp_state
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ addi r3,r31,VCPU_VRS
+ bl .store_vr_state
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ mfspr r6,SPRN_VRSAVE
+ stw r6,VCPU_VRSAVE(r31)
+ mtlr r30
+ blr
+
+/*
+ * Load up FP, VMX and VSX registers
+ * r4 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
+ */
+kvmppc_load_fp:
+ mflr r30
+ mr r31,r4
+ mfmsr r9
+ ori r8,r9,MSR_FP
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ mtmsrd r8
+ isync
+ addi r3,r4,VCPU_FPRS
+ bl .load_fp_state
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ addi r3,r31,VCPU_VRS
+ bl .load_vr_state
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ lwz r7,VCPU_VRSAVE(r31)
+ mtspr SPRN_VRSAVE,r7
+ mtlr r30
+ mr r4,r31
+ blr
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+kvmppc_bad_host_intr:
+ b .
+
+/*
+ * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
+ * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+ * r11 has the guest MSR value (in/out)
+ * r9 has a vcpu pointer (in)
+ * r0 is used as a scratch register
+ */
+kvmppc_msr_interrupt:
+ rldicl r0, r11, 64 - MSR_TS_S_LG, 62
+ cmpwi r0, 2 /* Check if we are in transactional state.. */
+ ld r11, VCPU_INTR_MSR(r9)
+ bne 1f
+ /* ... if transactional, change to suspended */
+ li r0, 1
+1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ blr
+
+/*
+ * This works around a hardware bug on POWER8E processors, where
+ * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
+ * performance monitor interrupt. Instead, when we need to have
+ * an interrupt pending, we have to arrange for a counter to overflow.
+ */
+kvmppc_fix_pmao:
+ li r3, 0
+ mtspr SPRN_MMCR2, r3
+ lis r3, (MMCR0_PMXE | MMCR0_FCECE)@h
+ ori r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
+ mtspr SPRN_MMCR0, r3
+ lis r3, 0x7fff
+ ori r3, r3, 0xffff
+ mtspr SPRN_PMC6, r3
+ isync
+ blr
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 2f0bc928b08..d044b8b7c69 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -25,55 +25,38 @@
#include <asm/exception-64s.h>
#if defined(CONFIG_PPC_BOOK3S_64)
-
-#define ULONG_SIZE 8
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define FUNC(name) name
+#else
#define FUNC(name) GLUE(.,name)
-
-#define GET_SHADOW_VCPU(reg) \
- addi reg, r13, PACA_KVM_SVCPU
-
-#define DISABLE_INTERRUPTS \
- mfmsr r0; \
- rldicl r0,r0,48,1; \
- rotldi r0,r0,16; \
- mtmsrd r0,1; \
+#endif
+#define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU
#elif defined(CONFIG_PPC_BOOK3S_32)
-
-#define ULONG_SIZE 4
#define FUNC(name) name
-
-#define GET_SHADOW_VCPU(reg) \
- lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
-
-#define DISABLE_INTERRUPTS \
- mfmsr r0; \
- rlwinm r0,r0,0,17,15; \
- mtmsr r0; \
+#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
#endif /* CONFIG_PPC_BOOK3S_XX */
-
-#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
#define VCPU_LOAD_NVGPRS(vcpu) \
- PPC_LL r14, VCPU_GPR(r14)(vcpu); \
- PPC_LL r15, VCPU_GPR(r15)(vcpu); \
- PPC_LL r16, VCPU_GPR(r16)(vcpu); \
- PPC_LL r17, VCPU_GPR(r17)(vcpu); \
- PPC_LL r18, VCPU_GPR(r18)(vcpu); \
- PPC_LL r19, VCPU_GPR(r19)(vcpu); \
- PPC_LL r20, VCPU_GPR(r20)(vcpu); \
- PPC_LL r21, VCPU_GPR(r21)(vcpu); \
- PPC_LL r22, VCPU_GPR(r22)(vcpu); \
- PPC_LL r23, VCPU_GPR(r23)(vcpu); \
- PPC_LL r24, VCPU_GPR(r24)(vcpu); \
- PPC_LL r25, VCPU_GPR(r25)(vcpu); \
- PPC_LL r26, VCPU_GPR(r26)(vcpu); \
- PPC_LL r27, VCPU_GPR(r27)(vcpu); \
- PPC_LL r28, VCPU_GPR(r28)(vcpu); \
- PPC_LL r29, VCPU_GPR(r29)(vcpu); \
- PPC_LL r30, VCPU_GPR(r30)(vcpu); \
- PPC_LL r31, VCPU_GPR(r31)(vcpu); \
+ PPC_LL r14, VCPU_GPR(R14)(vcpu); \
+ PPC_LL r15, VCPU_GPR(R15)(vcpu); \
+ PPC_LL r16, VCPU_GPR(R16)(vcpu); \
+ PPC_LL r17, VCPU_GPR(R17)(vcpu); \
+ PPC_LL r18, VCPU_GPR(R18)(vcpu); \
+ PPC_LL r19, VCPU_GPR(R19)(vcpu); \
+ PPC_LL r20, VCPU_GPR(R20)(vcpu); \
+ PPC_LL r21, VCPU_GPR(R21)(vcpu); \
+ PPC_LL r22, VCPU_GPR(R22)(vcpu); \
+ PPC_LL r23, VCPU_GPR(R23)(vcpu); \
+ PPC_LL r24, VCPU_GPR(R24)(vcpu); \
+ PPC_LL r25, VCPU_GPR(R25)(vcpu); \
+ PPC_LL r26, VCPU_GPR(R26)(vcpu); \
+ PPC_LL r27, VCPU_GPR(R27)(vcpu); \
+ PPC_LL r28, VCPU_GPR(R28)(vcpu); \
+ PPC_LL r29, VCPU_GPR(R29)(vcpu); \
+ PPC_LL r30, VCPU_GPR(R30)(vcpu); \
+ PPC_LL r31, VCPU_GPR(R31)(vcpu); \
/*****************************************************************************
* *
@@ -85,7 +68,7 @@
* r3: kvm_run pointer
* r4: vcpu pointer
*/
-_GLOBAL(__kvmppc_vcpu_entry)
+_GLOBAL(__kvmppc_vcpu_run)
kvm_start_entry:
/* Write correct stack frame */
@@ -101,58 +84,59 @@ kvm_start_entry:
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
+ /* Save CR */
+ mfcr r14
+ stw r14, _CCR(r1)
+
/* Save LR */
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
VCPU_LOAD_NVGPRS(r4)
- GET_SHADOW_VCPU(r5)
-
- /* Save R1/R2 in the PACA */
- PPC_STL r1, SVCPU_HOST_R1(r5)
- PPC_STL r2, SVCPU_HOST_R2(r5)
-
- /* XXX swap in/out on load? */
- PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
- PPC_STL r3, SVCPU_VMHANDLER(r5)
-
kvm_start_lightweight:
-
- PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
-
- DISABLE_INTERRUPTS
+ /* Copy registers into shadow vcpu so we can access them in real mode */
+ GET_SHADOW_VCPU(r3)
+ bl FUNC(kvmppc_copy_to_svcpu)
+ nop
+ REST_GPR(4, r1)
#ifdef CONFIG_PPC_BOOK3S_64
- /* Some guests may need to have dcbz set to 32 byte length.
- *
- * Usually we ensure that by patching the guest's instructions
- * to trap on dcbz and emulate it in the hypervisor.
- *
- * If we can, we should tell the CPU to use 32 byte dcbz though,
- * because that's a lot faster.
- */
-
+ /* Get the dcbz32 flag */
PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
- beq no_dcbz32_on
-
- mfspr r3,SPRN_HID5
- ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */
- mtspr SPRN_HID5,r3
-
-no_dcbz32_on:
-
+ rldicl r3, r3, 0, 63 /* r3 &= 1 */
+ stb r3, HSTATE_RESTORE_HID5(r13)
+
+ /* Load up guest SPRG3 value, since it's user readable */
+ lwz r3, VCPU_SHAREDBE(r4)
+ cmpwi r3, 0
+ ld r5, VCPU_SHARED(r4)
+ beq sprg3_little_endian
+sprg3_big_endian:
+#ifdef __BIG_ENDIAN__
+ ld r3, VCPU_SHARED_SPRG3(r5)
+#else
+ addi r5, r5, VCPU_SHARED_SPRG3
+ ldbrx r3, 0, r5
+#endif
+ b after_sprg3_load
+sprg3_little_endian:
+#ifdef __LITTLE_ENDIAN__
+ ld r3, VCPU_SHARED_SPRG3(r5)
+#else
+ addi r5, r5, VCPU_SHARED_SPRG3
+ ldbrx r3, 0, r5
+#endif
+
+after_sprg3_load:
+ mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r6, VCPU_RMCALL(r4)
- mtctr r6
-
- PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
- LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
- bctr
+ bl FUNC(kvmppc_entry_trampoline)
+ nop
/*
* This is the handler in module memory. It gets jumped at from the
@@ -160,9 +144,6 @@ no_dcbz32_on:
*
*/
-.global kvmppc_handler_highmem
-kvmppc_handler_highmem:
-
/*
* Register usage at this point:
*
@@ -171,113 +152,62 @@ kvmppc_handler_highmem:
* R12 = exit handler id
* R13 = PACA
* SVCPU.* = guest *
+ * MSR.EE = 1
*
*/
- /* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
- PPC_LL r5, VCPU_HFLAGS(r7)
- rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
- beq no_dcbz32_off
-
- li r4, 0
- mfspr r5,SPRN_HID5
- rldimi r5,r4,6,56
- mtspr SPRN_HID5,r5
-
-no_dcbz32_off:
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
- PPC_STL r14, VCPU_GPR(r14)(r7)
- PPC_STL r15, VCPU_GPR(r15)(r7)
- PPC_STL r16, VCPU_GPR(r16)(r7)
- PPC_STL r17, VCPU_GPR(r17)(r7)
- PPC_STL r18, VCPU_GPR(r18)(r7)
- PPC_STL r19, VCPU_GPR(r19)(r7)
- PPC_STL r20, VCPU_GPR(r20)(r7)
- PPC_STL r21, VCPU_GPR(r21)(r7)
- PPC_STL r22, VCPU_GPR(r22)(r7)
- PPC_STL r23, VCPU_GPR(r23)(r7)
- PPC_STL r24, VCPU_GPR(r24)(r7)
- PPC_STL r25, VCPU_GPR(r25)(r7)
- PPC_STL r26, VCPU_GPR(r26)(r7)
- PPC_STL r27, VCPU_GPR(r27)(r7)
- PPC_STL r28, VCPU_GPR(r28)(r7)
- PPC_STL r29, VCPU_GPR(r29)(r7)
- PPC_STL r30, VCPU_GPR(r30)(r7)
- PPC_STL r31, VCPU_GPR(r31)(r7)
-
- /* Restore host msr -> SRR1 */
- PPC_LL r6, VCPU_HOST_MSR(r7)
+ PPC_LL r3, GPR4(r1) /* vcpu pointer */
/*
- * For some interrupts, we need to call the real Linux
- * handler, so it can do work for us. This has to happen
- * as if the interrupt arrived from the kernel though,
- * so let's fake it here where most state is restored.
- *
- * Call Linux for hardware interrupts/decrementer
- * r3 = address of interrupt handler (exit reason)
+ * kvmppc_copy_from_svcpu can clobber volatile registers, save
+ * the exit handler id to the vcpu and restore it from there later.
*/
+ stw r12, VCPU_TRAP(r3)
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq call_linux_handler
- cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
- beq call_linux_handler
- cmpwi r12, BOOK3S_INTERRUPT_PERFMON
- beq call_linux_handler
-
- /* Back to EE=1 */
- mtmsr r6
- sync
- b kvm_return_point
+ /* Transfer reg values from shadow vcpu back to vcpu struct */
+ /* On 64-bit, interrupts are still off at this point */
-call_linux_handler:
+ GET_SHADOW_VCPU(r4)
+ bl FUNC(kvmppc_copy_from_svcpu)
+ nop
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * If we land here we need to jump back to the handler we
- * came from.
- *
- * We have a page that we can access from real mode, so let's
- * jump back to that and use it as a trampoline to get back into the
- * interrupt handler!
- *
- * R3 still contains the exit code,
- * R5 VCPU_HOST_RETIP and
- * R6 VCPU_HOST_MSR
+ * Reload kernel SPRG3 value.
+ * No need to save guest value as usermode can't modify SPRG3.
*/
+ ld r3, PACA_SPRG_VDSO(r13)
+ mtspr SPRN_SPRG_VDSO_WRITE, r3
+#endif /* CONFIG_PPC_BOOK3S_64 */
- /* Restore host IP -> SRR0 */
- PPC_LL r5, VCPU_HOST_RETIP(r7)
-
- /* XXX Better move to a safe function?
- * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
-
- mtlr r12
-
- PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
- mtsrr0 r4
- LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
- mtsrr1 r3
-
- RFI
-
-.global kvm_return_point
-kvm_return_point:
+ /* R7 = vcpu */
+ PPC_LL r7, GPR4(r1)
- /* Jump back to lightweight entry if we're supposed to */
- /* go back into the guest */
+ PPC_STL r14, VCPU_GPR(R14)(r7)
+ PPC_STL r15, VCPU_GPR(R15)(r7)
+ PPC_STL r16, VCPU_GPR(R16)(r7)
+ PPC_STL r17, VCPU_GPR(R17)(r7)
+ PPC_STL r18, VCPU_GPR(R18)(r7)
+ PPC_STL r19, VCPU_GPR(R19)(r7)
+ PPC_STL r20, VCPU_GPR(R20)(r7)
+ PPC_STL r21, VCPU_GPR(R21)(r7)
+ PPC_STL r22, VCPU_GPR(R22)(r7)
+ PPC_STL r23, VCPU_GPR(R23)(r7)
+ PPC_STL r24, VCPU_GPR(R24)(r7)
+ PPC_STL r25, VCPU_GPR(R25)(r7)
+ PPC_STL r26, VCPU_GPR(R26)(r7)
+ PPC_STL r27, VCPU_GPR(R27)(r7)
+ PPC_STL r28, VCPU_GPR(R28)(r7)
+ PPC_STL r29, VCPU_GPR(R29)(r7)
+ PPC_STL r30, VCPU_GPR(R30)(r7)
+ PPC_STL r31, VCPU_GPR(R31)(r7)
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- mr r5, r12
+ lwz r5, VCPU_TRAP(r7)
/* Restore r3 (kvm_run) and r4 (vcpu) */
REST_2GPRS(3, r1)
- bl FUNC(kvmppc_handle_exit)
+ bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
cmpwi r3, RESUME_GUEST
@@ -291,6 +221,9 @@ kvm_exit_loop:
PPC_LL r4, _LINK(r1)
mtlr r4
+ lwz r14, _CCR(r1)
+ mtcr r14
+
/* Restore non-volatile host registers (r14 - r31) */
REST_NVGPRS(r1)
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 79751d8dd13..5a1ab1250a0 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -21,7 +21,6 @@
#include <linux/kvm_host.h>
#include <linux/hash.h>
#include <linux/slab.h>
-#include "trace.h"
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -29,6 +28,8 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "trace_pr.h"
+
#define PTE_SIZE 12
static struct kmem_cache *hpte_cache;
@@ -55,33 +56,51 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
HPTEG_HASH_BITS_VPTE_LONG);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+ return hash_64((vpage & 0xffffffff0ULL) >> 4,
+ HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
u64 index;
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
trace_kvm_book3s_mmu_map(pte);
- spin_lock(&vcpu->arch.mmu_lock);
+ spin_lock(&vcpu3s->mmu_lock);
/* Add to ePTE list */
index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
- hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+ hlist_add_head_rcu(&pte->list_pte, &vcpu3s->hpte_hash_pte[index]);
/* Add to ePTE_long list */
index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
hlist_add_head_rcu(&pte->list_pte_long,
- &vcpu->arch.hpte_hash_pte_long[index]);
+ &vcpu3s->hpte_hash_pte_long[index]);
/* Add to vPTE list */
index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
- hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
+ hlist_add_head_rcu(&pte->list_vpte, &vcpu3s->hpte_hash_vpte[index]);
/* Add to vPTE_long list */
index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
hlist_add_head_rcu(&pte->list_vpte_long,
- &vcpu->arch.hpte_hash_vpte_long[index]);
+ &vcpu3s->hpte_hash_vpte_long[index]);
- spin_unlock(&vcpu->arch.mmu_lock);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Add to vPTE_64k list */
+ index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+ hlist_add_head_rcu(&pte->list_vpte_64k,
+ &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
+ vcpu3s->hpte_cache_count++;
+
+ spin_unlock(&vcpu3s->mmu_lock);
}
static void free_pte_rcu(struct rcu_head *head)
@@ -92,16 +111,18 @@ static void free_pte_rcu(struct rcu_head *head)
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
trace_kvm_book3s_mmu_invalidate(pte);
/* Different for 32 and 64 bit */
kvmppc_mmu_invalidate_pte(vcpu, pte);
- spin_lock(&vcpu->arch.mmu_lock);
+ spin_lock(&vcpu3s->mmu_lock);
/* pte already invalidated in between? */
if (hlist_unhashed(&pte->list_pte)) {
- spin_unlock(&vcpu->arch.mmu_lock);
+ spin_unlock(&vcpu3s->mmu_lock);
return;
}
@@ -109,30 +130,28 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_del_init_rcu(&pte->list_pte_long);
hlist_del_init_rcu(&pte->list_vpte);
hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+ hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
+ vcpu3s->hpte_cache_count--;
- if (pte->pte.may_write)
- kvm_release_pfn_dirty(pte->pfn);
- else
- kvm_release_pfn_clean(pte->pfn);
-
- spin_unlock(&vcpu->arch.mmu_lock);
+ spin_unlock(&vcpu3s->mmu_lock);
- vcpu->arch.hpte_cache_count--;
call_rcu(&pte->rcu_head, free_pte_rcu);
}
static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
- struct hlist_node *node;
int i;
rcu_read_lock();
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
- struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+ struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
- hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, list, list_vpte_long)
invalidate_pte(vcpu, pte);
}
@@ -141,17 +160,17 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
- struct hlist_node *node;
struct hpte_cache *pte;
/* Find the list of entries in the map */
- list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+ list = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
rcu_read_lock();
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_rcu(pte, node, list, list_pte)
+ hlist_for_each_entry_rcu(pte, list, list_pte)
if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
invalidate_pte(vcpu, pte);
@@ -160,18 +179,18 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
- struct hlist_node *node;
struct hpte_cache *pte;
/* Find the list of entries in the map */
- list = &vcpu->arch.hpte_hash_pte_long[
+ list = &vcpu3s->hpte_hash_pte_long[
kvmppc_mmu_hash_pte_long(guest_ea)];
rcu_read_lock();
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_rcu(pte, node, list, list_pte_long)
+ hlist_for_each_entry_rcu(pte, list, list_pte_long)
if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea)
invalidate_pte(vcpu, pte);
@@ -203,38 +222,61 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
/* Flush with mask 0xfffffffff */
static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
- struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xfffffffffULL;
- list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+ list = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+
+ rcu_read_lock();
+
+ /* Check the list for matching entries and invalidate */
+ hlist_for_each_entry_rcu(pte, list, list_vpte)
+ if ((pte->pte.vpage & vp_mask) == guest_vp)
+ invalidate_pte(vcpu, pte);
+
+ rcu_read_unlock();
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ struct hlist_head *list;
+ struct hpte_cache *pte;
+ u64 vp_mask = 0xffffffff0ULL;
+
+ list = &vcpu3s->hpte_hash_vpte_64k[
+ kvmppc_mmu_hash_vpte_64k(guest_vp)];
rcu_read_lock();
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_rcu(pte, node, list, list_vpte)
+ hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
if ((pte->pte.vpage & vp_mask) == guest_vp)
invalidate_pte(vcpu, pte);
rcu_read_unlock();
}
+#endif
/* Flush with mask 0xffffff000 */
static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hlist_head *list;
- struct hlist_node *node;
struct hpte_cache *pte;
u64 vp_mask = 0xffffff000ULL;
- list = &vcpu->arch.hpte_hash_vpte_long[
+ list = &vcpu3s->hpte_hash_vpte_long[
kvmppc_mmu_hash_vpte_long(guest_vp)];
rcu_read_lock();
/* Check the list for matching entries and invalidate */
- hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, list, list_vpte_long)
if ((pte->pte.vpage & vp_mask) == guest_vp)
invalidate_pte(vcpu, pte);
@@ -250,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
case 0xfffffffffULL:
kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case 0xffffffff0ULL:
+ kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+ break;
+#endif
case 0xffffff000ULL:
kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
break;
@@ -261,7 +308,7 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
- struct hlist_node *node;
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
int i;
@@ -270,9 +317,9 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
rcu_read_lock();
for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
- struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+ struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
- hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+ hlist_for_each_entry_rcu(pte, list, list_vpte_long)
if ((pte->pte.raddr >= pa_start) &&
(pte->pte.raddr < pa_end))
invalidate_pte(vcpu, pte);
@@ -283,17 +330,22 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
- pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
- vcpu->arch.hpte_cache_count++;
-
- if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM)
+ if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
kvmppc_mmu_pte_flush_all(vcpu);
+ pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+
return pte;
}
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+ kmem_cache_free(hpte_cache, pte);
+}
+
void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_pte_flush(vcpu, 0, 0);
@@ -309,17 +361,23 @@ static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
/* init hpte lookup hashes */
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
- ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long,
- ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
- ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
- kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
- ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
-
- spin_lock_init(&vcpu->arch.mmu_lock);
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_pte_long));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+ kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+ ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
+
+ spin_lock_init(&vcpu3s->mmu_lock);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 7b0ee96c1be..6c8011fd57e 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -24,6 +24,7 @@
#include <asm/kvm_fpu.h>
#include <asm/reg.h>
#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
#include <linux/vmalloc.h>
/* #define DEBUG */
@@ -159,21 +160,23 @@
static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
{
- kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]);
}
static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
{
- u64 dsisr;
- struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+ u32 dsisr;
+ u64 msr = kvmppc_get_msr(vcpu);
- shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
- shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
- shared->dar = eaddr;
+ msr = kvmppc_set_field(msr, 33, 36, 0);
+ msr = kvmppc_set_field(msr, 42, 47, 0);
+ kvmppc_set_msr(vcpu, msr);
+ kvmppc_set_dar(vcpu, eaddr);
/* Page Fault */
dsisr = kvmppc_set_field(0, 33, 33, 1);
if (is_store)
- shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+ dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+ kvmppc_set_dsisr(vcpu, dsisr);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
@@ -196,7 +199,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ len, 1);
goto done_load;
}
@@ -205,11 +209,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* put in registers */
switch (ls_type) {
case FPU_LS_SINGLE:
- kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
+ kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs));
vcpu->arch.qpr[rs] = *((u32*)tmp);
break;
case FPU_LS_DOUBLE:
- vcpu->arch.fpr[rs] = *((u64*)tmp);
+ VCPU_FPR(vcpu, rs) = *((u64*)tmp);
break;
}
@@ -231,18 +235,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (ls_type) {
case FPU_LS_SINGLE:
- kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp);
val = *((u32*)tmp);
len = sizeof(u32);
break;
case FPU_LS_SINGLE_LOW:
- *((u32*)tmp) = vcpu->arch.fpr[rs];
- val = vcpu->arch.fpr[rs] & 0xffffffff;
+ *((u32*)tmp) = VCPU_FPR(vcpu, rs);
+ val = VCPU_FPR(vcpu, rs) & 0xffffffff;
len = sizeof(u32);
break;
case FPU_LS_DOUBLE:
- *((u64*)tmp) = vcpu->arch.fpr[rs];
- val = vcpu->arch.fpr[rs];
+ *((u64*)tmp) = VCPU_FPR(vcpu, rs);
+ val = VCPU_FPR(vcpu, rs);
len = sizeof(u64);
break;
default:
@@ -286,18 +290,20 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ 4, 1);
vcpu->arch.qpr[rs] = tmp[1];
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
+ 8, 1);
goto done_load;
}
emulated = EMULATE_DONE;
/* put in registers */
- kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
+ kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs));
vcpu->arch.qpr[rs] = tmp[1];
dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
@@ -315,7 +321,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u32 tmp[2];
int len = w ? sizeof(u32) : sizeof(u64);
- kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
+ kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]);
tmp[1] = vcpu->arch.qpr[rs];
r = kvmppc_st(vcpu, &addr, len, tmp, true);
@@ -508,7 +514,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
u32 *src2, u32 *src3))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out;
u32 ps0_in1, ps0_in2, ps0_in3;
u32 ps1_in1, ps1_in2, ps1_in3;
@@ -517,20 +522,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
- kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
- kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3);
if (scalar & SCALAR_LOW)
ps0_in2 = qpr[reg_in2];
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
ps0_in1, ps0_in2, ps0_in3, ps0_out);
if (!(scalar & SCALAR_NO_PS0))
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
/* PS1 */
ps1_in1 = qpr[reg_in1];
@@ -541,7 +546,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
ps1_in2 = ps0_in2;
if (!(scalar & SCALAR_NO_PS1))
- func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
+ func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
@@ -557,7 +562,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
u32 *src2))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out;
u32 ps0_in1, ps0_in2;
u32 ps1_out;
@@ -567,20 +571,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
if (scalar & SCALAR_LOW)
ps0_in2 = qpr[reg_in2];
else
- kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
if (!(scalar & SCALAR_NO_PS0)) {
dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
ps0_in1, ps0_in2, ps0_out);
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
}
/* PS1 */
@@ -590,7 +594,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
if (scalar & SCALAR_HIGH)
ps1_in2 = ps0_in2;
- func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
+ func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
if (!(scalar & SCALAR_NO_PS1)) {
qpr[reg_out] = ps1_out;
@@ -608,7 +612,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
u32 *dst, u32 *src1))
{
u32 *qpr = vcpu->arch.qpr;
- u64 *fpr = vcpu->arch.fpr;
u32 ps0_out, ps0_in;
u32 ps1_in;
@@ -616,17 +619,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
WARN_ON(rc);
/* PS0 */
- kvm_cvt_df(&fpr[reg_in], &ps0_in);
- func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
+ kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in);
+ func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in);
dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
ps0_in, ps0_out);
- kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+ kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
/* PS1 */
ps1_in = qpr[reg_in];
- func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in);
+ func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in);
dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
ps1_in, qpr[reg_out]);
@@ -645,10 +648,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int ax_rc = inst_get_field(inst, 21, 25);
short full_d = inst_get_field(inst, 16, 31);
- u64 *fpr_d = &vcpu->arch.fpr[ax_rd];
- u64 *fpr_a = &vcpu->arch.fpr[ax_ra];
- u64 *fpr_b = &vcpu->arch.fpr[ax_rb];
- u64 *fpr_c = &vcpu->arch.fpr[ax_rc];
+ u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd);
+ u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra);
+ u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb);
+ u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc);
bool rcomp = (inst & 1) ? true : false;
u32 cr = kvmppc_get_cr(vcpu);
@@ -659,7 +662,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (!kvmppc_inst_is_paired_single(vcpu, inst))
return EMULATE_FAIL;
- if (!(vcpu->arch.shared->msr & MSR_FP)) {
+ if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
return EMULATE_AGAIN;
}
@@ -670,11 +673,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Do we need to clear FE0 / FE1 here? Don't think so. */
#ifdef DEBUG
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
u32 f;
- kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n",
- i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
+ i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]);
}
#endif
@@ -760,8 +763,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
}
case OP_4X_PS_NEG:
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] ^= 0x80000000;
break;
@@ -771,7 +774,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_4X_PS_MR:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
case OP_4X_PS_CMPO1:
@@ -780,44 +783,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_4X_PS_NABS:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] |= 0x80000000;
break;
case OP_4X_PS_ABS:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
- vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL;
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+ VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL;
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
vcpu->arch.qpr[ax_rd] &= ~0x80000000;
break;
case OP_4X_PS_MERGE00:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
- /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
- kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
+ /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+ kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
&vcpu->arch.qpr[ax_rd]);
break;
case OP_4X_PS_MERGE01:
WARN_ON(rcomp);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
case OP_4X_PS_MERGE10:
WARN_ON(rcomp);
- /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
- &vcpu->arch.fpr[ax_rd]);
- /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
- kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+ &VCPU_FPR(vcpu, ax_rd));
+ /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+ kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
&vcpu->arch.qpr[ax_rd]);
break;
case OP_4X_PS_MERGE11:
WARN_ON(rcomp);
- /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+ /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
- &vcpu->arch.fpr[ax_rd]);
+ &VCPU_FPR(vcpu, ax_rd));
vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
break;
}
@@ -852,7 +855,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_4A_PS_SUM1:
emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
- vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc];
+ VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc);
break;
case OP_4A_PS_SUM0:
emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
@@ -1102,45 +1105,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 59:
switch (inst_get_field(inst, 21, 30)) {
case OP_59_FADDS:
- fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FSUBS:
- fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FDIVS:
- fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FRES:
- fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FRSQRTES:
- fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
}
switch (inst_get_field(inst, 26, 30)) {
case OP_59_FMULS:
- fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FMSUBS:
- fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FMADDS:
- fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FNMSUBS:
- fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_59_FNMADDS:
- fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
}
@@ -1155,12 +1158,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_63_MFFS:
/* XXX missing CR */
- *fpr_d = vcpu->arch.fpscr;
+ *fpr_d = vcpu->arch.fp.fpscr;
break;
case OP_63_MTFSF:
/* XXX missing fm bits */
/* XXX missing CR */
- vcpu->arch.fpscr = *fpr_b;
+ vcpu->arch.fp.fpscr = *fpr_b;
break;
case OP_63_FCMPU:
{
@@ -1168,7 +1171,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 cr0_mask = 0xf0000000;
u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
- fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
cr &= ~(cr0_mask >> cr_shift);
cr |= (cr & cr0_mask) >> cr_shift;
break;
@@ -1179,40 +1182,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 cr0_mask = 0xf0000000;
u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
- fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+ fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
cr &= ~(cr0_mask >> cr_shift);
cr |= (cr & cr0_mask) >> cr_shift;
break;
}
case OP_63_FNEG:
- fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FMR:
*fpr_d = *fpr_b;
break;
case OP_63_FABS:
- fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FCPSGN:
- fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FDIV:
- fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FADD:
- fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FSUB:
- fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+ fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
break;
case OP_63_FCTIW:
- fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FCTIWZ:
- fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
break;
case OP_63_FRSP:
- fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
kvmppc_sync_qpr(vcpu, ax_rd);
break;
case OP_63_FRSQRTE:
@@ -1220,39 +1223,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
double one = 1.0f;
/* fD = sqrt(fB) */
- fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+ fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
/* fD = 1.0f / fD */
- fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
+ fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
break;
}
}
switch (inst_get_field(inst, 26, 30)) {
case OP_63_FMUL:
- fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+ fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
break;
case OP_63_FSEL:
- fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FMSUB:
- fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FMADD:
- fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FNMSUB:
- fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
case OP_63_FNMADD:
- fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+ fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
break;
}
break;
}
#ifdef DEBUG
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
u32 f;
- kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+ kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
}
#endif
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
new file mode 100644
index 00000000000..8eef1e51907
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -0,0 +1,1674 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Kevin Wolf <mail@kevin-wolf.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ * Functions relating to running KVM on Book 3S processors where
+ * we don't have access to hypervisor mode, and we run the guest
+ * in problem state (user mode).
+ *
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+
+#include "book3s.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
+
+/* #define EXIT_DEBUG */
+/* #define DEBUG_EXT */
+
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr);
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+
+/* Some compatibility defines */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#endif
+
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
+ svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu->in_use = 0;
+ svcpu_put(svcpu);
+#endif
+ vcpu->cpu = smp_processor_id();
+#ifdef CONFIG_PPC_BOOK3S_32
+ current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
+#endif
+}
+
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ if (svcpu->in_use) {
+ kvmppc_copy_from_svcpu(vcpu, svcpu);
+ }
+ memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
+ to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
+ svcpu_put(svcpu);
+#endif
+
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ vcpu->cpu = -1;
+}
+
+/* Copy data needed by real-mode code from vcpu to shadow vcpu */
+void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+ struct kvm_vcpu *vcpu)
+{
+ svcpu->gpr[0] = vcpu->arch.gpr[0];
+ svcpu->gpr[1] = vcpu->arch.gpr[1];
+ svcpu->gpr[2] = vcpu->arch.gpr[2];
+ svcpu->gpr[3] = vcpu->arch.gpr[3];
+ svcpu->gpr[4] = vcpu->arch.gpr[4];
+ svcpu->gpr[5] = vcpu->arch.gpr[5];
+ svcpu->gpr[6] = vcpu->arch.gpr[6];
+ svcpu->gpr[7] = vcpu->arch.gpr[7];
+ svcpu->gpr[8] = vcpu->arch.gpr[8];
+ svcpu->gpr[9] = vcpu->arch.gpr[9];
+ svcpu->gpr[10] = vcpu->arch.gpr[10];
+ svcpu->gpr[11] = vcpu->arch.gpr[11];
+ svcpu->gpr[12] = vcpu->arch.gpr[12];
+ svcpu->gpr[13] = vcpu->arch.gpr[13];
+ svcpu->cr = vcpu->arch.cr;
+ svcpu->xer = vcpu->arch.xer;
+ svcpu->ctr = vcpu->arch.ctr;
+ svcpu->lr = vcpu->arch.lr;
+ svcpu->pc = vcpu->arch.pc;
+#ifdef CONFIG_PPC_BOOK3S_64
+ svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
+#endif
+ svcpu->in_use = true;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+ struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+ /*
+ * vcpu_put would just call us again because in_use hasn't
+ * been updated yet.
+ */
+ preempt_disable();
+
+ /*
+ * Maybe we were already preempted and synced the svcpu from
+ * our preempt notifiers. Don't bother touching this svcpu then.
+ */
+ if (!svcpu->in_use)
+ goto out;
+
+ vcpu->arch.gpr[0] = svcpu->gpr[0];
+ vcpu->arch.gpr[1] = svcpu->gpr[1];
+ vcpu->arch.gpr[2] = svcpu->gpr[2];
+ vcpu->arch.gpr[3] = svcpu->gpr[3];
+ vcpu->arch.gpr[4] = svcpu->gpr[4];
+ vcpu->arch.gpr[5] = svcpu->gpr[5];
+ vcpu->arch.gpr[6] = svcpu->gpr[6];
+ vcpu->arch.gpr[7] = svcpu->gpr[7];
+ vcpu->arch.gpr[8] = svcpu->gpr[8];
+ vcpu->arch.gpr[9] = svcpu->gpr[9];
+ vcpu->arch.gpr[10] = svcpu->gpr[10];
+ vcpu->arch.gpr[11] = svcpu->gpr[11];
+ vcpu->arch.gpr[12] = svcpu->gpr[12];
+ vcpu->arch.gpr[13] = svcpu->gpr[13];
+ vcpu->arch.cr = svcpu->cr;
+ vcpu->arch.xer = svcpu->xer;
+ vcpu->arch.ctr = svcpu->ctr;
+ vcpu->arch.lr = svcpu->lr;
+ vcpu->arch.pc = svcpu->pc;
+ vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+ vcpu->arch.fault_dar = svcpu->fault_dar;
+ vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+ vcpu->arch.last_inst = svcpu->last_inst;
+#ifdef CONFIG_PPC_BOOK3S_64
+ vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
+#endif
+ svcpu->in_use = false;
+
+out:
+ preempt_enable();
+}
+
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
+{
+ int r = 1; /* Indicate we want to get back into the guest */
+
+ /* We misuse TLB_FLUSH to indicate that we want to clear
+ all shadow cache entries */
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+ return r;
+}
+
+/************* MMU Notifiers *************/
+static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
+{
+ long i;
+ struct kvm_vcpu *vcpu;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long hva_start, hva_end;
+ gfn_t gfn, gfn_end;
+
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn, gfn+1, ..., gfn_end-1}.
+ */
+ gfn = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+ gfn_end << PAGE_SHIFT);
+ }
+}
+
+static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
+
+ return 0;
+}
+
+static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+ unsigned long end)
+{
+ do_kvm_unmap_hva(kvm, start, end);
+
+ return 0;
+}
+
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
+}
+
+/*****************************************/
+
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ ulong smsr = guest_msr;
+
+ /* Guest MSR values */
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
+ /* Process MSR values */
+ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+ /* External providers the guest reserved */
+ smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
+ /* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+ smsr |= MSR_ISF | MSR_HV;
+#endif
+ vcpu->arch.shadow_msr = smsr;
+}
+
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
+{
+ ulong old_msr = kvmppc_get_msr(vcpu);
+
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+
+ msr &= to_book3s(vcpu)->msr_mask;
+ kvmppc_set_msr_fast(vcpu, msr);
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ if (msr & MSR_POW) {
+ if (!vcpu->arch.pending_exceptions) {
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ vcpu->stat.halt_wakeup++;
+
+ /* Unset POW bit after we woke up */
+ msr &= ~MSR_POW;
+ kvmppc_set_msr_fast(vcpu, msr);
+ }
+ }
+
+ if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
+ (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
+ kvmppc_mmu_flush_segments(vcpu);
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+ /* Preload magic page segment when in kernel mode */
+ if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+ struct kvm_vcpu_arch *a = &vcpu->arch;
+
+ if (msr & MSR_DR)
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+ else
+ kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+ }
+ }
+
+ /*
+ * When switching from 32 to 64-bit, we may have a stale 32-bit
+ * magic page around, we need to flush it. Typically 32-bit magic
+ * page will be instanciated when calling into RTAS. Note: We
+ * assume that such transition only happens while in kernel mode,
+ * ie, we never transition from user 32-bit to kernel 64-bit with
+ * a 32-bit magic page around.
+ */
+ if (vcpu->arch.magic_page_pa &&
+ !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
+ /* going from RTAS to normal kernel code */
+ kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
+ ~0xFFFUL);
+ }
+
+ /* Preload FPU if it's enabled */
+ if (kvmppc_get_msr(vcpu) & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+}
+
+void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+ u32 host_pvr;
+
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
+ vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
+ kvmppc_mmu_book3s_64_init(vcpu);
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0xfff00000;
+ to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_64;
+ } else
+#endif
+ {
+ kvmppc_mmu_book3s_32_init(vcpu);
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0;
+ to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+ vcpu->arch.cpu_type = KVM_CPU_3S_32;
+ }
+
+ kvmppc_sanity_check(vcpu);
+
+ /* If we are in hypervisor level on 970, we can tell the CPU to
+ * treat DCBZ as 32 bytes store */
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
+ !strcmp(cur_cpu_spec->platform, "ppc970"))
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+
+ /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+ really needs them in a VM on Cell and force disable them. */
+ if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+ to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+ /*
+ * If they're asking for POWER6 or later, set the flag
+ * indicating that we can do multiple large page sizes
+ * and 1TB segments.
+ * Also set the flag that indicates that tlbie has the large
+ * page bit in the RB operand instead of the instruction.
+ */
+ switch (PVR_VER(pvr)) {
+ case PVR_POWER6:
+ case PVR_POWER7:
+ case PVR_POWER7p:
+ case PVR_POWER8:
+ vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+ BOOK3S_HFLAG_NEW_TLBIE;
+ break;
+ }
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* 32 bit Book3S always has 32 byte dcbz */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
+
+ /* On some CPUs we can execute paired single operations natively */
+ asm ( "mfpvr %0" : "=r"(host_pvr));
+ switch (host_pvr) {
+ case 0x00080200: /* lonestar 2.0 */
+ case 0x00088202: /* lonestar 2.2 */
+ case 0x70000100: /* gekko 1.0 */
+ case 0x00080100: /* gekko 2.0 */
+ case 0x00083203: /* gekko 2.3a */
+ case 0x00083213: /* gekko 2.3b */
+ case 0x00083204: /* gekko 2.4 */
+ case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
+ case 0x00087200: /* broadway */
+ vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
+ /* Enable HID2.PSE - in case we need it later */
+ mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
+ }
+}
+
+/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
+ * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
+ * emulate 32 bytes dcbz length.
+ *
+ * The Book3s_64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
+ *
+ * My approach here is to patch the dcbz instruction on executing pages.
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+ struct page *hpage;
+ u64 hpage_offset;
+ u32 *page;
+ int i;
+
+ hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+ if (is_error_page(hpage))
+ return;
+
+ hpage_offset = pte->raddr & ~PAGE_MASK;
+ hpage_offset &= ~0xFFFULL;
+ hpage_offset /= 4;
+
+ get_page(hpage);
+ page = kmap_atomic(hpage);
+
+ /* patch dcbz into reserved instruction, so we trap */
+ for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+ if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
+ page[i] &= cpu_to_be32(0xfffffff7);
+
+ kunmap_atomic(page);
+ put_page(hpage);
+}
+
+static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ ulong mp_pa = vcpu->arch.magic_page_pa;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
+ if (unlikely(mp_pa) &&
+ unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
+ return 1;
+ }
+
+ return kvm_is_visible_gfn(vcpu->kvm, gfn);
+}
+
+int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ ulong eaddr, int vec)
+{
+ bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+ bool iswrite = false;
+ int r = RESUME_GUEST;
+ int relocated;
+ int page_found = 0;
+ struct kvmppc_pte pte;
+ bool is_mmio = false;
+ bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
+ bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
+ u64 vsid;
+
+ relocated = data ? dr : ir;
+ if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+ iswrite = true;
+
+ /* Resolve real address if translation turned on */
+ if (relocated) {
+ page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
+ } else {
+ pte.may_execute = true;
+ pte.may_read = true;
+ pte.may_write = true;
+ pte.raddr = eaddr & KVM_PAM;
+ pte.eaddr = eaddr;
+ pte.vpage = eaddr >> 12;
+ pte.page_size = MMU_PAGE_64K;
+ }
+
+ switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
+ case 0:
+ pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+ break;
+ case MSR_DR:
+ case MSR_IR:
+ vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+ if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
+ pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+ else
+ pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+ pte.vpage |= vsid;
+
+ if (vsid == -1)
+ page_found = -EINVAL;
+ break;
+ }
+
+ if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * If we do the dcbz hack, we have to NX on every execution,
+ * so we can patch the executing code. This renders our guest
+ * NX-less.
+ */
+ pte.may_execute = !data;
+ }
+
+ if (page_found == -ENOENT) {
+ /* Page not found in guest PTE entries */
+ u64 ssrr1 = vcpu->arch.shadow_srr1;
+ u64 msr = kvmppc_get_msr(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
+ kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EPERM) {
+ /* Storage protection */
+ u32 dsisr = vcpu->arch.fault_dsisr;
+ u64 ssrr1 = vcpu->arch.shadow_srr1;
+ u64 msr = kvmppc_get_msr(vcpu);
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
+ kvmppc_set_dsisr(vcpu, dsisr);
+ kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
+ kvmppc_book3s_queue_irqprio(vcpu, vec);
+ } else if (page_found == -EINVAL) {
+ /* Page not found in guest SLB */
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
+ } else if (!is_mmio &&
+ kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+ if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+ /*
+ * There is already a host HPTE there, presumably
+ * a read-only one for a page the guest thinks
+ * is writable, so get rid of it first.
+ */
+ kvmppc_mmu_unmap_page(vcpu, &pte);
+ }
+ /* The guest's PTE is not mapped yet. Map on the host */
+ kvmppc_mmu_map_page(vcpu, &pte, iswrite);
+ if (data)
+ vcpu->stat.sp_storage++;
+ else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+ kvmppc_patch_dcbz(vcpu, &pte);
+ } else {
+ /* MMIO */
+ vcpu->stat.mmio_exits++;
+ vcpu->arch.paddr_accessed = pte.raddr;
+ vcpu->arch.vaddr_accessed = pte.eaddr;
+ r = kvmppc_emulate_mmio(run, vcpu);
+ if ( r == RESUME_HOST_NV )
+ r = RESUME_HOST;
+ }
+
+ return r;
+}
+
+static inline int get_fpr_index(int i)
+{
+ return i * TS_FPRWIDTH;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+
+ /*
+ * VSX instructions can access FP and vector registers, so if
+ * we are giving up VSX, make sure we give up FP and VMX as well.
+ */
+ if (msr & MSR_VSX)
+ msr |= MSR_FP | MSR_VEC;
+
+ msr &= vcpu->arch.guest_owned_ext;
+ if (!msr)
+ return;
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+ if (msr & MSR_FP) {
+ /*
+ * Note that on CPUs with VSX, giveup_fpu stores
+ * both the traditional FP registers and the added VSX
+ * registers into thread.fp_state.fpr[].
+ */
+ if (t->regs->msr & MSR_FP)
+ giveup_fpu(current);
+ t->fp_save_area = NULL;
+ }
+
+#ifdef CONFIG_ALTIVEC
+ if (msr & MSR_VEC) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ t->vr_save_area = NULL;
+ }
+#endif
+
+ vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
+ kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/* Give up facility (TAR / EBB / DSCR) */
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
+ /* Facility not available to the guest, ignore giveup request*/
+ return;
+ }
+
+ switch (fac) {
+ case FSCR_TAR_LG:
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ mtspr(SPRN_TAR, current->thread.tar);
+ vcpu->arch.shadow_fscr &= ~FSCR_TAR;
+ break;
+ }
+#endif
+}
+
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
+{
+ ulong srr0 = kvmppc_get_pc(vcpu);
+ u32 last_inst = kvmppc_get_last_inst(vcpu);
+ int ret;
+
+ ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
+ if (ret == -ENOENT) {
+ ulong msr = kvmppc_get_msr(vcpu);
+
+ msr = kvmppc_set_field(msr, 33, 33, 1);
+ msr = kvmppc_set_field(msr, 34, 36, 0);
+ msr = kvmppc_set_field(msr, 42, 47, 0);
+ kvmppc_set_msr_fast(vcpu, msr);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+ return EMULATE_AGAIN;
+ }
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+
+ /* Need to do paired single emulation? */
+ if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+ return EMULATE_DONE;
+
+ /* Read out the instruction */
+ if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
+ /* Need to emulate */
+ return EMULATE_FAIL;
+
+ return EMULATE_AGAIN;
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+
+ /* When we have paired singles, we emulate in software */
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
+ return RESUME_GUEST;
+
+ if (!(kvmppc_get_msr(vcpu) & msr)) {
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ return RESUME_GUEST;
+ }
+
+ if (msr == MSR_VSX) {
+ /* No VSX? Give an illegal instruction interrupt */
+#ifdef CONFIG_VSX
+ if (!cpu_has_feature(CPU_FTR_VSX))
+#endif
+ {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+
+ /*
+ * We have to load up all the FP and VMX registers before
+ * we can let the guest use VSX instructions.
+ */
+ msr = MSR_FP | MSR_VEC | MSR_VSX;
+ }
+
+ /* See if we already own all the ext(s) needed */
+ msr &= ~vcpu->arch.guest_owned_ext;
+ if (!msr)
+ return RESUME_GUEST;
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+ if (msr & MSR_FP) {
+ preempt_disable();
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ t->fp_save_area = &vcpu->arch.fp;
+ preempt_enable();
+ }
+
+ if (msr & MSR_VEC) {
+#ifdef CONFIG_ALTIVEC
+ preempt_disable();
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ t->vr_save_area = &vcpu->arch.vr;
+ preempt_enable();
+#endif
+ }
+
+ t->regs->msr |= msr;
+ vcpu->arch.guest_owned_ext |= msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ return RESUME_GUEST;
+}
+
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+ unsigned long lost_ext;
+
+ lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+ if (!lost_ext)
+ return;
+
+ if (lost_ext & MSR_FP) {
+ preempt_disable();
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ preempt_enable();
+ }
+#ifdef CONFIG_ALTIVEC
+ if (lost_ext & MSR_VEC) {
+ preempt_disable();
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ preempt_enable();
+ }
+#endif
+ current->thread.regs->msr |= lost_ext;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+{
+ /* Inject the Interrupt Cause field and trigger a guest interrupt */
+ vcpu->arch.fscr &= ~(0xffULL << 56);
+ vcpu->arch.fscr |= (fac << 56);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+}
+
+static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+ enum emulation_result er = EMULATE_FAIL;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_PR))
+ er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+
+ if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
+ /* Couldn't emulate, trigger interrupt in guest */
+ kvmppc_trigger_fac_interrupt(vcpu, fac);
+ }
+}
+
+/* Enable facilities (TAR, EBB, DSCR) for the guest */
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+ bool guest_fac_enabled;
+ BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
+
+ /*
+ * Not every facility is enabled by FSCR bits, check whether the
+ * guest has this facility enabled at all.
+ */
+ switch (fac) {
+ case FSCR_TAR_LG:
+ case FSCR_EBB_LG:
+ guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
+ break;
+ case FSCR_TM_LG:
+ guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
+ break;
+ default:
+ guest_fac_enabled = false;
+ break;
+ }
+
+ if (!guest_fac_enabled) {
+ /* Facility not enabled by the guest */
+ kvmppc_trigger_fac_interrupt(vcpu, fac);
+ return RESUME_GUEST;
+ }
+
+ switch (fac) {
+ case FSCR_TAR_LG:
+ /* TAR switching isn't lazy in Linux yet */
+ current->thread.tar = mfspr(SPRN_TAR);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ vcpu->arch.shadow_fscr |= FSCR_TAR;
+ break;
+ default:
+ kvmppc_emulate_fac(vcpu, fac);
+ break;
+ }
+
+ return RESUME_GUEST;
+}
+#endif
+
+int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ int r = RESUME_HOST;
+ int s;
+
+ vcpu->stat.sum_exits++;
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+
+ /* We get here with MSR.EE=1 */
+
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
+
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_INST_STORAGE:
+ {
+ ulong shadow_srr1 = vcpu->arch.shadow_srr1;
+ vcpu->stat.pf_instruc++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ u32 sr;
+
+ svcpu = svcpu_get(vcpu);
+ sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
+ svcpu_put(svcpu);
+ if (sr == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+ r = RESUME_GUEST;
+ break;
+ }
+ }
+#endif
+
+ /* only care about PTEG not found errors, but leave NX alone */
+ if (shadow_srr1 & 0x40000000) {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ vcpu->stat.sp_instruc++;
+ } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+ (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+ /*
+ * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
+ * so we can't use the NX bit inside the guest. Let's cross our fingers,
+ * that no guest that needs the dcbz hack does NX.
+ */
+ kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+ r = RESUME_GUEST;
+ } else {
+ u64 msr = kvmppc_get_msr(vcpu);
+ msr |= shadow_srr1 & 0x58000000;
+ kvmppc_set_msr_fast(vcpu, msr);
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_DATA_STORAGE:
+ {
+ ulong dar = kvmppc_get_fault_dar(vcpu);
+ u32 fault_dsisr = vcpu->arch.fault_dsisr;
+ vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+ /* We set segments as unused segments when invalidating them. So
+ * treat the respective fault as segment fault. */
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+ u32 sr;
+
+ svcpu = svcpu_get(vcpu);
+ sr = svcpu->sr[dar >> SID_SHIFT];
+ svcpu_put(svcpu);
+ if (sr == SR_INVALID) {
+ kvmppc_mmu_map_segment(vcpu, dar);
+ r = RESUME_GUEST;
+ break;
+ }
+ }
+#endif
+
+ /*
+ * We need to handle missing shadow PTEs, and
+ * protection faults due to us mapping a page read-only
+ * when the guest thinks it is writable.
+ */
+ if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ } else {
+ kvmppc_set_dar(vcpu, dar);
+ kvmppc_set_dsisr(vcpu, fault_dsisr);
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_DATA_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_DATA_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_INST_SEGMENT:
+ if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_INST_SEGMENT);
+ }
+ r = RESUME_GUEST;
+ break;
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_DECREMENTER:
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ case BOOK3S_INTERRUPT_DOORBELL:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
+ case BOOK3S_INTERRUPT_EXTERNAL_HV:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PERFMON:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_PROGRAM:
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ {
+ enum emulation_result er;
+ ulong flags;
+
+program_interrupt:
+ flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+#ifdef EXIT_DEBUG
+ printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+#endif
+ if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
+ (INS_DCBZ & 0xfffffff7)) {
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+ }
+
+ vcpu->stat.emulated_inst_exits++;
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_AGAIN:
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_EXIT_USER:
+ r = RESUME_HOST_NV;
+ break;
+ default:
+ BUG();
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_SYSCALL:
+ if (vcpu->arch.papr_enabled &&
+ (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
+ !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+ /* SC 1 papr hypercalls */
+ ulong cmd = kvmppc_get_gpr(vcpu, 3);
+ int i;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
+ r = RESUME_GUEST;
+ break;
+ }
+#endif
+
+ run->papr_hcall.nr = cmd;
+ for (i = 0; i < 9; ++i) {
+ ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+ run->papr_hcall.args[i] = gpr;
+ }
+ run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->arch.hcall_needed = 1;
+ r = RESUME_HOST;
+ } else if (vcpu->arch.osi_enabled &&
+ (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+ (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+ /* MOL hypercalls */
+ u64 *gprs = run->osi.gprs;
+ int i;
+
+ run->exit_reason = KVM_EXIT_OSI;
+ for (i = 0; i < 32; i++)
+ gprs[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu->arch.osi_needed = 1;
+ r = RESUME_HOST_NV;
+ } else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
+ (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+ /* KVM PV hypercalls */
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ r = RESUME_GUEST;
+ } else {
+ /* Guest syscalls */
+ vcpu->stat.syscall_exits++;
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
+ case BOOK3S_INTERRUPT_FP_UNAVAIL:
+ case BOOK3S_INTERRUPT_ALTIVEC:
+ case BOOK3S_INTERRUPT_VSX:
+ {
+ int ext_msr = 0;
+
+ switch (exit_nr) {
+ case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
+ case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
+ case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
+ }
+
+ switch (kvmppc_check_ext(vcpu, exit_nr)) {
+ case EMULATE_DONE:
+ /* everything ok - let's enable the ext */
+ r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+ break;
+ case EMULATE_FAIL:
+ /* we need to emulate this instruction */
+ goto program_interrupt;
+ break;
+ default:
+ /* nothing to worry about - go again */
+ break;
+ }
+ break;
+ }
+ case BOOK3S_INTERRUPT_ALIGNMENT:
+ if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+ u32 last_inst = kvmppc_get_last_inst(vcpu);
+ u32 dsisr;
+ u64 dar;
+
+ dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
+ dar = kvmppc_alignment_dar(vcpu, last_inst);
+
+ kvmppc_set_dsisr(vcpu, dsisr);
+ kvmppc_set_dar(vcpu, dar);
+
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ }
+ r = RESUME_GUEST;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case BOOK3S_INTERRUPT_FAC_UNAVAIL:
+ kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
+ r = RESUME_GUEST;
+ break;
+#endif
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_TRACE:
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ break;
+ default:
+ {
+ ulong shadow_srr1 = vcpu->arch.shadow_srr1;
+ /* Ugh - bork here! What did we get? */
+ printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+ exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
+ r = RESUME_HOST;
+ BUG();
+ break;
+ }
+ }
+
+ if (!(r & RESUME_HOST)) {
+ /* To avoid clobbering exit_reason, only check for signals if
+ * we aren't already exiting to userspace for some other
+ * reason. */
+
+ /*
+ * Interrupts could be timers for the guest which we have to
+ * inject again, so let's postpone them until we're in the guest
+ * and if we really did time things so badly, then we just exit
+ * again due to a host external interrupt.
+ */
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0)
+ r = s;
+ else {
+ /* interrupts now hard-disabled */
+ kvmppc_fix_ee_before_entry();
+ }
+
+ kvmppc_handle_lost_ext(vcpu);
+ }
+
+ trace_kvm_book3s_reenter(r, vcpu);
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
+ sregs->pvr = vcpu->arch.pvr;
+
+ sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+ for (i = 0; i < 64; i++) {
+ sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
+ sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
+ }
+ } else {
+ for (i = 0; i < 16; i++)
+ sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
+
+ for (i = 0; i < 8; i++) {
+ sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
+ sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+ }
+ }
+
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
+ kvmppc_set_pvr_pr(vcpu, sregs->pvr);
+
+ vcpu3s->sdr1 = sregs->u.s.sdr1;
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+ for (i = 0; i < 64; i++) {
+ vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
+ sregs->u.s.ppc64.slb[i].slbe);
+ }
+ } else {
+ for (i = 0; i < 16; i++) {
+ vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
+ }
+ for (i = 0; i < 8; i++) {
+ kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
+ (u32)sregs->u.s.ppc32.ibat[i]);
+ kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
+ (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
+ kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
+ (u32)sregs->u.s.ppc32.dbat[i]);
+ kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
+ (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+ }
+ }
+
+ /* Flush the MMU after messing with the segments */
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+ return 0;
+}
+
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+
+ switch (id) {
+ case KVM_REG_PPC_HIOR:
+ *val = get_reg_val(id, to_book3s(vcpu)->hior);
+ break;
+ case KVM_REG_PPC_LPCR:
+ /*
+ * We are only interested in the LPCR_ILE bit
+ */
+ if (vcpu->arch.intr_msr & MSR_LE)
+ *val = get_reg_val(id, LPCR_ILE);
+ else
+ *val = get_reg_val(id, 0);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+ if (new_lpcr & LPCR_ILE)
+ vcpu->arch.intr_msr |= MSR_LE;
+ else
+ vcpu->arch.intr_msr &= ~MSR_LE;
+}
+
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+
+ switch (id) {
+ case KVM_REG_PPC_HIOR:
+ to_book3s(vcpu)->hior = set_reg_val(id, *val);
+ to_book3s(vcpu)->hior_explicit = true;
+ break;
+ case KVM_REG_PPC_LPCR:
+ kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s;
+ struct kvm_vcpu *vcpu;
+ int err = -ENOMEM;
+ unsigned long p;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+ goto out;
+
+ vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+ if (!vcpu_book3s)
+ goto free_vcpu;
+ vcpu->arch.book3s = vcpu_book3s;
+
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ vcpu->arch.shadow_vcpu =
+ kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+ if (!vcpu->arch.shadow_vcpu)
+ goto free_vcpu3s;
+#endif
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_shadow_vcpu;
+
+ err = -ENOMEM;
+ p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+ if (!p)
+ goto uninit_vcpu;
+ /* the real shared page fills the last 4k of our page */
+ vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Always start the shared struct in native endian mode */
+#ifdef __BIG_ENDIAN__
+ vcpu->arch.shared_big_endian = true;
+#else
+ vcpu->arch.shared_big_endian = false;
+#endif
+
+ /*
+ * Default to the same as the host if we're on sufficiently
+ * recent machine that we have 1TB segments;
+ * otherwise default to PPC970FX.
+ */
+ vcpu->arch.pvr = 0x3C0301;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu->arch.intr_msr = MSR_SF;
+#else
+ /* default to book3s_32 (750) */
+ vcpu->arch.pvr = 0x84202;
+#endif
+ kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
+ vcpu->arch.slb_nr = 64;
+
+ vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
+
+ err = kvmppc_mmu_init(vcpu);
+ if (err < 0)
+ goto uninit_vcpu;
+
+ return vcpu;
+
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+free_shadow_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
+#endif
+ vfree(vcpu_book3s);
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
+ kvm_vcpu_uninit(vcpu);
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ kfree(vcpu->arch.shadow_vcpu);
+#endif
+ vfree(vcpu_book3s);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ret;
+#ifdef CONFIG_ALTIVEC
+ unsigned long uninitialized_var(vrsave);
+#endif
+
+ /* Check if we can run the vcpu at all */
+ if (!vcpu->arch.sane) {
+ kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Interrupts could be timers for the guest which we have to inject
+ * again, so let's postpone them until we're in the guest and if we
+ * really did time things so badly, then we just exit again due to
+ * a host external interrupt.
+ */
+ ret = kvmppc_prepare_to_enter(vcpu);
+ if (ret <= 0)
+ goto out;
+ /* interrupts now hard-disabled */
+
+ /* Save FPU state in thread_struct */
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+
+#ifdef CONFIG_ALTIVEC
+ /* Save Altivec state in thread_struct */
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+#endif
+
+#ifdef CONFIG_VSX
+ /* Save VSX state in thread_struct */
+ if (current->thread.regs->msr & MSR_VSX)
+ __giveup_vsx(current);
+#endif
+
+ /* Preload FPU if it's enabled */
+ if (kvmppc_get_msr(vcpu) & MSR_FP)
+ kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+ kvmppc_fix_ee_before_entry();
+
+ ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
+
+ /* Make sure we save the guest FPU/Altivec/VSX state */
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+
+ /* Make sure we save the guest TAR/EBB/DSCR state */
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+
+out:
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ return ret;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ struct kvm_memory_slot *memslot;
+ struct kvm_vcpu *vcpu;
+ ulong ga, ga_end;
+ int is_dirty = 0;
+ int r;
+ unsigned long n;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* If nothing is dirty, don't bother messing with page tables. */
+ if (is_dirty) {
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ ga = memslot->base_gfn << PAGE_SHIFT;
+ ga_end = ga + (memslot->npages << PAGE_SHIFT);
+
+ kvm_for_each_vcpu(n, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ return;
+}
+
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
+{
+ return;
+}
+
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ return;
+}
+
+static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+
+#ifdef CONFIG_PPC64
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
+{
+ long int i;
+ struct kvm_vcpu *vcpu;
+
+ info->flags = 0;
+
+ /* SLB is always 64 entries */
+ info->slb_size = 64;
+
+ /* Standard 4k base page size segment */
+ info->sps[0].page_shift = 12;
+ info->sps[0].slb_enc = 0;
+ info->sps[0].enc[0].page_shift = 12;
+ info->sps[0].enc[0].pte_enc = 0;
+
+ /*
+ * 64k large page size.
+ * We only want to put this in if the CPUs we're emulating
+ * support it, but unfortunately we don't have a vcpu easily
+ * to hand here to test. Just pick the first vcpu, and if
+ * that doesn't exist yet, report the minimum capability,
+ * i.e., no 64k pages.
+ * 1T segment support goes along with 64k pages.
+ */
+ i = 1;
+ vcpu = kvm_get_vcpu(kvm, 0);
+ if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+ info->flags = KVM_PPC_1T_SEGMENTS;
+ info->sps[i].page_shift = 16;
+ info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+ info->sps[i].enc[0].page_shift = 16;
+ info->sps[i].enc[0].pte_enc = 1;
+ ++i;
+ }
+
+ /* Standard 16M large page size segment */
+ info->sps[i].page_shift = 24;
+ info->sps[i].slb_enc = SLB_VSID_L;
+ info->sps[i].enc[0].page_shift = 24;
+ info->sps[i].enc[0].pte_enc = 0;
+
+ return 0;
+}
+#else
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info)
+{
+ /* We should not get called */
+ BUG();
+}
+#endif /* CONFIG_PPC64 */
+
+static unsigned int kvm_global_user_count = 0;
+static DEFINE_SPINLOCK(kvm_global_user_count_lock);
+
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
+{
+ mutex_init(&kvm->arch.hpt_mutex);
+
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ spin_lock(&kvm_global_user_count_lock);
+ if (++kvm_global_user_count == 1)
+ pSeries_disable_reloc_on_exc();
+ spin_unlock(&kvm_global_user_count_lock);
+ }
+ return 0;
+}
+
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
+{
+#ifdef CONFIG_PPC64
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
+
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ spin_lock(&kvm_global_user_count_lock);
+ BUG_ON(kvm_global_user_count == 0);
+ if (--kvm_global_user_count == 0)
+ pSeries_enable_reloc_on_exc();
+ spin_unlock(&kvm_global_user_count_lock);
+ }
+}
+
+static int kvmppc_core_check_processor_compat_pr(void)
+{
+ /* we are always compatible */
+ return 0;
+}
+
+static long kvm_arch_vm_ioctl_pr(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -ENOTTY;
+}
+
+static struct kvmppc_ops kvm_ops_pr = {
+ .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+ .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+ .get_one_reg = kvmppc_get_one_reg_pr,
+ .set_one_reg = kvmppc_set_one_reg_pr,
+ .vcpu_load = kvmppc_core_vcpu_load_pr,
+ .vcpu_put = kvmppc_core_vcpu_put_pr,
+ .set_msr = kvmppc_set_msr_pr,
+ .vcpu_run = kvmppc_vcpu_run_pr,
+ .vcpu_create = kvmppc_core_vcpu_create_pr,
+ .vcpu_free = kvmppc_core_vcpu_free_pr,
+ .check_requests = kvmppc_core_check_requests_pr,
+ .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+ .flush_memslot = kvmppc_core_flush_memslot_pr,
+ .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+ .commit_memory_region = kvmppc_core_commit_memory_region_pr,
+ .unmap_hva = kvm_unmap_hva_pr,
+ .unmap_hva_range = kvm_unmap_hva_range_pr,
+ .age_hva = kvm_age_hva_pr,
+ .test_age_hva = kvm_test_age_hva_pr,
+ .set_spte_hva = kvm_set_spte_hva_pr,
+ .mmu_destroy = kvmppc_mmu_destroy_pr,
+ .free_memslot = kvmppc_core_free_memslot_pr,
+ .create_memslot = kvmppc_core_create_memslot_pr,
+ .init_vm = kvmppc_core_init_vm_pr,
+ .destroy_vm = kvmppc_core_destroy_vm_pr,
+ .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+ .emulate_op = kvmppc_core_emulate_op_pr,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+ .fast_vcpu_kick = kvm_vcpu_kick,
+ .arch_vm_ioctl = kvm_arch_vm_ioctl_pr,
+};
+
+
+int kvmppc_book3s_init_pr(void)
+{
+ int r;
+
+ r = kvmppc_core_check_processor_compat_pr();
+ if (r < 0)
+ return r;
+
+ kvm_ops_pr.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_pr;
+
+ r = kvmppc_mmu_hpte_sysinit();
+ return r;
+}
+
+void kvmppc_book3s_exit_pr(void)
+{
+ kvmppc_pr_ops = NULL;
+ kvmppc_mmu_hpte_sysexit();
+}
+
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 00000000000..52a63bfe3f0
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2011. Freescale Inc. All rights reserved.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ *
+ * Hypercall handling for running PAPR guests in PR KVM on Book 3S
+ * processors.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+#define HPTE_SIZE 16 /* bytes per HPT entry */
+
+static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
+{
+ struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ unsigned long pteg_addr;
+
+ pte_index <<= 4;
+ pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
+ pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+ pteg_addr |= pte_index;
+
+ return pteg_addr;
+}
+
+static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
+{
+ long flags = kvmppc_get_gpr(vcpu, 4);
+ long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long pteg[2 * 8];
+ unsigned long pteg_addr, i, *hpte;
+ long int ret;
+
+ i = pte_index & 7;
+ pte_index &= ~7UL;
+ pteg_addr = get_pteg_addr(vcpu, pte_index);
+
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+ copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+ hpte = pteg;
+
+ ret = H_PTEG_FULL;
+ if (likely((flags & H_EXACT) == 0)) {
+ for (i = 0; ; ++i) {
+ if (i == 8)
+ goto done;
+ if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)
+ break;
+ hpte += 2;
+ }
+ } else {
+ hpte += i * 2;
+ if (*hpte & HPTE_V_VALID)
+ goto done;
+ }
+
+ hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
+ hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
+ pteg_addr += i * HPTE_SIZE;
+ copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
+ kvmppc_set_gpr(vcpu, 4, pte_index | i);
+ ret = H_SUCCESS;
+
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags= kvmppc_get_gpr(vcpu, 4);
+ unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+ unsigned long v = 0, pteg, rb;
+ unsigned long pte[2];
+ long int ret;
+
+ pteg = get_pteg_addr(vcpu, pte_index);
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
+
+ ret = H_NOT_FOUND;
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
+ ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+ goto done;
+
+ copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+ rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+
+ ret = H_SUCCESS;
+ kvmppc_set_gpr(vcpu, 4, pte[0]);
+ kvmppc_set_gpr(vcpu, 5, pte[1]);
+
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
+/* Request defs for kvmppc_h_pr_bulk_remove() */
+#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
+#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
+#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
+#define H_BULK_REMOVE_END 0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
+#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
+#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
+#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
+#define H_BULK_REMOVE_HW 0x3000000000000000ULL
+#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
+#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
+#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
+#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
+#define H_BULK_REMOVE_MAX_BATCH 4
+
+static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
+{
+ int i;
+ int paramnr = 4;
+ int ret = H_SUCCESS;
+
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+ for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+ unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
+ unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
+ unsigned long pteg, rb, flags;
+ unsigned long pte[2];
+ unsigned long v = 0;
+
+ if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+ break; /* Exit success */
+ } else if ((tsh & H_BULK_REMOVE_TYPE) !=
+ H_BULK_REMOVE_REQUEST) {
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+ tsh |= H_BULK_REMOVE_RESPONSE;
+
+ if ((tsh & H_BULK_REMOVE_ANDCOND) &&
+ (tsh & H_BULK_REMOVE_AVPN)) {
+ tsh |= H_BULK_REMOVE_PARM;
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
+
+ /* tsl = AVPN */
+ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
+
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
+ ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
+ tsh |= H_BULK_REMOVE_NOT_FOUND;
+ } else {
+ /* Splat the pteg in (userland) hpt */
+ copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+ rb = compute_tlbie_rb(pte[0], pte[1],
+ tsh & H_BULK_REMOVE_PTEX);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ tsh |= H_BULK_REMOVE_SUCCESS;
+ tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43;
+ }
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ }
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+ unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+ unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+ unsigned long rb, pteg, r, v;
+ unsigned long pte[2];
+ long int ret;
+
+ pteg = get_pteg_addr(vcpu, pte_index);
+ mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ pte[0] = be64_to_cpu(pte[0]);
+ pte[1] = be64_to_cpu(pte[1]);
+
+ ret = H_NOT_FOUND;
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+ goto done;
+
+ v = pte[0];
+ r = pte[1];
+ r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
+ HPTE_R_KEY_LO);
+ r |= (flags << 55) & HPTE_R_PP0;
+ r |= (flags << 48) & HPTE_R_KEY_HI;
+ r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+ pte[1] = r;
+
+ rb = compute_tlbie_rb(v, r, pte_index);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ pte[0] = cpu_to_be64(pte[0]);
+ pte[1] = cpu_to_be64(pte[1]);
+ copy_to_user((void __user *)pteg, pte, sizeof(pte));
+ ret = H_SUCCESS;
+
+ done:
+ mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ long rc;
+
+ rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+ long rc = kvmppc_xics_hcall(vcpu, cmd);
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
+{
+ switch (cmd) {
+ case H_ENTER:
+ return kvmppc_h_pr_enter(vcpu);
+ case H_REMOVE:
+ return kvmppc_h_pr_remove(vcpu);
+ case H_PROTECT:
+ return kvmppc_h_pr_protect(vcpu);
+ case H_BULK_REMOVE:
+ return kvmppc_h_pr_bulk_remove(vcpu);
+ case H_PUT_TCE:
+ return kvmppc_h_pr_put_tce(vcpu);
+ case H_CEDE:
+ kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ vcpu->stat.halt_wakeup++;
+ return EMULATE_DONE;
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ case H_IPOLL:
+ case H_XIRR_X:
+ if (kvmppc_xics_enabled(vcpu))
+ return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+ break;
+ case H_RTAS:
+ if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ return RESUME_HOST;
+ if (kvmppc_rtas_hcall(vcpu))
+ break;
+ kvmppc_set_gpr(vcpu, 3, 0);
+ return EMULATE_DONE;
+ }
+
+ return EMULATE_FAIL;
+}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 2b9c9088d00..16c4d88ba27 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,6 +20,7 @@
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
+#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
@@ -35,38 +36,16 @@
#if defined(CONFIG_PPC_BOOK3S_64)
-#define LOAD_SHADOW_VCPU(reg) \
- mfspr reg, SPRN_SPRG_PACA
-
-#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
-#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define FUNC(name) name
+#else
#define FUNC(name) GLUE(.,name)
+#endif
#elif defined(CONFIG_PPC_BOOK3S_32)
-#define LOAD_SHADOW_VCPU(reg) \
- mfspr reg, SPRN_SPRG_THREAD; \
- lwz reg, THREAD_KVM_SVCPU(reg); \
- /* PPC32 can have a NULL pointer - let's check for that */ \
- mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \
- mfcr r12; \
- cmpwi reg, 0; \
- bne 1f; \
- mfspr reg, SPRN_SPRG_SCRATCH0; \
- mtcr r12; \
- mfspr r12, SPRN_SPRG_SCRATCH1; \
- b kvmppc_resume_\intno; \
-1:; \
- mtcr r12; \
- mfspr r12, SPRN_SPRG_SCRATCH1; \
- tophys(reg, reg)
-
-#define SHADOW_VCPU_OFF 0
-#define MSR_NOIRQ MSR_KERNEL
#define FUNC(name) name
-#endif
-
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
@@ -80,19 +59,28 @@ kvmppc_trampoline_\intno:
*
* To distinguish, we check a magic byte in the PACA/current
*/
- LOAD_SHADOW_VCPU(r13)
- PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
+ mfspr r13, SPRN_SPRG_THREAD
+ lwz r13, THREAD_KVM_SVCPU(r13)
+ /* PPC32 can have a NULL pointer - let's check for that */
+ mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */
mfcr r12
- stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
- lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ cmpwi r13, 0
+ bne 1f
+2: mtcr r12
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
+ b kvmppc_resume_\intno /* Get back original handler */
+
+1: tophys(r13, r13)
+ stw r12, HSTATE_SCRATCH1(r13)
+ mfspr r12, SPRN_SPRG_SCRATCH1
+ stw r12, HSTATE_SCRATCH0(r13)
+ lbz r12, HSTATE_IN_GUEST(r13)
cmpwi r12, KVM_GUEST_MODE_NONE
bne ..kvmppc_handler_hasmagic_\intno
/* No KVM guest? Then jump back to the Linux handler! */
- lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
- mtcr r12
- PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
- b kvmppc_resume_\intno /* Get back original handler */
+ lwz r12, HSTATE_SCRATCH1(r13)
+ b 2b
/* Now we know we're handling a KVM guest */
..kvmppc_handler_hasmagic_\intno:
@@ -123,14 +111,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
-/* Those are only available on 64 bit machines */
-
-#ifdef CONFIG_PPC_BOOK3S_64
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT
-INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
-#endif
-
/*
* Bring us back to the faulting code, but skip the
* faulting instruction.
@@ -142,8 +122,8 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
*
* R12 = free
* R13 = Shadow VCPU (PACA)
- * SVCPU.SCRATCH0 = guest R12
- * SVCPU.SCRATCH1 = guest CR
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
* SPRG_SCRATCH0 = guest R13
*
*/
@@ -155,107 +135,35 @@ kvmppc_handler_skip_ins:
mtsrr0 r12
/* Clean up all state */
- lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
+ lwz r12, HSTATE_SCRATCH1(r13)
mtcr r12
- PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- mfspr r13, SPRN_SPRG_SCRATCH0
+ PPC_LL r12, HSTATE_SCRATCH0(r13)
+ GET_SCRATCH0(r13)
/* And get back into the code */
RFI
+#endif
/*
- * This trampoline brings us back to a real mode handler
- *
- * Input Registers:
- *
- * R5 = SRR0
- * R6 = SRR1
- * LR = real-mode IP
+ * Call kvmppc_handler_trampoline_enter in real mode
*
+ * On entry, r4 contains the guest shadow MSR
+ * MSR.EE has to be 0 when calling this function
*/
-.global kvmppc_handler_lowmem_trampoline
-kvmppc_handler_lowmem_trampoline:
+_GLOBAL_TOC(kvmppc_entry_trampoline)
+ mfmsr r5
+ LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+ toreal(r7)
- mtsrr0 r5
+ li r6, MSR_IR | MSR_DR
+ andc r6, r5, r6 /* Clear DR and IR in MSR value */
+ /*
+ * Set EE in HOST_MSR so that it's enabled when we get into our
+ * C exit handler function.
+ */
+ ori r5, r5, MSR_EE
+ mtsrr0 r7
mtsrr1 r6
- blr
-kvmppc_handler_lowmem_trampoline_end:
-
-/*
- * Call a function in real mode
- *
- * Input Registers:
- *
- * R3 = function
- * R4 = MSR
- * R5 = scratch register
- *
- */
-_GLOBAL(kvmppc_rmcall)
- LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
- mtmsr r5 /* Disable relocation and interrupts, so mtsrr
- doesn't get interrupted */
- sync
- mtsrr0 r3
- mtsrr1 r4
RFI
-#if defined(CONFIG_PPC_BOOK3S_32)
-#define STACK_LR INT_FRAME_SIZE+4
-
-/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
-#define MSR_EXT_START \
- PPC_STL r20, _NIP(r1); \
- mfmsr r20; \
- LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
- andc r3,r20,r3; /* Disable DR,EE */ \
- mtmsr r3; \
- sync
-
-#define MSR_EXT_END \
- mtmsr r20; /* Enable DR,EE */ \
- sync; \
- PPC_LL r20, _NIP(r1)
-
-#elif defined(CONFIG_PPC_BOOK3S_64)
-#define STACK_LR _LINK
-#define MSR_EXT_START
-#define MSR_EXT_END
-#endif
-
-/*
- * Activate current's external feature (FPU/Altivec/VSX)
- */
-#define define_load_up(what) \
- \
-_GLOBAL(kvmppc_load_up_ ## what); \
- PPC_STLU r1, -INT_FRAME_SIZE(r1); \
- mflr r3; \
- PPC_STL r3, STACK_LR(r1); \
- MSR_EXT_START; \
- \
- bl FUNC(load_up_ ## what); \
- \
- MSR_EXT_END; \
- PPC_LL r3, STACK_LR(r1); \
- mtlr r3; \
- addi r1, r1, INT_FRAME_SIZE; \
- blr
-
-define_load_up(fpu)
-#ifdef CONFIG_ALTIVEC
-define_load_up(altivec)
-#endif
-#ifdef CONFIG_VSX
-define_load_up(vsx)
-#endif
-
-.global kvmppc_trampoline_lowmem
-kvmppc_trampoline_lowmem:
- PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
-
-.global kvmppc_trampoline_enter
-kvmppc_trampoline_enter:
- PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
-
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 00000000000..ef27fbd5d9c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+#ifdef CONFIG_KVM_XICS
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
+ if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = be32_to_cpu(args->args[0]);
+ server = be32_to_cpu(args->args[1]);
+ priority = be32_to_cpu(args->args[2]);
+
+ rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = cpu_to_be32(rc);
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = be32_to_cpu(args->args[0]);
+
+ server = priority = 0;
+ rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+ if (rc) {
+ rc = -3;
+ goto out;
+ }
+
+ args->rets[1] = cpu_to_be32(server);
+ args->rets[2] = cpu_to_be32(priority);
+out:
+ args->rets[0] = cpu_to_be32(rc);
+}
+
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq;
+ int rc;
+
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = be32_to_cpu(args->args[0]);
+
+ rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = cpu_to_be32(rc);
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq;
+ int rc;
+
+ if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = be32_to_cpu(args->args[0]);
+
+ rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = cpu_to_be32(rc);
+}
+#endif /* CONFIG_KVM_XICS */
+
+struct rtas_handler {
+ void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+ char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+ { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+ { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+ { .name = "ibm,int-off", .handler = kvm_rtas_int_off },
+ { .name = "ibm,int-on", .handler = kvm_rtas_int_on },
+#endif
+};
+
+struct rtas_token_definition {
+ struct list_head list;
+ struct rtas_handler *handler;
+ u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+ struct kvm_rtas_token_args args;
+ return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+ struct rtas_token_definition *d, *tmp;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+ if (rtas_name_matches(d->handler->name, name)) {
+ list_del(&d->list);
+ kfree(d);
+ return 0;
+ }
+ }
+
+ /* It's not an error to undefine an undefined token */
+ return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+ struct rtas_token_definition *d;
+ struct rtas_handler *h = NULL;
+ bool found;
+ int i;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+ if (d->token == token)
+ return -EEXIST;
+ }
+
+ found = false;
+ for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+ h = &rtas_handlers[i];
+ if (rtas_name_matches(h->name, name)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->handler = h;
+ d->token = token;
+
+ list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+ return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_rtas_token_args args;
+ int rc;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ if (args.token)
+ rc = rtas_token_define(kvm, args.name, args.token);
+ else
+ rc = rtas_token_undefine(kvm, args.name);
+
+ mutex_unlock(&kvm->lock);
+
+ return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+ struct rtas_token_definition *d;
+ struct rtas_args args;
+ rtas_arg_t *orig_rets;
+ gpa_t args_phys;
+ int rc;
+
+ /*
+ * r4 contains the guest physical address of the RTAS args
+ * Mask off the top 4 bits since this is a guest real address
+ */
+ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+
+ rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+
+ /*
+ * args->rets is a pointer into args->args. Now that we've
+ * copied args we need to fix it up to point into our copy,
+ * not the guest args. We also need to save the original
+ * value so we can restore it on the way out.
+ */
+ orig_rets = args.rets;
+ args.rets = &args.args[be32_to_cpu(args.nargs)];
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ rc = -ENOENT;
+ list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+ if (d->token == be32_to_cpu(args.token)) {
+ d->handler->handler(vcpu, &args);
+ rc = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&vcpu->kvm->lock);
+
+ if (rc == 0) {
+ args.rets = orig_rets;
+ rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+ }
+
+ return rc;
+
+fail:
+ /*
+ * We only get here if the guest has called RTAS with a bogus
+ * args pointer. That means we can't get to the args, and so we
+ * can't fail the RTAS call. So fail right out to userspace,
+ * which should kill the guest.
+ */
+ return rc;
+}
+EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+ struct rtas_token_definition *d, *tmp;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+ list_del(&d->list);
+ kfree(d);
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 7c52ed0b705..acee37cde84 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -22,7 +22,7 @@
#if defined(CONFIG_PPC_BOOK3S_64)
#define GET_SHADOW_VCPU(reg) \
- addi reg, r13, PACA_KVM_SVCPU
+ mr reg, r13
#elif defined(CONFIG_PPC_BOOK3S_32)
@@ -57,10 +57,12 @@ kvmppc_handler_trampoline_enter:
/* Required state:
*
* MSR = ~IR|DR
- * R13 = PACA
* R1 = host R1
* R2 = host R2
- * R10 = guest MSR
+ * R4 = guest shadow MSR
+ * R5 = normal host MSR
+ * R6 = current host MSR (EE, IR, DR off)
+ * LR = highmem guest exit code
* all other volatile GPRS = free
* SVCPU[CR] = guest CR
* SVCPU[XER] = guest XER
@@ -71,44 +73,87 @@ kvmppc_handler_trampoline_enter:
/* r3 = shadow vcpu */
GET_SHADOW_VCPU(r3)
- /* Move SRR0 and SRR1 into the respective regs */
- PPC_LL r9, SVCPU_PC(r3)
- mtsrr0 r9
- mtsrr1 r10
+ /* Save guest exit handler address and MSR */
+ mflr r0
+ PPC_STL r0, HSTATE_VMHANDLER(r3)
+ PPC_STL r5, HSTATE_HOST_MSR(r3)
+
+ /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
+ PPC_STL r1, HSTATE_HOST_R1(r3)
+ PPC_STL r2, HSTATE_HOST_R2(r3)
/* Activate guest mode, so faults get handled by KVM */
li r11, KVM_GUEST_MODE_GUEST
- stb r11, SVCPU_IN_GUEST(r3)
+ stb r11, HSTATE_IN_GUEST(r3)
/* Switch to guest segment. This is subarch specific. */
LOAD_GUEST_SEGMENTS
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ /* Save host FSCR */
+ mfspr r8, SPRN_FSCR
+ std r8, HSTATE_HOST_FSCR(r13)
+ /* Set FSCR during guest execution */
+ ld r9, SVCPU_SHADOW_FSCR(r13)
+ mtspr SPRN_FSCR, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ /* Some guests may need to have dcbz set to 32 byte length.
+ *
+ * Usually we ensure that by patching the guest's instructions
+ * to trap on dcbz and emulate it in the hypervisor.
+ *
+ * If we can, we should tell the CPU to use 32 byte dcbz though,
+ * because that's a lot faster.
+ */
+ lbz r0, HSTATE_RESTORE_HID5(r3)
+ cmpwi r0, 0
+ beq no_dcbz32_on
+
+ mfspr r0,SPRN_HID5
+ ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
+ mtspr SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
/* Enter guest */
- PPC_LL r4, (SVCPU_CTR)(r3)
- PPC_LL r5, (SVCPU_LR)(r3)
- lwz r6, (SVCPU_CR)(r3)
- lwz r7, (SVCPU_XER)(r3)
-
- mtctr r4
- mtlr r5
- mtcr r6
- mtxer r7
-
- PPC_LL r0, (SVCPU_R0)(r3)
- PPC_LL r1, (SVCPU_R1)(r3)
- PPC_LL r2, (SVCPU_R2)(r3)
- PPC_LL r4, (SVCPU_R4)(r3)
- PPC_LL r5, (SVCPU_R5)(r3)
- PPC_LL r6, (SVCPU_R6)(r3)
- PPC_LL r7, (SVCPU_R7)(r3)
- PPC_LL r8, (SVCPU_R8)(r3)
- PPC_LL r9, (SVCPU_R9)(r3)
- PPC_LL r10, (SVCPU_R10)(r3)
- PPC_LL r11, (SVCPU_R11)(r3)
- PPC_LL r12, (SVCPU_R12)(r3)
- PPC_LL r13, (SVCPU_R13)(r3)
+ PPC_LL r8, SVCPU_CTR(r3)
+ PPC_LL r9, SVCPU_LR(r3)
+ lwz r10, SVCPU_CR(r3)
+ lwz r11, SVCPU_XER(r3)
+
+ mtctr r8
+ mtlr r9
+ mtcr r10
+ mtxer r11
+ /* Move SRR0 and SRR1 into the respective regs */
+ PPC_LL r9, SVCPU_PC(r3)
+ /* First clear RI in our current MSR value */
+ li r0, MSR_RI
+ andc r6, r6, r0
+
+ PPC_LL r0, SVCPU_R0(r3)
+ PPC_LL r1, SVCPU_R1(r3)
+ PPC_LL r2, SVCPU_R2(r3)
+ PPC_LL r5, SVCPU_R5(r3)
+ PPC_LL r7, SVCPU_R7(r3)
+ PPC_LL r8, SVCPU_R8(r3)
+ PPC_LL r10, SVCPU_R10(r3)
+ PPC_LL r11, SVCPU_R11(r3)
+ PPC_LL r12, SVCPU_R12(r3)
+ PPC_LL r13, SVCPU_R13(r3)
+
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
+
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
RFI
@@ -125,50 +170,64 @@ kvmppc_handler_trampoline_enter_end:
.global kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit:
+.global kvmppc_interrupt_pr
+kvmppc_interrupt_pr:
+
/* Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
* R12 = exit handler id
- * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
- * SVCPU.SCRATCH0 = guest R12
- * SVCPU.SCRATCH1 = guest CR
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
*
*/
/* Save registers */
- PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13)
- PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13)
- PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13)
- PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13)
- PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13)
- PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13)
- PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13)
- PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13)
- PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13)
- PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13)
+ PPC_STL r0, SVCPU_R0(r13)
+ PPC_STL r1, SVCPU_R1(r13)
+ PPC_STL r2, SVCPU_R2(r13)
+ PPC_STL r3, SVCPU_R3(r13)
+ PPC_STL r4, SVCPU_R4(r13)
+ PPC_STL r5, SVCPU_R5(r13)
+ PPC_STL r6, SVCPU_R6(r13)
+ PPC_STL r7, SVCPU_R7(r13)
+ PPC_STL r8, SVCPU_R8(r13)
+ PPC_STL r9, SVCPU_R9(r13)
+ PPC_STL r10, SVCPU_R10(r13)
+ PPC_STL r11, SVCPU_R11(r13)
/* Restore R1/R2 so we can handle faults */
- PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13)
- PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
+ PPC_LL r1, HSTATE_HOST_R1(r13)
+ PPC_LL r2, HSTATE_HOST_R2(r13)
/* Save guest PC and MSR */
- mfsrr0 r3
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ andi. r0, r12, 0x2
+ cmpwi cr1, r0, 0
+ beq 1f
+ mfspr r3,SPRN_HSRR0
+ mfspr r4,SPRN_HSRR1
+ andi. r12,r12,0x3ffd
+ b 2f
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1: mfsrr0 r3
mfsrr1 r4
-
- PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
- PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
+2:
+ PPC_STL r3, SVCPU_PC(r13)
+ PPC_STL r4, SVCPU_SHADOW_SRR1(r13)
/* Get scratch'ed off registers */
- mfspr r9, SPRN_SPRG_SCRATCH0
- PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
- lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
+ GET_SCRATCH0(r9)
+ PPC_LL r8, HSTATE_SCRATCH0(r13)
+ lwz r7, HSTATE_SCRATCH1(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13)
- stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13)
+ PPC_STL r9, SVCPU_R13(r13)
+ PPC_STL r8, SVCPU_R12(r13)
+ stw r7, SVCPU_CR(r13)
/* Save more register state */
@@ -178,11 +237,11 @@ kvmppc_handler_trampoline_exit:
mfctr r8
mflr r9
- stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13)
- PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13)
- stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13)
- PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13)
- PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13)
+ stw r5, SVCPU_XER(r13)
+ PPC_STL r6, SVCPU_FAULT_DAR(r13)
+ stw r7, SVCPU_FAULT_DSISR(r13)
+ PPC_STL r8, SVCPU_CTR(r13)
+ PPC_STL r9, SVCPU_LR(r13)
/*
* In order for us to easily get the last instruction,
@@ -196,11 +255,26 @@ kvmppc_handler_trampoline_exit:
beq ld_last_inst
cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
beq ld_last_inst
+ cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
+ beq ld_last_prev_inst
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
beq- ld_last_inst
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+#endif
b no_ld_last_inst
+ld_last_prev_inst:
+ addi r3, r3, -4
+
ld_last_inst:
/* Save off the guest instruction we're at */
@@ -212,7 +286,7 @@ ld_last_inst:
/* Set guest mode to 'jump over instruction' so if lwz faults
* we'll just continue at the next IP. */
li r9, KVM_GUEST_MODE_SKIP
- stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ stb r9, HSTATE_IN_GUEST(r13)
/* 1) enable paging for data */
mfmsr r9
@@ -226,34 +300,94 @@ ld_last_inst:
sync
#endif
- stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13)
+ stw r0, SVCPU_LAST_INST(r13)
no_ld_last_inst:
/* Unset guest mode */
li r9, KVM_GUEST_MODE_NONE
- stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
+ stb r9, HSTATE_IN_GUEST(r13)
/* Switch back to host MMU */
LOAD_HOST_SEGMENTS
+#ifdef CONFIG_PPC_BOOK3S_64
+
+ lbz r5, HSTATE_RESTORE_HID5(r13)
+ cmpwi r5, 0
+ beq no_dcbz32_off
+
+ li r4, 0
+ mfspr r5,SPRN_HID5
+ rldimi r5,r4,6,56
+ mtspr SPRN_HID5,r5
+
+no_dcbz32_off:
+
+BEGIN_FTR_SECTION
+ /* Save guest FSCR on a FAC_UNAVAIL interrupt */
+ cmpwi r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+ bne+ no_fscr_save
+ mfspr r7, SPRN_FSCR
+ std r7, SVCPU_SHADOW_FSCR(r13)
+no_fscr_save:
+ /* Restore host FSCR */
+ ld r8, HSTATE_HOST_FSCR(r13)
+ mtspr SPRN_FSCR, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+ /*
+ * For some interrupts, we need to call the real Linux
+ * handler, so it can do work for us. This has to happen
+ * as if the interrupt arrived from the kernel though,
+ * so let's fake it here where most state is restored.
+ *
+ * Having set up SRR0/1 with the address where we want
+ * to continue with relocation on (potentially in module
+ * space), we either just go straight there with rfi[d],
+ * or we jump to an interrupt handler if there is an
+ * interrupt to be handled first. In the latter case,
+ * the rfi[d] at the end of the interrupt handler will
+ * get us back to where we want to continue.
+ */
+
/* Register usage at this point:
*
* R1 = host R1
* R2 = host R2
+ * R10 = raw exit handler id
* R12 = exit handler id
- * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
* SVCPU.* = guest *
*
*/
- /* RFI into the highmem handler */
- mfmsr r7
- ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
- mtsrr1 r7
+ PPC_LL r6, HSTATE_HOST_MSR(r13)
+ PPC_LL r8, HSTATE_VMHANDLER(r13)
+
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ beq cr1, 1f
+ mtspr SPRN_HSRR1, r6
+ mtspr SPRN_HSRR0, r8
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1: /* Restore host msr -> SRR1 */
+ mtsrr1 r6
/* Load highmem handler address */
- PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13)
mtsrr0 r8
+ /* RFI into the highmem handler, or jump to interrupt handler */
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+ beqa BOOK3S_INTERRUPT_EXTERNAL
+ cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
+ beqa BOOK3S_INTERRUPT_DECREMENTER
+ cmpwi r12, BOOK3S_INTERRUPT_PERFMON
+ beqa BOOK3S_INTERRUPT_PERFMON
+ cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
+ beqa BOOK3S_INTERRUPT_DOORBELL
+
RFI
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 00000000000..d1acd32a64c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1303 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/time.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE true
+#define DEBUG_REALMODE false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries if the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ * ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ * locks array to improve scalability
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq);
+
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
+ bool report_status)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u16 src;
+
+ XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+ return -EINVAL;
+ }
+ state = &ics->irq_state[src];
+ if (!state->exists)
+ return -EINVAL;
+
+ if (report_status)
+ return state->asserted;
+
+ /*
+ * We set state->asserted locklessly. This should be fine as
+ * we are the only setter, thus concurrent access is undefined
+ * to begin with.
+ */
+ if (level == KVM_INTERRUPT_SET_LEVEL)
+ state->asserted = 1;
+ else if (level == KVM_INTERRUPT_UNSET) {
+ state->asserted = 0;
+ return 0;
+ }
+
+ /* Attempt delivery */
+ icp_deliver_irq(xics, NULL, irq);
+
+ return state->asserted;
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+ struct kvmppc_icp *icp)
+{
+ int i;
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct ics_irq_state *state = &ics->irq_state[i];
+
+ if (!state->resend)
+ continue;
+
+ XICS_DBG("resend %#x prio %#x\n", state->number,
+ state->priority);
+
+ mutex_unlock(&ics->lock);
+ icp_deliver_irq(xics, icp, state->number);
+ mutex_lock(&ics->lock);
+ }
+
+ mutex_unlock(&ics->lock);
+}
+
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+ struct ics_irq_state *state,
+ u32 server, u32 priority, u32 saved_priority)
+{
+ bool deliver;
+
+ mutex_lock(&ics->lock);
+
+ state->server = server;
+ state->priority = priority;
+ state->saved_priority = saved_priority;
+ deliver = false;
+ if ((state->masked_pending || state->resend) && priority != MASKED) {
+ state->masked_pending = 0;
+ deliver = true;
+ }
+
+ mutex_unlock(&ics->lock);
+
+ return deliver;
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, server);
+ if (!icp)
+ return -EINVAL;
+
+ XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+ irq, server, priority,
+ state->masked_pending, state->resend);
+
+ if (write_xive(xics, ics, state, server, priority, priority))
+ icp_deliver_irq(xics, icp, irq);
+
+ return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ mutex_lock(&ics->lock);
+ *server = state->server;
+ *priority = state->priority;
+ mutex_unlock(&ics->lock);
+
+ return 0;
+}
+
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, state->server);
+ if (!icp)
+ return -EINVAL;
+
+ if (write_xive(xics, ics, state, state->server, state->saved_priority,
+ state->saved_priority))
+ icp_deliver_irq(xics, icp, irq);
+
+ return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+ return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+ union kvmppc_icp_state old,
+ union kvmppc_icp_state new,
+ bool change_self)
+{
+ bool success;
+
+ /* Calculate new output value */
+ new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+ /* Attempt atomic update */
+ success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+ if (!success)
+ goto bail;
+
+ XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ icp->server_num,
+ old.cppr, old.mfrr, old.pending_pri, old.xisr,
+ old.need_resend, old.out_ee);
+ XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ new.cppr, new.mfrr, new.pending_pri, new.xisr,
+ new.need_resend, new.out_ee);
+ /*
+ * Check for output state update
+ *
+ * Note that this is racy since another processor could be updating
+ * the state already. This is why we never clear the interrupt output
+ * here, we only ever set it. The clear only happens prior to doing
+ * an update and only by the processor itself. Currently we do it
+ * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+ *
+ * We also do not try to figure out whether the EE state has changed,
+ * we unconditionally set it if the new state calls for it. The reason
+ * for that is that we opportunistically remove the pending interrupt
+ * flag when raising CPPR, so we need to set it back here if an
+ * interrupt is still pending.
+ */
+ if (new.out_ee) {
+ kvmppc_book3s_queue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ if (!change_self)
+ kvmppc_fast_vcpu_kick(icp->vcpu);
+ }
+ bail:
+ return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 icsid;
+
+ /* Order this load with the test for need_resend in the caller */
+ smp_rmb();
+ for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!test_and_clear_bit(icsid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_check_resend(xics, ics, icp);
+ }
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+ u32 *reject)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool success;
+
+ XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+ icp->server_num);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ *reject = 0;
+
+ /* See if we can deliver */
+ success = new_state.cppr > priority &&
+ new_state.mfrr > priority &&
+ new_state.pending_pri > priority;
+
+ /*
+ * If we can, check for a rejection and perform the
+ * delivery
+ */
+ if (success) {
+ *reject = new_state.xisr;
+ new_state.xisr = irq;
+ new_state.pending_pri = priority;
+ } else {
+ /*
+ * If we failed to deliver we set need_resend
+ * so a subsequent CPPR state change causes us
+ * to try a new delivery.
+ */
+ new_state.need_resend = true;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u32 reject;
+ u16 src;
+
+ /*
+ * This is used both for initial delivery of an interrupt and
+ * for subsequent rejection.
+ *
+ * Rejection can be racy vs. resends. We have evaluated the
+ * rejection in an atomic ICP transaction which is now complete,
+ * so potentially the ICP can already accept the interrupt again.
+ *
+ * So we need to retry the delivery. Essentially the reject path
+ * boils down to a failed delivery. Always.
+ *
+ * Now the interrupt could also have moved to a different target,
+ * thus we may need to re-do the ICP lookup as well
+ */
+
+ again:
+ /* Get the ICS state and lock it */
+ ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+ if (!ics) {
+ XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ /* Get a lock on the ICS */
+ mutex_lock(&ics->lock);
+
+ /* Get our server */
+ if (!icp || state->server != icp->server_num) {
+ icp = kvmppc_xics_find_server(xics->kvm, state->server);
+ if (!icp) {
+ pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+ new_irq, state->server);
+ goto out;
+ }
+ }
+
+ /* Clear the resend bit of that interrupt */
+ state->resend = 0;
+
+ /*
+ * If masked, bail out
+ *
+ * Note: PAPR doesn't mention anything about masked pending
+ * when doing a resend, only when doing a delivery.
+ *
+ * However that would have the effect of losing a masked
+ * interrupt that was rejected and isn't consistent with
+ * the whole masked_pending business which is about not
+ * losing interrupts that occur while masked.
+ *
+ * I don't differenciate normal deliveries and resends, this
+ * implementation will differ from PAPR and not lose such
+ * interrupts.
+ */
+ if (state->priority == MASKED) {
+ XICS_DBG("irq %#x masked pending\n", new_irq);
+ state->masked_pending = 1;
+ goto out;
+ }
+
+ /*
+ * Try the delivery, this will set the need_resend flag
+ * in the ICP as part of the atomic transaction if the
+ * delivery is not possible.
+ *
+ * Note that if successful, the new delivery might have itself
+ * rejected an interrupt that was "delivered" before we took the
+ * icp mutex.
+ *
+ * In this case we do the whole sequence all over again for the
+ * new guy. We cannot assume that the rejected interrupt is less
+ * favored than the new one, and thus doesn't need to be delivered,
+ * because by the time we exit icp_try_to_deliver() the target
+ * processor may well have alrady consumed & completed it, and thus
+ * the rejected interrupt might actually be already acceptable.
+ */
+ if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+ /*
+ * Delivery was successful, did we reject somebody else ?
+ */
+ if (reject && reject != XICS_IPI) {
+ mutex_unlock(&ics->lock);
+ new_irq = reject;
+ goto again;
+ }
+ } else {
+ /*
+ * We failed to deliver the interrupt we need to set the
+ * resend map bit and mark the ICS state as needing a resend
+ */
+ set_bit(ics->icsid, icp->resend_map);
+ state->resend = 1;
+
+ /*
+ * If the need_resend flag got cleared in the ICP some time
+ * between icp_try_to_deliver() atomic update and now, then
+ * we know it might have missed the resend_map bit. So we
+ * retry
+ */
+ smp_mb();
+ if (!icp->state.need_resend) {
+ mutex_unlock(&ics->lock);
+ goto again;
+ }
+ }
+ out:
+ mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * a H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ WARN_ON(new_state.xisr != XICS_IPI &&
+ new_state.xisr != 0);
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (ie bus transactions) so we can handle them
+ * separately here too
+ */
+ if (resend)
+ icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ /* First, remove EE from the processor */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+ return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+ vcpu->vcpu_id, server, mfrr);
+
+ icp = vcpu->arch.icp;
+ local = icp->server_num == server;
+ if (!local) {
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+ }
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be rejected as there can be no XISR to
+ * reject. If the MFRR is being made less favored then
+ * there might be a previously-rejected interrupt needing
+ * to be resent.
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, local));
+
+ /* Handle reject */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+
+ /* Handle resend */
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return H_SUCCESS;
+}
+
+static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+ union kvmppc_icp_state state;
+ struct kvmppc_icp *icp;
+
+ icp = vcpu->arch.icp;
+ if (icp->server_num != server) {
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+ }
+ state = ACCESS_ONCE(icp->state);
+ kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
+ kvmppc_set_gpr(vcpu, 5, state.mfrr);
+ return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr)
+ icp_down_cppr(xics, icp, cppr);
+ else if (cppr == icp->state.cppr)
+ return;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Check for rejects. They are handled by doing a new delivery
+ * attempt (see comments in icp_deliver_irq).
+ */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR sepcifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ return H_SUCCESS;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
+ *
+ * "Message" interrupts will never have "asserted" set
+ */
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+ return H_PARAMETER;
+ }
+ state = &ics->irq_state[src];
+
+ /* Still asserted, resend it */
+ if (state->asserted)
+ icp_deliver_irq(xics, icp, irq);
+
+ return H_SUCCESS;
+}
+
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+
+ XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+ hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+ if (icp->rm_action & XICS_RM_KICK_VCPU)
+ kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+ if (icp->rm_action & XICS_RM_CHECK_RESEND)
+ icp_check_resend(xics, icp);
+ if (icp->rm_action & XICS_RM_REJECT)
+ icp_deliver_irq(xics, icp, icp->rm_reject);
+
+ icp->rm_action = 0;
+
+ return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ unsigned long res;
+ int rc = H_SUCCESS;
+
+ /* Check if we have an ICP */
+ if (!xics || !vcpu->arch.icp)
+ return H_HARDWARE;
+
+ /* These requests don't have real-mode implementations at present */
+ switch (req) {
+ case H_XIRR_X:
+ res = kvmppc_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 4, res);
+ kvmppc_set_gpr(vcpu, 5, get_tb());
+ return rc;
+ case H_IPOLL:
+ rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ return rc;
+ }
+
+ /* Check for real mode returning too hard */
+ if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
+ return kvmppc_xics_rm_complete(vcpu, req);
+
+ switch (req) {
+ case H_XIRR:
+ res = kvmppc_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 4, res);
+ break;
+ case H_CPPR:
+ kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_EOI:
+ rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_IPI:
+ rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+ struct kvmppc_xics *xics = m->private;
+ struct kvm *kvm = xics->kvm;
+ struct kvm_vcpu *vcpu;
+ int icsid, i;
+
+ if (!kvm)
+ return 0;
+
+ seq_printf(m, "=========\nICP state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ union kvmppc_icp_state state;
+
+ if (!icp)
+ continue;
+
+ state.raw = ACCESS_ONCE(icp->state.raw);
+ seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+ icp->server_num, state.xisr,
+ state.pending_pri, state.cppr, state.mfrr,
+ state.out_ee, state.need_resend);
+ }
+
+ for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!ics)
+ continue;
+
+ seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+ icsid);
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct ics_irq_state *irq = &ics->irq_state[i];
+
+ seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+ irq->number, irq->server, irq->priority,
+ irq->saved_priority, irq->asserted,
+ irq->resend, irq->masked_pending);
+
+ }
+ mutex_unlock(&ics->lock);
+ }
+ return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+ .open = xics_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+ if (!name) {
+ pr_err("%s: no memory for name\n", __func__);
+ return;
+ }
+
+ xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics, &xics_debug_fops);
+
+ pr_debug("%s: created %s\n", __func__, name);
+ kfree(name);
+}
+
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+ struct kvmppc_xics *xics, int irq)
+{
+ struct kvmppc_ics *ics;
+ int i, icsid;
+
+ icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+ mutex_lock(&kvm->lock);
+
+ /* ICS already exists - somebody else got here first */
+ if (xics->ics[icsid])
+ goto out;
+
+ /* Create the ICS */
+ ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+ if (!ics)
+ goto out;
+
+ mutex_init(&ics->lock);
+ ics->icsid = icsid;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+ ics->irq_state[i].priority = MASKED;
+ ics->irq_state[i].saved_priority = MASKED;
+ }
+ smp_wmb();
+ xics->ics[icsid] = ics;
+
+ if (icsid > xics->max_icsid)
+ xics->max_icsid = icsid;
+
+ out:
+ mutex_unlock(&kvm->lock);
+ return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+ struct kvmppc_icp *icp;
+
+ if (!vcpu->kvm->arch.xics)
+ return -ENODEV;
+
+ if (kvmppc_xics_find_server(vcpu->kvm, server_num))
+ return -EEXIST;
+
+ icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+ if (!icp)
+ return -ENOMEM;
+
+ icp->vcpu = vcpu;
+ icp->server_num = server_num;
+ icp->state.mfrr = MASKED;
+ icp->state.pending_pri = MASKED;
+ vcpu->arch.icp = icp;
+
+ XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+ return 0;
+}
+
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ union kvmppc_icp_state state;
+
+ if (!icp)
+ return 0;
+ state = icp->state;
+ return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+ ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+ ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+ ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_ics *ics;
+ u8 cppr, mfrr, pending_pri;
+ u32 xisr;
+ u16 src;
+ bool resend;
+
+ if (!icp || !xics)
+ return -ENOENT;
+
+ cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+ xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+ KVM_REG_PPC_ICP_XISR_MASK;
+ mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+ pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+ /* Require the new state to be internally consistent */
+ if (xisr == 0) {
+ if (pending_pri != 0xff)
+ return -EINVAL;
+ } else if (xisr == XICS_IPI) {
+ if (pending_pri != mfrr || pending_pri >= cppr)
+ return -EINVAL;
+ } else {
+ if (pending_pri >= mfrr || pending_pri >= cppr)
+ return -EINVAL;
+ ics = kvmppc_xics_find_ics(xics, xisr, &src);
+ if (!ics)
+ return -EINVAL;
+ }
+
+ new_state.raw = 0;
+ new_state.cppr = cppr;
+ new_state.xisr = xisr;
+ new_state.mfrr = mfrr;
+ new_state.pending_pri = pending_pri;
+
+ /*
+ * Deassert the CPU interrupt request.
+ * icp_try_update will reassert it if necessary.
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * Note that if we displace an interrupt from old_state.xisr,
+ * we don't mark it as rejected. We expect userspace to set
+ * the state of the interrupt sources to be consistent with
+ * the ICP states (either before or afterwards, which doesn't
+ * matter). We do handle resends due to CPPR becoming less
+ * favoured because that is necessary to end up with a
+ * consistent state in the situation where userspace restores
+ * the ICS states before the ICP states.
+ */
+ do {
+ old_state = ACCESS_ONCE(icp->state);
+
+ if (new_state.mfrr <= old_state.mfrr) {
+ resend = false;
+ new_state.need_resend = old_state.need_resend;
+ } else {
+ resend = old_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return 0;
+}
+
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+ int ret;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *irqp;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 idx;
+ u64 val, prio;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return -ENOENT;
+
+ irqp = &ics->irq_state[idx];
+ mutex_lock(&ics->lock);
+ ret = -ENOENT;
+ if (irqp->exists) {
+ val = irqp->server;
+ prio = irqp->priority;
+ if (prio == MASKED) {
+ val |= KVM_XICS_MASKED;
+ prio = irqp->saved_priority;
+ }
+ val |= prio << KVM_XICS_PRIORITY_SHIFT;
+ if (irqp->asserted)
+ val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+ else if (irqp->masked_pending || irqp->resend)
+ val |= KVM_XICS_PENDING;
+ ret = 0;
+ }
+ mutex_unlock(&ics->lock);
+
+ if (!ret && put_user(val, ubufp))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *irqp;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 idx;
+ u64 val;
+ u8 prio;
+ u32 server;
+
+ if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+ return -ENOENT;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics) {
+ ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+ if (!ics)
+ return -ENOMEM;
+ }
+ irqp = &ics->irq_state[idx];
+ if (get_user(val, ubufp))
+ return -EFAULT;
+
+ server = val & KVM_XICS_DESTINATION_MASK;
+ prio = val >> KVM_XICS_PRIORITY_SHIFT;
+ if (prio != MASKED &&
+ kvmppc_xics_find_server(xics->kvm, server) == NULL)
+ return -EINVAL;
+
+ mutex_lock(&ics->lock);
+ irqp->server = server;
+ irqp->saved_priority = prio;
+ if (val & KVM_XICS_MASKED)
+ prio = MASKED;
+ irqp->priority = prio;
+ irqp->resend = 0;
+ irqp->masked_pending = 0;
+ irqp->asserted = 0;
+ if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+ irqp->asserted = 1;
+ irqp->exists = 1;
+ mutex_unlock(&ics->lock);
+
+ if (val & KVM_XICS_PENDING)
+ icp_deliver_irq(xics, NULL, irqp->number);
+
+ return 0;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+ bool line_status)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+
+ return ics_deliver_irq(xics, irq, level, line_status);
+}
+
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct kvmppc_xics *xics = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ return xics_set_source(xics, attr->attr, attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct kvmppc_xics *xics = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ return xics_get_source(xics, attr->attr, attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_XICS_GRP_SOURCES:
+ if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+ attr->attr < KVMPPC_XICS_NR_IRQS)
+ return 0;
+ break;
+ }
+ return -ENXIO;
+}
+
+static void kvmppc_xics_free(struct kvm_device *dev)
+{
+ struct kvmppc_xics *xics = dev->private;
+ int i;
+ struct kvm *kvm = xics->kvm;
+
+ debugfs_remove(xics->dentry);
+
+ if (kvm)
+ kvm->arch.xics = NULL;
+
+ for (i = 0; i <= xics->max_icsid; i++)
+ kfree(xics->ics[i]);
+ kfree(xics);
+ kfree(dev);
+}
+
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
+{
+ struct kvmppc_xics *xics;
+ struct kvm *kvm = dev->kvm;
+ int ret = 0;
+
+ xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ if (!xics)
+ return -ENOMEM;
+
+ dev->private = xics;
+ xics->dev = dev;
+ xics->kvm = kvm;
+
+ /* Already there ? */
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.xics)
+ ret = -EEXIST;
+ else
+ kvm->arch.xics = xics;
+ mutex_unlock(&kvm->lock);
+
+ if (ret) {
+ kfree(xics);
+ return ret;
+ }
+
+ xics_debugfs_init(xics);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /* Enable real mode support */
+ xics->real_mode = ENABLE_REALMODE;
+ xics->real_mode_dbg = DEBUG_REALMODE;
+ }
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+ return 0;
+}
+
+struct kvm_device_ops kvm_xics_ops = {
+ .name = "kvm-xics",
+ .create = kvmppc_xics_create,
+ .destroy = kvmppc_xics_free,
+ .set_attr = xics_set_attr,
+ .get_attr = xics_get_attr,
+ .has_attr = xics_has_attr,
+};
+
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+ u32 xcpu)
+{
+ struct kvmppc_xics *xics = dev->private;
+ int r = -EBUSY;
+
+ if (dev->ops != &kvm_xics_ops)
+ return -EPERM;
+ if (xics->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type)
+ return -EBUSY;
+
+ r = kvmppc_xics_create_icp(vcpu, xcpu);
+ if (!r)
+ vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+
+ return r;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.icp)
+ return;
+ kfree(vcpu->arch.icp);
+ vcpu->arch.icp = NULL;
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 00000000000..dd9326c5c19
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID 1023
+#define KVMPPC_XICS_ICS_SHIFT 10
+#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ 16
+#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+ KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED 0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+ u32 number;
+ u32 server;
+ u8 priority;
+ u8 saved_priority;
+ u8 resend;
+ u8 masked_pending;
+ u8 asserted; /* Only for LSI */
+ u8 exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+ unsigned long raw;
+ struct {
+ u8 out_ee:1;
+ u8 need_resend:1;
+ u8 cppr;
+ u8 mfrr;
+ u8 pending_pri;
+ u32 xisr;
+ };
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+ struct kvm_vcpu *vcpu;
+ unsigned long server_num;
+ union kvmppc_icp_state state;
+ unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+ /* Real mode might find something too hard, here's the action
+ * it might request from virtual mode
+ */
+#define XICS_RM_KICK_VCPU 0x1
+#define XICS_RM_CHECK_RESEND 0x2
+#define XICS_RM_REJECT 0x4
+ u32 rm_action;
+ struct kvm_vcpu *rm_kick_target;
+ u32 rm_reject;
+
+ /* Debug stuff for real mode */
+ union kvmppc_icp_state rm_dbgstate;
+ struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {
+ struct mutex lock;
+ u16 icsid;
+ struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+ struct kvm *kvm;
+ struct kvm_device *dev;
+ struct dentry *dentry;
+ u32 max_icsid;
+ bool real_mode;
+ bool real_mode_dbg;
+ struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+ u32 nr)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
+ return vcpu->arch.icp;
+ }
+ return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+ u32 irq, u16 *source)
+{
+ u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+ u16 src = irq & KVMPPC_XICS_SRC_MASK;
+ struct kvmppc_ics *ics;
+
+ if (source)
+ *source = src;
+ if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+ return NULL;
+ ics = xics->ics[icsid];
+ if (!ics)
+ return NULL;
+ return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 77575d08c81..ab62109fdfa 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -13,9 +13,12 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
*/
#include <linux/errno.h>
@@ -29,11 +32,18 @@
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
-#include "timing.h"
#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include "timing.h"
#include "booke.h"
+#define CREATE_TRACE_POINTS
+#include "trace_booke.h"
+
unsigned long kvmppc_booke_handlers;
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
@@ -54,6 +64,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "doorbell", VCPU_STAT(dbell_exits) },
+ { "guest doorbell", VCPU_STAT(gdbell_exits) },
+ { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ NULL }
};
@@ -78,9 +91,97 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
+#ifdef CONFIG_SPE
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_save_guest_spe(vcpu);
+ vcpu->arch.shadow_msr &= ~MSR_SPE;
+ preempt_enable();
+}
+
+static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ enable_kernel_spe();
+ kvmppc_load_guest_spe(vcpu);
+ vcpu->arch.shadow_msr |= MSR_SPE;
+ preempt_enable();
+}
+
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.shared->msr & MSR_SPE) {
+ if (!(vcpu->arch.shadow_msr & MSR_SPE))
+ kvmppc_vcpu_enable_spe(vcpu);
+ } else if (vcpu->arch.shadow_msr & MSR_SPE) {
+ kvmppc_vcpu_disable_spe(vcpu);
+ }
+}
+#else
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
+ /* We always treat the FP bit as enabled from the host
+ perspective, so only need to adjust the shadow MSR */
+ vcpu->arch.shadow_msr &= ~MSR_FP;
+ vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
+#endif
+}
+
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+ /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr &= ~MSR_DE;
+ vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+ /* Force enable debug interrupts when user space wants to debug */
+ if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * Since there is no shadow MSR, sync MSR_DE into the guest
+ * visible MSR.
+ */
+ vcpu->arch.shared->msr |= MSR_DE;
+#else
+ vcpu->arch.shadow_msr |= MSR_DE;
+ vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+ }
+}
+
+/*
+ * Helper function for "full" MSR writes. No need to call this if only
+ * EE/CE/ME/DE/RI are changing.
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ u32 old_msr = vcpu->arch.shared->msr;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ new_msr |= MSR_GS;
+#endif
+
+ vcpu->arch.shared->msr = new_msr;
+
+ kvmppc_mmu_msr_notify(vcpu, old_msr);
+ kvmppc_vcpu_sync_spe(vcpu);
+ kvmppc_vcpu_sync_fpu(vcpu);
+ kvmppc_vcpu_sync_debug(vcpu);
+}
+
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
+ trace_kvm_booke_queue_irqprio(vcpu, priority);
set_bit(priority, &vcpu->arch.pending_exceptions);
}
@@ -107,6 +208,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
}
+static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags,
+ ulong esr_flags)
+{
+ vcpu->arch.queued_dear = dear_flags;
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT);
+}
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
{
vcpu->arch.queued_esr = esr_flags;
@@ -139,24 +248,113 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, prio);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
+static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GSRR0, srr0);
+ mtspr(SPRN_GSRR1, srr1);
+#else
+ vcpu->arch.shared->srr0 = srr0;
+ vcpu->arch.shared->srr1 = srr1;
+#endif
+}
+
+static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.csrr0 = srr0;
+ vcpu->arch.csrr1 = srr1;
+}
+
+static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
+ vcpu->arch.dsrr0 = srr0;
+ vcpu->arch.dsrr1 = srr1;
+ } else {
+ set_guest_csrr(vcpu, srr0, srr1);
+ }
+}
+
+static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.mcsrr0 = srr0;
+ vcpu->arch.mcsrr1 = srr1;
+}
+
+static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GDEAR);
+#else
+ return vcpu->arch.shared->dar;
+#endif
+}
+
+static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GDEAR, dear);
+#else
+ vcpu->arch.shared->dar = dear;
+#endif
+}
+
+static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GESR);
+#else
+ return vcpu->arch.shared->esr;
+#endif
+}
+
+static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GESR, esr);
+#else
+ vcpu->arch.shared->esr = esr;
+#endif
+}
+
+static unsigned long get_guest_epr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GEPR);
+#else
+ return vcpu->arch.epr;
+#endif
+}
+
/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int allowed = 0;
- ulong uninitialized_var(msr_mask);
- bool update_esr = false, update_dear = false;
+ ulong msr_mask = 0;
+ bool update_esr = false, update_dear = false, update_epr = false;
ulong crit_raw = vcpu->arch.shared->critical;
ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
bool crit;
bool keep_irq = false;
+ enum int_class int_class;
+ ulong new_msr = vcpu->arch.shared->msr;
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -174,9 +372,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
keep_irq = true;
}
+ if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
+ update_epr = true;
+
switch (priority) {
case BOOKE_IRQPRIO_DTLB_MISS:
case BOOKE_IRQPRIO_DATA_STORAGE:
+ case BOOKE_IRQPRIO_ALIGNMENT:
update_dear = true;
/* fall through */
case BOOKE_IRQPRIO_INST_STORAGE:
@@ -190,58 +392,228 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
case BOOKE_IRQPRIO_AP_UNAVAIL:
- case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
- case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_WATCHDOG:
+ case BOOKE_IRQPRIO_CRITICAL:
+ case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
case BOOKE_IRQPRIO_MACHINE_CHECK:
allowed = vcpu->arch.shared->msr & MSR_ME;
- msr_mask = 0;
+ allowed = allowed && !crit;
+ int_class = INT_CLASS_MC;
break;
- case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
+ keep_irq = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_EXTERNAL:
+ case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
allowed = allowed && !crit;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_DEBUG:
allowed = vcpu->arch.shared->msr & MSR_DE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
}
if (allowed) {
- vcpu->arch.shared->srr0 = vcpu->arch.pc;
- vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
+ switch (int_class) {
+ case INT_CLASS_NONCRIT:
+ set_guest_srr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_CRIT:
+ set_guest_csrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_DBG:
+ set_guest_dsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_MC:
+ set_guest_mcsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ }
+
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
- vcpu->arch.esr = vcpu->arch.queued_esr;
+ set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
- vcpu->arch.shared->dar = vcpu->arch.queued_dear;
- kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
+ set_guest_dear(vcpu, vcpu->arch.queued_dear);
+ if (update_epr == true) {
+ if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+ kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+ else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+ BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+ kvmppc_mpic_set_epr(vcpu);
+ }
+ }
+
+ new_msr &= msr_mask;
+#if defined(CONFIG_64BIT)
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ new_msr |= MSR_CM;
+#endif
+ kvmppc_set_msr(vcpu, new_msr);
if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * If an interrupt is pending but masked, raise a guest doorbell
+ * so that we are notified when the guest enables the relevant
+ * MSR bit.
+ */
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
+#endif
+
return allowed;
}
-/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+/*
+ * Return the number of jiffies until the next timeout. If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+ u64 tb, wdt_tb, wdt_ticks = 0;
+ u64 nr_jiffies = 0;
+ u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+ wdt_tb = 1ULL << (63 - period);
+ tb = get_tb();
+ /*
+ * The watchdog timeout will hapeen when TB bit corresponding
+ * to watchdog will toggle from 0 to 1.
+ */
+ if (tb & wdt_tb)
+ wdt_ticks = wdt_tb;
+
+ wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+ /* Convert timebase ticks to jiffies */
+ nr_jiffies = wdt_ticks;
+
+ if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+ nr_jiffies++;
+
+ return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr_jiffies;
+ unsigned long flags;
+
+ /*
+ * If TSR_ENW and TSR_WIS are not set then no need to exit to
+ * userspace, so clear the KVM_REQ_WATCHDOG request.
+ */
+ if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+ clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+
+ spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+ nr_jiffies = watchdog_next_timeout(vcpu);
+ /*
+ * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+ * then do not run the watchdog timer as this can break timer APIs.
+ */
+ if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+ else
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ u32 tsr, new_tsr;
+ int final;
+
+ do {
+ new_tsr = tsr = vcpu->arch.tsr;
+ final = 0;
+
+ /* Time out event */
+ if (tsr & TSR_ENW) {
+ if (tsr & TSR_WIS)
+ final = 1;
+ else
+ new_tsr = tsr | TSR_WIS;
+ } else {
+ new_tsr = tsr | TSR_ENW;
+ }
+ } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+ if (new_tsr & TSR_WIS) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * If this is final watchdog expiry and some action is required
+ * then exit to userspace.
+ */
+ if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+ vcpu->arch.watchdog_enabled) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * Stop running the watchdog timer after final expiration to
+ * prevent the host from being flooded with timers if the
+ * guest sets a short period.
+ * Timers will resume when TSR/TCR is updated next time.
+ */
+ if (!final)
+ arm_next_watchdog(vcpu);
+}
+
+static void update_timer_ints(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
+ kvmppc_core_queue_dec(vcpu);
+ else
+ kvmppc_core_dequeue_dec(vcpu);
+
+ if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+ kvmppc_core_queue_watchdog(vcpu);
+ else
+ kvmppc_core_dequeue_watchdog(vcpu);
+}
+
+static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
- unsigned long old_pending = vcpu->arch.pending_exceptions;
unsigned int priority;
priority = __ffs(*pending);
- while (priority <= BOOKE_IRQPRIO_MAX) {
+ while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
@@ -251,10 +623,247 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
}
/* Tell the guest about our interrupt status */
- if (*pending)
- vcpu->arch.shared->int_pending = 1;
- else if (old_pending)
- vcpu->arch.shared->int_pending = 0;
+ vcpu->arch.shared->int_pending = !!*pending;
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r = 0;
+ WARN_ON_ONCE(!irqs_disabled());
+
+ kvmppc_core_check_exceptions(vcpu);
+
+ if (vcpu->requests) {
+ /* Exception delivery raised request; start over */
+ return 1;
+ }
+
+ if (vcpu->arch.shared->msr & MSR_WE) {
+ local_irq_enable();
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ hard_irq_disable();
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+ r = 1;
+ };
+
+ return r;
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+ int r = 1; /* Indicate we want to get back into the guest */
+
+ if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+ update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_core_flush_tlb(vcpu);
+#endif
+
+ if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+ r = 0;
+ }
+
+ if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) {
+ vcpu->run->epr.epr = 0;
+ vcpu->arch.epr_needed = true;
+ vcpu->run->exit_reason = KVM_EXIT_EPR;
+ r = 0;
+ }
+
+ return r;
+}
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ int ret, s;
+ struct debug_reg debug;
+
+ if (!vcpu->arch.sane) {
+ kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
+
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ ret = s;
+ goto out;
+ }
+ /* interrupts now hard-disabled */
+
+#ifdef CONFIG_PPC_FPU
+ /* Save userspace FPU state in stack */
+ enable_kernel_fp();
+
+ /*
+ * Since we can't trap on MSR_FP in GS-mode, we consider the guest
+ * as always using the FPU. Kernel usage of FP (via
+ * enable_kernel_fp()) in this thread must not occur while
+ * vcpu->fpu_active is set.
+ */
+ vcpu->fpu_active = 1;
+
+ kvmppc_load_guest_fp(vcpu);
+#endif
+
+ /* Switch to guest debug context */
+ debug = vcpu->arch.shadow_dbg_reg;
+ switch_booke_debug_regs(&debug);
+ debug = current->thread.debug;
+ current->thread.debug = vcpu->arch.shadow_dbg_reg;
+
+ vcpu->arch.pgdir = current->mm->pgd;
+ kvmppc_fix_ee_before_entry();
+
+ ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
+
+ /* Switch back to user space debug context */
+ switch_booke_debug_regs(&debug);
+ current->thread.debug = debug;
+
+#ifdef CONFIG_PPC_FPU
+ kvmppc_save_guest_fp(vcpu);
+
+ vcpu->fpu_active = 0;
+#endif
+
+out:
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ return ret;
+}
+
+static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* don't overwrite subtypes, just account kvm_stats */
+ kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+ /* Future optimization: only reload non-volatiles if
+ * they were actually modified by emulation. */
+ return RESUME_GUEST_NV;
+
+ case EMULATE_DO_DCR:
+ run->exit_reason = KVM_EXIT_DCR;
+ return RESUME_HOST;
+
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+ run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ kvmppc_core_queue_program(vcpu, ESR_PIL);
+ return RESUME_HOST;
+
+ case EMULATE_EXIT_USER:
+ return RESUME_HOST;
+
+ default:
+ BUG();
+ }
+}
+
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ u32 dbsr = vcpu->arch.dbsr;
+
+ run->debug.arch.status = 0;
+ run->debug.arch.address = vcpu->arch.pc;
+
+ if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+ run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+ } else {
+ if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+ run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+ else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+ run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+ if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+ run->debug.arch.address = dbg_reg->dac1;
+ else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+ run->debug.arch.address = dbg_reg->dac2;
+ }
+
+ return RESUME_HOST;
+}
+
+static void kvmppc_fill_pt_regs(struct pt_regs *regs)
+{
+ ulong r1, ip, msr, lr;
+
+ asm("mr %0, 1" : "=r"(r1));
+ asm("mflr %0" : "=r"(lr));
+ asm("mfmsr %0" : "=r"(msr));
+ asm("bl 1f; 1: mflr %0" : "=r"(ip));
+
+ memset(regs, 0, sizeof(*regs));
+ regs->gpr[1] = r1;
+ regs->nip = ip;
+ regs->msr = msr;
+ regs->link = lr;
+}
+
+/*
+ * For interrupts needed to be handled by host interrupt handlers,
+ * corresponding host handler are called from here in similar way
+ * (but not exact) as they are called from low level handler
+ * (such as from arch/powerpc/kernel/head_fsl_booke.S).
+ */
+static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ struct pt_regs regs;
+
+ switch (exit_nr) {
+ case BOOKE_INTERRUPT_EXTERNAL:
+ kvmppc_fill_pt_regs(&regs);
+ do_IRQ(&regs);
+ break;
+ case BOOKE_INTERRUPT_DECREMENTER:
+ kvmppc_fill_pt_regs(&regs);
+ timer_interrupt(&regs);
+ break;
+#if defined(CONFIG_PPC_DOORBELL)
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_fill_pt_regs(&regs);
+ doorbell_exception(&regs);
+ break;
+#endif
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ /* FIXME */
+ break;
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ kvmppc_fill_pt_regs(&regs);
+ performance_monitor_exception(&regs);
+ break;
+ case BOOKE_INTERRUPT_WATCHDOG:
+ kvmppc_fill_pt_regs(&regs);
+#ifdef CONFIG_BOOKE_WDT
+ WatchdogException(&regs);
+#else
+ unknown_exception(&regs);
+#endif
+ break;
+ case BOOKE_INTERRUPT_CRITICAL:
+ unknown_exception(&regs);
+ break;
+ case BOOKE_INTERRUPT_DEBUG:
+ /* Save DBSR before preemption is enabled */
+ vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+ kvmppc_clear_dbsr();
+ break;
+ }
}
/**
@@ -265,14 +874,21 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
- enum emulation_result er;
int r = RESUME_HOST;
+ int s;
+ int idx;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
+ /* restart interrupts if they were meant for the host */
+ kvmppc_restart_interrupt(vcpu, exit_nr);
+
local_irq_enable();
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
+
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
@@ -280,62 +896,78 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_MACHINE_CHECK:
printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
kvmppc_dump_vcpu(vcpu);
+ /* For debugging, send invalid exit reason to user space */
+ run->hw.hardware_exit_reason = ~1ULL << 32;
+ run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
r = RESUME_HOST;
break;
case BOOKE_INTERRUPT_EXTERNAL:
kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DECREMENTER:
- /* Since we switched IVPR back to the host's value, the host
- * handled this interrupt the moment we enabled interrupts.
- * Now we just offer it a chance to reschedule the guest. */
kvmppc_account_exit(vcpu, DEC_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_WATCHDOG:
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_account_exit(vcpu, DBELL_EXITS);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_CE or MSR_ME was not
+ * set. Once we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_EE was not set. Once
+ * we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_HV_PRIV:
+ r = emulation_exit(run, vcpu);
+ break;
+
case BOOKE_INTERRUPT_PROGRAM:
- if (vcpu->arch.shared->msr & MSR_PR) {
- /* Program traps generated by user-level software must be handled
- * by the guest kernel. */
+ if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
+ /*
+ * Program traps generated by user-level software must
+ * be handled by the guest kernel.
+ *
+ * In GS mode, hypervisor privileged instructions trap
+ * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
+ * actual program interrupts, handled by the guest.
+ */
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- /* don't overwrite subtypes, just account kvm_stats */
- kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
- /* Future optimization: only reload non-volatiles if
- * they were actually modified by emulation. */
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_DO_DCR:
- run->exit_reason = KVM_EXIT_DCR;
- r = RESUME_HOST;
- break;
- case EMULATE_FAIL:
- /* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, vcpu->arch.pc, vcpu->arch.last_inst);
- /* For debugging, encode the failing instruction and
- * report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
- r = RESUME_HOST;
- break;
- default:
- BUG();
- }
+ r = emulation_exit(run, vcpu);
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -344,10 +976,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
- case BOOKE_INTERRUPT_SPE_UNAVAIL:
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+#ifdef CONFIG_SPE
+ case BOOKE_INTERRUPT_SPE_UNAVAIL: {
+ if (vcpu->arch.shared->msr & MSR_SPE)
+ kvmppc_vcpu_enable_spe(vcpu);
+ else
+ kvmppc_booke_queue_irqprio(vcpu,
+ BOOKE_IRQPRIO_SPE_UNAVAIL);
r = RESUME_GUEST;
break;
+ }
case BOOKE_INTERRUPT_SPE_FP_DATA:
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
@@ -358,6 +996,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
r = RESUME_GUEST;
break;
+#else
+ case BOOKE_INTERRUPT_SPE_UNAVAIL:
+ /*
+ * Guest wants SPE, but host kernel doesn't support it. Send
+ * an "unimplemented operation" program check to the guest.
+ */
+ kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
+ r = RESUME_GUEST;
+ break;
+
+ /*
+ * These really should never happen without CONFIG_SPE,
+ * as we should never enable the real MSR[SPE] in the guest.
+ */
+ case BOOKE_INTERRUPT_SPE_FP_DATA:
+ case BOOKE_INTERRUPT_SPE_FP_ROUND:
+ printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
+ __func__, exit_nr, vcpu->arch.pc);
+ run->hw.hardware_exit_reason = exit_nr;
+ r = RESUME_HOST;
+ break;
+#endif
case BOOKE_INTERRUPT_DATA_STORAGE:
kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
@@ -372,6 +1032,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_ALIGNMENT:
+ kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear,
+ vcpu->arch.fault_esr);
+ r = RESUME_GUEST;
+ break;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ case BOOKE_INTERRUPT_HV_SYSCALL:
+ if (!(vcpu->arch.shared->msr & MSR_PR)) {
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ } else {
+ /*
+ * hcall from guest userspace -- send privileged
+ * instruction program check.
+ */
+ kvmppc_core_queue_program(vcpu, ESR_PPR);
+ }
+
+ r = RESUME_GUEST;
+ break;
+#else
case BOOKE_INTERRUPT_SYSCALL:
if (!(vcpu->arch.shared->msr & MSR_PR) &&
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
@@ -385,6 +1066,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
+#endif
case BOOKE_INTERRUPT_DTLB_MISS: {
unsigned long eaddr = vcpu->arch.fault_dear;
@@ -392,6 +1074,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
gpa_t gpaddr;
gfn_t gfn;
+#ifdef CONFIG_KVM_E500V2
+ if (!(vcpu->arch.shared->msr & MSR_PR) &&
+ (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+ kvmppc_map_magic(vcpu);
+ kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+ r = RESUME_GUEST;
+
+ break;
+ }
+#endif
+
/* Check the guest TLB. */
gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
@@ -405,6 +1098,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
@@ -422,10 +1117,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Guest has mapped and accessed a page which is not
* actually RAM. */
vcpu->arch.paddr_accessed = gpaddr;
+ vcpu->arch.vaddr_accessed = eaddr;
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -449,6 +1146,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
@@ -465,22 +1164,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case BOOKE_INTERRUPT_DEBUG: {
- u32 dbsr;
-
- vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
- /* clear IAC events in DBSR register */
- dbsr = mfspr(SPRN_DBSR);
- dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
- mtspr(SPRN_DBSR, dbsr);
-
- run->exit_reason = KVM_EXIT_DEBUG;
+ r = kvmppc_handle_debug(run, vcpu);
+ if (r == RESUME_HOST)
+ run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
- r = RESUME_HOST;
break;
}
@@ -489,34 +1181,51 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
BUG();
}
- local_irq_disable();
-
- kvmppc_core_deliver_interrupts(vcpu);
-
+ /*
+ * To avoid clobbering exit_reason, only check for signals if we
+ * aren't already exiting to userspace for some other reason.
+ */
if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(current)) {
- run->exit_reason = KVM_EXIT_INTR;
- r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
- kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0)
+ r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+ else {
+ /* interrupts now hard-disabled */
+ kvmppc_fix_ee_before_entry();
}
}
return r;
}
+static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
+{
+ u32 old_tsr = vcpu->arch.tsr;
+
+ vcpu->arch.tsr = new_tsr;
+
+ if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
+ update_timer_ints(vcpu);
+}
+
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int i;
+ int r;
vcpu->arch.pc = 0;
- vcpu->arch.shared->msr = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
/* Eye-catching numbers so we know if the guest takes an interrupt
* before it's programmed its own IVPR/IVORs. */
@@ -526,7 +1235,24 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_init_timing_stats(vcpu);
- return kvmppc_core_vcpu_setup(vcpu);
+ r = kvmppc_core_vcpu_setup(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
+}
+
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ /* setup watchdog timer once */
+ spin_lock_init(&vcpu->arch.wdt_lock);
+ setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+ (unsigned long)vcpu);
+
+ return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ del_timer_sync(&vcpu->arch.wdt_timer);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -546,9 +1272,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->sprg1 = vcpu->arch.shared->sprg1;
regs->sprg2 = vcpu->arch.shared->sprg2;
regs->sprg3 = vcpu->arch.shared->sprg3;
- regs->sprg5 = vcpu->arch.sprg4;
- regs->sprg6 = vcpu->arch.sprg5;
- regs->sprg7 = vcpu->arch.sprg6;
+ regs->sprg4 = vcpu->arch.shared->sprg4;
+ regs->sprg5 = vcpu->arch.shared->sprg5;
+ regs->sprg6 = vcpu->arch.shared->sprg6;
+ regs->sprg7 = vcpu->arch.shared->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -568,13 +1295,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.shared->srr0 = regs->srr0;
vcpu->arch.shared->srr1 = regs->srr1;
+ kvmppc_set_pid(vcpu, regs->pid);
vcpu->arch.shared->sprg0 = regs->sprg0;
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
vcpu->arch.shared->sprg3 = regs->sprg3;
- vcpu->arch.sprg5 = regs->sprg4;
- vcpu->arch.sprg6 = regs->sprg5;
- vcpu->arch.sprg7 = regs->sprg6;
+ vcpu->arch.shared->sprg4 = regs->sprg4;
+ vcpu->arch.shared->sprg5 = regs->sprg5;
+ vcpu->arch.shared->sprg6 = regs->sprg6;
+ vcpu->arch.shared->sprg7 = regs->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -582,16 +1311,298 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ u64 tb = get_tb();
+
+ sregs->u.e.features |= KVM_SREGS_E_BASE;
+
+ sregs->u.e.csrr0 = vcpu->arch.csrr0;
+ sregs->u.e.csrr1 = vcpu->arch.csrr1;
+ sregs->u.e.mcsr = vcpu->arch.mcsr;
+ sregs->u.e.esr = get_guest_esr(vcpu);
+ sregs->u.e.dear = get_guest_dear(vcpu);
+ sregs->u.e.tsr = vcpu->arch.tsr;
+ sregs->u.e.tcr = vcpu->arch.tcr;
+ sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+ sregs->u.e.tb = tb;
+ sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+ return 0;
+
+ vcpu->arch.csrr0 = sregs->u.e.csrr0;
+ vcpu->arch.csrr1 = sregs->u.e.csrr1;
+ vcpu->arch.mcsr = sregs->u.e.mcsr;
+ set_guest_esr(vcpu, sregs->u.e.esr);
+ set_guest_dear(vcpu, sregs->u.e.dear);
+ vcpu->arch.vrsave = sregs->u.e.vrsave;
+ kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
+ vcpu->arch.dec = sregs->u.e.dec;
+ kvmppc_emulate_dec(vcpu);
+ }
+
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)
+ kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
+
+ return 0;
+}
+
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206;
+
+ sregs->u.e.pir = vcpu->vcpu_id;
+ sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+ sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+ sregs->u.e.decar = vcpu->arch.decar;
+ sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+ return 0;
+
+ if (sregs->u.e.pir != vcpu->vcpu_id)
+ return -EINVAL;
+
+ vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+ vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+ vcpu->arch.decar = sregs->u.e.decar;
+ vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+ return 0;
+}
+
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ sregs->u.e.features |= KVM_SREGS_E_IVOR;
+
+ sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ return 0;
+}
+
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return -ENOTSUPP;
+ sregs->pvr = vcpu->arch.pvr;
+
+ get_sregs_base(vcpu, sregs);
+ get_sregs_arch206(vcpu, sregs);
+ return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return -ENOTSUPP;
+ int ret;
+
+ if (vcpu->arch.pvr != sregs->pvr)
+ return -EINVAL;
+
+ ret = set_sregs_base(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ ret = set_sregs_arch206(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+}
+
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+ break;
+ case KVM_REG_PPC_IAC2:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case KVM_REG_PPC_IAC3:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+ break;
+ case KVM_REG_PPC_IAC4:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
+ break;
+#endif
+ case KVM_REG_PPC_DAC1:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+ break;
+ case KVM_REG_PPC_DAC2:
+ val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
+ break;
+ case KVM_REG_PPC_EPR: {
+ u32 epr = get_guest_epr(vcpu);
+ val = get_reg_val(reg->id, epr);
+ break;
+ }
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR:
+ val = get_reg_val(reg->id, vcpu->arch.epcr);
+ break;
+#endif
+ case KVM_REG_PPC_TCR:
+ val = get_reg_val(reg->id, vcpu->arch.tcr);
+ break;
+ case KVM_REG_PPC_TSR:
+ val = get_reg_val(reg->id, vcpu->arch.tsr);
+ break;
+ case KVM_REG_PPC_DEBUG_INST:
+ val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ break;
+ default:
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ break;
+ }
+
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_IAC2:
+ vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case KVM_REG_PPC_IAC3:
+ vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_IAC4:
+ vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
+ break;
+#endif
+ case KVM_REG_PPC_DAC1:
+ vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_DAC2:
+ vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_EPR: {
+ u32 new_epr = set_reg_val(reg->id, val);
+ kvmppc_set_epr(vcpu, new_epr);
+ break;
+ }
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR: {
+ u32 new_epcr = set_reg_val(reg->id, val);
+ kvmppc_set_epcr(vcpu, new_epcr);
+ break;
+ }
+#endif
+ case KVM_REG_PPC_OR_TSR: {
+ u32 tsr_bits = set_reg_val(reg->id, val);
+ kvmppc_set_tsr_bits(vcpu, tsr_bits);
+ break;
+ }
+ case KVM_REG_PPC_CLEAR_TSR: {
+ u32 tsr_bits = set_reg_val(reg->id, val);
+ kvmppc_clr_tsr_bits(vcpu, tsr_bits);
+ break;
+ }
+ case KVM_REG_PPC_TSR: {
+ u32 tsr = set_reg_val(reg->id, val);
+ kvmppc_set_tsr(vcpu, tsr);
+ break;
+ }
+ case KVM_REG_PPC_TCR: {
+ u32 tcr = set_reg_val(reg->id, val);
+ kvmppc_set_tcr(vcpu, tcr);
+ break;
+ }
+ case KVM_REG_PPC_VRSAVE:
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
+ default:
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ break;
+ }
+
+ return r;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -618,10 +1629,295 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
+{
+ return 0;
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
+{
+#if defined(CONFIG_64BIT)
+ vcpu->arch.epcr = new_epcr;
+#ifdef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
+#endif
+#endif
+}
+
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
+{
+ vcpu->arch.tcr = new_tcr;
+ arm_next_watchdog(vcpu);
+ update_timer_ints(vcpu);
+}
+
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ set_bits(tsr_bits, &vcpu->arch.tsr);
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+ /*
+ * We may have stopped the watchdog due to
+ * being stuck on final expiration.
+ */
+ if (tsr_bits & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
+ update_timer_ints(vcpu);
+}
+
+void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ if (vcpu->arch.tcr & TCR_ARE) {
+ vcpu->arch.dec = vcpu->arch.decar;
+ kvmppc_emulate_dec(vcpu);
+ }
+
+ kvmppc_set_tsr_bits(vcpu, TSR_DIS);
+}
+
+static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+ uint64_t addr, int index)
+{
+ switch (index) {
+ case 0:
+ dbg_reg->dbcr0 |= DBCR0_IAC1;
+ dbg_reg->iac1 = addr;
+ break;
+ case 1:
+ dbg_reg->dbcr0 |= DBCR0_IAC2;
+ dbg_reg->iac2 = addr;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 2:
+ dbg_reg->dbcr0 |= DBCR0_IAC3;
+ dbg_reg->iac3 = addr;
+ break;
+ case 3:
+ dbg_reg->dbcr0 |= DBCR0_IAC4;
+ dbg_reg->iac4 = addr;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ dbg_reg->dbcr0 |= DBCR0_IDM;
+ return 0;
+}
+
+static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+ int type, int index)
+{
+ switch (index) {
+ case 0:
+ if (type & KVMPPC_DEBUG_WATCH_READ)
+ dbg_reg->dbcr0 |= DBCR0_DAC1R;
+ if (type & KVMPPC_DEBUG_WATCH_WRITE)
+ dbg_reg->dbcr0 |= DBCR0_DAC1W;
+ dbg_reg->dac1 = addr;
+ break;
+ case 1:
+ if (type & KVMPPC_DEBUG_WATCH_READ)
+ dbg_reg->dbcr0 |= DBCR0_DAC2R;
+ if (type & KVMPPC_DEBUG_WATCH_WRITE)
+ dbg_reg->dbcr0 |= DBCR0_DAC2W;
+ dbg_reg->dac2 = addr;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dbg_reg->dbcr0 |= DBCR0_IDM;
+ return 0;
+}
+void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+{
+ /* XXX: Add similar MSR protection for BookE-PR */
+#ifdef CONFIG_KVM_BOOKE_HV
+ BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+ if (set) {
+ if (prot_bitmap & MSR_UCLE)
+ vcpu->arch.shadow_msrp |= MSRP_UCLEP;
+ if (prot_bitmap & MSR_DE)
+ vcpu->arch.shadow_msrp |= MSRP_DEP;
+ if (prot_bitmap & MSR_PMM)
+ vcpu->arch.shadow_msrp |= MSRP_PMMP;
+ } else {
+ if (prot_bitmap & MSR_UCLE)
+ vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
+ if (prot_bitmap & MSR_DE)
+ vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+ if (prot_bitmap & MSR_PMM)
+ vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
+ }
+#endif
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ struct debug_reg *dbg_reg;
+ int n, b = 0, w = 0;
+
+ if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+ vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ vcpu->guest_debug = 0;
+ kvm_guest_protect_msr(vcpu, MSR_DE, false);
+ return 0;
+ }
+
+ kvm_guest_protect_msr(vcpu, MSR_DE, true);
+ vcpu->guest_debug = dbg->control;
+ vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ /* Set DBCR0_EDM in guest visible DBCR0 register. */
+ vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+ /* Code below handles only HW breakpoints */
+ dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
+ * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
+ */
+ dbg_reg->dbcr1 = 0;
+ dbg_reg->dbcr2 = 0;
+#else
+ /*
+ * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+ * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+ * is set.
+ */
+ dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+ DBCR1_IAC4US;
+ dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ return 0;
+
+ for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+ uint64_t addr = dbg->arch.bp[n].addr;
+ uint32_t type = dbg->arch.bp[n].type;
+
+ if (type == KVMPPC_DEBUG_NONE)
+ continue;
+
+ if (type & !(KVMPPC_DEBUG_WATCH_READ |
+ KVMPPC_DEBUG_WATCH_WRITE |
+ KVMPPC_DEBUG_BREAKPOINT))
+ return -EINVAL;
+
+ if (type & KVMPPC_DEBUG_BREAKPOINT) {
+ /* Setting H/W breakpoint */
+ if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+ return -EINVAL;
+ } else {
+ /* Setting H/W watchpoint */
+ if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+ type, w++))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->cpu = smp_processor_id();
+ current->thread.kvm_vcpu = vcpu;
+}
+
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ current->thread.kvm_vcpu = NULL;
+ vcpu->cpu = -1;
+
+ /* Clear pending debug event in DBSR */
+ kvmppc_clear_dbsr();
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvm->arch.kvm_ops->destroy_vm(kvm);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+}
+
int __init kvmppc_booke_init(void)
{
+#ifndef CONFIG_KVM_BOOKE_HV
unsigned long ivor[16];
+ unsigned long *handler = kvmppc_booke_handler_addr;
unsigned long max_ivor = 0;
+ unsigned long handler_len;
int i;
/* We install our own exception handlers by hijacking IVPR. IVPR must
@@ -654,15 +1950,17 @@ int __init kvmppc_booke_init(void)
for (i = 0; i < 16; i++) {
if (ivor[i] > max_ivor)
- max_ivor = ivor[i];
+ max_ivor = i;
+ handler_len = handler[i + 1] - handler[i];
memcpy((void *)kvmppc_booke_handlers + ivor[i],
- kvmppc_handlers_start + i * kvmppc_handler_len,
- kvmppc_handler_len);
+ (void *)handler[i], handler_len);
}
- flush_icache_range(kvmppc_booke_handlers,
- kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+ handler_len = handler[max_ivor + 1] - handler[max_ivor];
+ flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
+ ivor[max_ivor] + handler_len);
+#endif /* !BOOKE_HV */
return 0;
}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 492bb703035..b632cd35919 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
+#include <asm/switch_to.h>
#include "timing.h"
/* interrupt priortity ordering */
@@ -48,28 +49,116 @@
#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
/* Internal pseudo-irqprio for level triggered externals */
#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
-#define BOOKE_IRQPRIO_MAX 20
+#define BOOKE_IRQPRIO_DBELL 21
+#define BOOKE_IRQPRIO_DBELL_CRIT 22
+#define BOOKE_IRQPRIO_MAX 23
-extern unsigned long kvmppc_booke_handlers;
+#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
+ (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
+ (1 << BOOKE_IRQPRIO_DBELL) | \
+ (1 << BOOKE_IRQPRIO_DECREMENTER) | \
+ (1 << BOOKE_IRQPRIO_FIT) | \
+ (1 << BOOKE_IRQPRIO_EXTERNAL))
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
- if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR))
- kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
+ (1 << BOOKE_IRQPRIO_WATCHDOG) | \
+ (1 << BOOKE_IRQPRIO_CRITICAL))
+
+extern unsigned long kvmppc_booke_handlers;
+extern unsigned long kvmppc_booke_handler_addr[];
- vcpu->arch.shared->msr = new_msr;
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
- if (vcpu->arch.shared->msr & MSR_WE) {
- kvm_vcpu_block(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- };
-}
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
+
+/* low-level asm code to transfer guest state */
+void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
+void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
+
+/* high-level function, manages flags, host state */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
+
+enum int_class {
+ INT_CLASS_NONCRIT,
+ INT_CLASS_CRIT,
+ INT_CLASS_MC,
+ INT_CLASS_DBG,
+};
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
+
+extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+ struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+ struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+ ulong *spr_val);
+
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ current->thread.fp_save_area = &vcpu->arch.fp;
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
+ giveup_fpu(current);
+ current->thread.fp_save_area = NULL;
+#endif
+}
+static inline void kvmppc_clear_dbsr(void)
+{
+ mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+}
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 1260f5f24c0..27a4b2877c1 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -23,6 +24,7 @@
#include "booke.h"
#define OP_19_XOP_RFI 50
+#define OP_19_XOP_RFCI 51
#define OP_31_XOP_MFMSR 83
#define OP_31_XOP_WRTEE 131
@@ -35,12 +37,18 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
+static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.csrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
+}
+
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int rs;
- int rt;
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
switch (get_op(inst)) {
case 19:
@@ -51,6 +59,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*advance = 0;
break;
+ case OP_19_XOP_RFCI:
+ kvmppc_emul_rfci(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFCI_EXITS);
+ *advance = 0;
+ break;
+
default:
emulated = EMULATE_FAIL;
break;
@@ -61,19 +75,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- rt = get_rt(inst);
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case OP_31_XOP_MTMSR:
- rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_WRTEE:
- rs = get_rs(inst);
vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
@@ -98,43 +109,79 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+/*
+ * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
+ * Their backing store is in real registers, and these functions
+ * will return the wrong result if called for them in another context
+ * (such as debugging).
+ */
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_DEAR:
- vcpu->arch.shared->dar = spr_val; break;
+ vcpu->arch.shared->dar = spr_val;
+ break;
case SPRN_ESR:
- vcpu->arch.esr = spr_val; break;
+ vcpu->arch.shared->esr = spr_val;
+ break;
+ case SPRN_CSRR0:
+ vcpu->arch.csrr0 = spr_val;
+ break;
+ case SPRN_CSRR1:
+ vcpu->arch.csrr1 = spr_val;
+ break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = spr_val; break;
+ vcpu->arch.dbg_reg.dbcr0 = spr_val;
+ break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = spr_val; break;
+ vcpu->arch.dbg_reg.dbcr1 = spr_val;
+ break;
case SPRN_DBSR:
- vcpu->arch.dbsr &= ~spr_val; break;
+ vcpu->arch.dbsr &= ~spr_val;
+ break;
case SPRN_TSR:
- vcpu->arch.tsr &= ~spr_val; break;
+ kvmppc_clr_tsr_bits(vcpu, spr_val);
+ break;
case SPRN_TCR:
- vcpu->arch.tcr = spr_val;
- kvmppc_emulate_dec(vcpu);
+ /*
+ * WRC is a 2-bit field that is supposed to preserve its
+ * value once written to non-zero.
+ */
+ if (vcpu->arch.tcr & TCR_WRC_MASK) {
+ spr_val &= ~TCR_WRC_MASK;
+ spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+ }
+ kvmppc_set_tcr(vcpu, spr_val);
break;
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
+ case SPRN_DECAR:
+ vcpu->arch.decar = spr_val;
+ break;
+ /*
+ * Note: SPRG4-7 are user-readable.
+ * These values are loaded into the real SPRGs when resuming the
+ * guest (PR-mode only).
+ */
case SPRN_SPRG4:
- vcpu->arch.sprg4 = spr_val; break;
+ vcpu->arch.shared->sprg4 = spr_val;
+ break;
case SPRN_SPRG5:
- vcpu->arch.sprg5 = spr_val; break;
+ vcpu->arch.shared->sprg5 = spr_val;
+ break;
case SPRN_SPRG6:
- vcpu->arch.sprg6 = spr_val; break;
+ vcpu->arch.shared->sprg6 = spr_val;
+ break;
case SPRN_SPRG7:
- vcpu->arch.sprg7 = spr_val; break;
+ vcpu->arch.shared->sprg7 = spr_val;
+ break;
case SPRN_IVPR:
vcpu->arch.ivpr = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVPR, spr_val);
+#endif
break;
case SPRN_IVOR0:
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
@@ -144,6 +191,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR2:
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR2, spr_val);
+#endif
break;
case SPRN_IVOR3:
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
@@ -162,6 +212,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR8:
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR8, spr_val);
+#endif
break;
case SPRN_IVOR9:
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
@@ -184,7 +237,17 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;
-
+ case SPRN_MCSR:
+ vcpu->arch.mcsr &= ~spr_val;
+ break;
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ kvmppc_set_epcr(vcpu, spr_val);
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+#endif
+ break;
+#endif
default:
emulated = EMULATE_FAIL;
}
@@ -192,72 +255,101 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
return emulated;
}
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_IVPR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
+ *spr_val = vcpu->arch.ivpr;
+ break;
case SPRN_DEAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
+ *spr_val = vcpu->arch.shared->dar;
+ break;
case SPRN_ESR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
+ *spr_val = vcpu->arch.shared->esr;
+ break;
+ case SPRN_EPR:
+ *spr_val = vcpu->arch.epr;
+ break;
+ case SPRN_CSRR0:
+ *spr_val = vcpu->arch.csrr0;
+ break;
+ case SPRN_CSRR1:
+ *spr_val = vcpu->arch.csrr1;
+ break;
case SPRN_DBCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
+ *spr_val = vcpu->arch.dbg_reg.dbcr0;
+ break;
case SPRN_DBCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
+ *spr_val = vcpu->arch.dbg_reg.dbcr1;
+ break;
case SPRN_DBSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
+ *spr_val = vcpu->arch.dbsr;
+ break;
+ case SPRN_TSR:
+ *spr_val = vcpu->arch.tsr;
+ break;
+ case SPRN_TCR:
+ *spr_val = vcpu->arch.tcr;
+ break;
case SPRN_IVOR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
break;
case SPRN_IVOR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
break;
case SPRN_IVOR2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
break;
case SPRN_IVOR3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
break;
case SPRN_IVOR4:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
break;
case SPRN_IVOR5:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
break;
case SPRN_IVOR6:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
break;
case SPRN_IVOR7:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
break;
case SPRN_IVOR8:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
break;
case SPRN_IVOR9:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
break;
case SPRN_IVOR10:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
break;
case SPRN_IVOR11:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
break;
case SPRN_IVOR12:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
break;
case SPRN_IVOR13:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
break;
case SPRN_IVOR14:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
break;
case SPRN_IVOR15:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ break;
+ case SPRN_MCSR:
+ *spr_val = vcpu->arch.mcsr;
+ break;
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ *spr_val = vcpu->arch.epcr;
break;
+#endif
default:
emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 1cc471faac2..2c6deb5ef2f 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -24,10 +25,6 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
-#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
-
-#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
-
/* The host stack layout: */
#define HOST_R1 0 /* Implied by stwu. */
#define HOST_CALLEE_LR 4
@@ -35,9 +32,11 @@
/* r2 is special: it holds 'current', and it made nonvolatile in the
* kernel with the -ffixed-r2 gcc option. */
#define HOST_R2 12
-#define HOST_NV_GPRS 16
-#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
-#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
+#define HOST_CR 16
+#define HOST_NV_GPRS 20
+#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
+#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
@@ -46,53 +45,102 @@
(1<<BOOKE_INTERRUPT_DEBUG))
#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
- (1<<BOOKE_INTERRUPT_DTLB_MISS))
+ (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+ (1<<BOOKE_INTERRUPT_ALIGNMENT))
#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
(1<<BOOKE_INTERRUPT_INST_STORAGE) | \
(1<<BOOKE_INTERRUPT_PROGRAM) | \
- (1<<BOOKE_INTERRUPT_DTLB_MISS))
+ (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+ (1<<BOOKE_INTERRUPT_ALIGNMENT))
-.macro KVM_HANDLER ivor_nr
-_GLOBAL(kvmppc_handler_\ivor_nr)
+.macro __KVM_HANDLER ivor_nr scratch srr0
/* Get pointer to vcpu and record exit number. */
- mtspr SPRN_SPRG_WSCRATCH0, r4
- mfspr r4, SPRN_SPRG_RVCPU
- stw r5, VCPU_GPR(r5)(r4)
- stw r6, VCPU_GPR(r6)(r4)
+ mtspr \scratch , r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ stw r3, VCPU_GPR(R3)(r4)
+ stw r5, VCPU_GPR(R5)(r4)
+ stw r6, VCPU_GPR(R6)(r4)
+ mfspr r3, \scratch
mfctr r5
- lis r6, kvmppc_resume_host@h
+ stw r3, VCPU_GPR(R4)(r4)
stw r5, VCPU_CTR(r4)
+ mfspr r3, \srr0
+ lis r6, kvmppc_resume_host@h
+ stw r3, VCPU_PC(r4)
li r5, \ivor_nr
ori r6, r6, kvmppc_resume_host@l
mtctr r6
bctr
.endm
-_GLOBAL(kvmppc_handlers_start)
-KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
-KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
-KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
-KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
-KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
-KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
-KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
-KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
-KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
-KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
-KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
-KVM_HANDLER BOOKE_INTERRUPT_FIT
-KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
-KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
-KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG
-KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
-KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
-KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
-
-_GLOBAL(kvmppc_handler_len)
- .long kvmppc_handler_1 - kvmppc_handler_0
+.macro KVM_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_DBG_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ mtspr \scratch, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ stw r3, VCPU_CRIT_SAVE(r4)
+ mfcr r3
+ mfspr r4, SPRN_CSRR1
+ andi. r4, r4, MSR_PR
+ bne 1f
+ /* debug interrupt happened in enter/exit path */
+ mfspr r4, SPRN_CSRR1
+ rlwinm r4, r4, 0, ~MSR_DE
+ mtspr SPRN_CSRR1, r4
+ lis r4, 0xffff
+ ori r4, r4, 0xffff
+ mtspr SPRN_DBSR, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ mtcr r3
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ rfci
+1: /* debug interrupt happened in guest */
+ mtcr r3
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_HANDLER_ADDR ivor_nr
+ .long kvmppc_handler_\ivor_nr
+.endm
+.macro KVM_HANDLER_END
+ .long kvmppc_handlers_end
+.endm
+
+_GLOBAL(kvmppc_handlers_start)
+KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
+KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_PROGRAM SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SYSCALL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+_GLOBAL(kvmppc_handlers_end)
/* Registers:
* SPRG_SCRATCH0: guest r4
@@ -100,12 +148,11 @@ _GLOBAL(kvmppc_handler_len)
* r5: KVM exit number
*/
_GLOBAL(kvmppc_resume_host)
- stw r3, VCPU_GPR(r3)(r4)
mfcr r3
stw r3, VCPU_CR(r4)
- stw r7, VCPU_GPR(r7)(r4)
- stw r8, VCPU_GPR(r8)(r4)
- stw r9, VCPU_GPR(r9)(r4)
+ stw r7, VCPU_GPR(R7)(r4)
+ stw r8, VCPU_GPR(R8)(r4)
+ stw r9, VCPU_GPR(R9)(r4)
li r6, 1
slw r6, r6, r5
@@ -135,23 +182,23 @@ _GLOBAL(kvmppc_resume_host)
isync
stw r9, VCPU_LAST_INST(r4)
- stw r15, VCPU_GPR(r15)(r4)
- stw r16, VCPU_GPR(r16)(r4)
- stw r17, VCPU_GPR(r17)(r4)
- stw r18, VCPU_GPR(r18)(r4)
- stw r19, VCPU_GPR(r19)(r4)
- stw r20, VCPU_GPR(r20)(r4)
- stw r21, VCPU_GPR(r21)(r4)
- stw r22, VCPU_GPR(r22)(r4)
- stw r23, VCPU_GPR(r23)(r4)
- stw r24, VCPU_GPR(r24)(r4)
- stw r25, VCPU_GPR(r25)(r4)
- stw r26, VCPU_GPR(r26)(r4)
- stw r27, VCPU_GPR(r27)(r4)
- stw r28, VCPU_GPR(r28)(r4)
- stw r29, VCPU_GPR(r29)(r4)
- stw r30, VCPU_GPR(r30)(r4)
- stw r31, VCPU_GPR(r31)(r4)
+ stw r15, VCPU_GPR(R15)(r4)
+ stw r16, VCPU_GPR(R16)(r4)
+ stw r17, VCPU_GPR(R17)(r4)
+ stw r18, VCPU_GPR(R18)(r4)
+ stw r19, VCPU_GPR(R19)(r4)
+ stw r20, VCPU_GPR(R20)(r4)
+ stw r21, VCPU_GPR(R21)(r4)
+ stw r22, VCPU_GPR(R22)(r4)
+ stw r23, VCPU_GPR(R23)(r4)
+ stw r24, VCPU_GPR(R24)(r4)
+ stw r25, VCPU_GPR(R25)(r4)
+ stw r26, VCPU_GPR(R26)(r4)
+ stw r27, VCPU_GPR(R27)(r4)
+ stw r28, VCPU_GPR(R28)(r4)
+ stw r29, VCPU_GPR(R29)(r4)
+ stw r30, VCPU_GPR(R30)(r4)
+ stw r31, VCPU_GPR(R31)(r4)
..skip_inst_copy:
/* Also grab DEAR and ESR before the host can clobber them. */
@@ -169,22 +216,18 @@ _GLOBAL(kvmppc_resume_host)
..skip_esr:
/* Save remaining volatile guest register state to vcpu. */
- stw r0, VCPU_GPR(r0)(r4)
- stw r1, VCPU_GPR(r1)(r4)
- stw r2, VCPU_GPR(r2)(r4)
- stw r10, VCPU_GPR(r10)(r4)
- stw r11, VCPU_GPR(r11)(r4)
- stw r12, VCPU_GPR(r12)(r4)
- stw r13, VCPU_GPR(r13)(r4)
- stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
+ stw r0, VCPU_GPR(R0)(r4)
+ stw r1, VCPU_GPR(R1)(r4)
+ stw r2, VCPU_GPR(R2)(r4)
+ stw r10, VCPU_GPR(R10)(r4)
+ stw r11, VCPU_GPR(R11)(r4)
+ stw r12, VCPU_GPR(R12)(r4)
+ stw r13, VCPU_GPR(R13)(r4)
+ stw r14, VCPU_GPR(R14)(r4) /* We need a NV GPR below. */
mflr r3
stw r3, VCPU_LR(r4)
mfxer r3
stw r3, VCPU_XER(r4)
- mfspr r3, SPRN_SPRG_RSCRATCH0
- stw r3, VCPU_GPR(r4)(r4)
- mfspr r3, SPRN_SRR0
- stw r3, VCPU_PC(r4)
/* Restore host stack pointer and PID before IVPR, since the host
* exception handlers use them. */
@@ -192,6 +235,12 @@ _GLOBAL(kvmppc_resume_host)
lwz r3, VCPU_HOST_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_FSL_BOOKE
+ /* we cheat and know that Linux doesn't use PID1 which is always 0 */
+ lis r3, 0
+ mtspr SPRN_PID1, r3
+#endif
+
/* Restore host IVPR before re-enabling interrupts. We cheat and know
* that Linux IVPR is always 0xc0000000. */
lis r3, 0xc000
@@ -208,28 +257,28 @@ _GLOBAL(kvmppc_resume_host)
/* Restore vcpu pointer and the nonvolatiles we used. */
mr r4, r14
- lwz r14, VCPU_GPR(r14)(r4)
+ lwz r14, VCPU_GPR(R14)(r4)
/* Sometimes instruction emulation must restore complete GPR state. */
andi. r5, r3, RESUME_FLAG_NV
beq ..skip_nv_load
- lwz r15, VCPU_GPR(r15)(r4)
- lwz r16, VCPU_GPR(r16)(r4)
- lwz r17, VCPU_GPR(r17)(r4)
- lwz r18, VCPU_GPR(r18)(r4)
- lwz r19, VCPU_GPR(r19)(r4)
- lwz r20, VCPU_GPR(r20)(r4)
- lwz r21, VCPU_GPR(r21)(r4)
- lwz r22, VCPU_GPR(r22)(r4)
- lwz r23, VCPU_GPR(r23)(r4)
- lwz r24, VCPU_GPR(r24)(r4)
- lwz r25, VCPU_GPR(r25)(r4)
- lwz r26, VCPU_GPR(r26)(r4)
- lwz r27, VCPU_GPR(r27)(r4)
- lwz r28, VCPU_GPR(r28)(r4)
- lwz r29, VCPU_GPR(r29)(r4)
- lwz r30, VCPU_GPR(r30)(r4)
- lwz r31, VCPU_GPR(r31)(r4)
+ lwz r15, VCPU_GPR(R15)(r4)
+ lwz r16, VCPU_GPR(R16)(r4)
+ lwz r17, VCPU_GPR(R17)(r4)
+ lwz r18, VCPU_GPR(R18)(r4)
+ lwz r19, VCPU_GPR(R19)(r4)
+ lwz r20, VCPU_GPR(R20)(r4)
+ lwz r21, VCPU_GPR(R21)(r4)
+ lwz r22, VCPU_GPR(R22)(r4)
+ lwz r23, VCPU_GPR(R23)(r4)
+ lwz r24, VCPU_GPR(R24)(r4)
+ lwz r25, VCPU_GPR(R25)(r4)
+ lwz r26, VCPU_GPR(R26)(r4)
+ lwz r27, VCPU_GPR(R27)(r4)
+ lwz r28, VCPU_GPR(R28)(r4)
+ lwz r29, VCPU_GPR(R29)(r4)
+ lwz r30, VCPU_GPR(R30)(r4)
+ lwz r31, VCPU_GPR(R31)(r4)
..skip_nv_load:
/* Should we return to the guest? */
@@ -241,50 +290,60 @@ _GLOBAL(kvmppc_resume_host)
heavyweight_exit:
/* Not returning to guest. */
+#ifdef CONFIG_SPE
+ /* save guest SPEFSCR and load host SPEFSCR */
+ mfspr r9, SPRN_SPEFSCR
+ stw r9, VCPU_SPEFSCR(r4)
+ lwz r9, VCPU_HOST_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r9
+#endif
+
/* We already saved guest volatile register state; now save the
* non-volatiles. */
- stw r15, VCPU_GPR(r15)(r4)
- stw r16, VCPU_GPR(r16)(r4)
- stw r17, VCPU_GPR(r17)(r4)
- stw r18, VCPU_GPR(r18)(r4)
- stw r19, VCPU_GPR(r19)(r4)
- stw r20, VCPU_GPR(r20)(r4)
- stw r21, VCPU_GPR(r21)(r4)
- stw r22, VCPU_GPR(r22)(r4)
- stw r23, VCPU_GPR(r23)(r4)
- stw r24, VCPU_GPR(r24)(r4)
- stw r25, VCPU_GPR(r25)(r4)
- stw r26, VCPU_GPR(r26)(r4)
- stw r27, VCPU_GPR(r27)(r4)
- stw r28, VCPU_GPR(r28)(r4)
- stw r29, VCPU_GPR(r29)(r4)
- stw r30, VCPU_GPR(r30)(r4)
- stw r31, VCPU_GPR(r31)(r4)
+ stw r15, VCPU_GPR(R15)(r4)
+ stw r16, VCPU_GPR(R16)(r4)
+ stw r17, VCPU_GPR(R17)(r4)
+ stw r18, VCPU_GPR(R18)(r4)
+ stw r19, VCPU_GPR(R19)(r4)
+ stw r20, VCPU_GPR(R20)(r4)
+ stw r21, VCPU_GPR(R21)(r4)
+ stw r22, VCPU_GPR(R22)(r4)
+ stw r23, VCPU_GPR(R23)(r4)
+ stw r24, VCPU_GPR(R24)(r4)
+ stw r25, VCPU_GPR(R25)(r4)
+ stw r26, VCPU_GPR(R26)(r4)
+ stw r27, VCPU_GPR(R27)(r4)
+ stw r28, VCPU_GPR(R28)(r4)
+ stw r29, VCPU_GPR(R29)(r4)
+ stw r30, VCPU_GPR(R30)(r4)
+ stw r31, VCPU_GPR(R31)(r4)
/* Load host non-volatile register state from host stack. */
- lwz r14, HOST_NV_GPR(r14)(r1)
- lwz r15, HOST_NV_GPR(r15)(r1)
- lwz r16, HOST_NV_GPR(r16)(r1)
- lwz r17, HOST_NV_GPR(r17)(r1)
- lwz r18, HOST_NV_GPR(r18)(r1)
- lwz r19, HOST_NV_GPR(r19)(r1)
- lwz r20, HOST_NV_GPR(r20)(r1)
- lwz r21, HOST_NV_GPR(r21)(r1)
- lwz r22, HOST_NV_GPR(r22)(r1)
- lwz r23, HOST_NV_GPR(r23)(r1)
- lwz r24, HOST_NV_GPR(r24)(r1)
- lwz r25, HOST_NV_GPR(r25)(r1)
- lwz r26, HOST_NV_GPR(r26)(r1)
- lwz r27, HOST_NV_GPR(r27)(r1)
- lwz r28, HOST_NV_GPR(r28)(r1)
- lwz r29, HOST_NV_GPR(r29)(r1)
- lwz r30, HOST_NV_GPR(r30)(r1)
- lwz r31, HOST_NV_GPR(r31)(r1)
+ lwz r14, HOST_NV_GPR(R14)(r1)
+ lwz r15, HOST_NV_GPR(R15)(r1)
+ lwz r16, HOST_NV_GPR(R16)(r1)
+ lwz r17, HOST_NV_GPR(R17)(r1)
+ lwz r18, HOST_NV_GPR(R18)(r1)
+ lwz r19, HOST_NV_GPR(R19)(r1)
+ lwz r20, HOST_NV_GPR(R20)(r1)
+ lwz r21, HOST_NV_GPR(R21)(r1)
+ lwz r22, HOST_NV_GPR(R22)(r1)
+ lwz r23, HOST_NV_GPR(R23)(r1)
+ lwz r24, HOST_NV_GPR(R24)(r1)
+ lwz r25, HOST_NV_GPR(R25)(r1)
+ lwz r26, HOST_NV_GPR(R26)(r1)
+ lwz r27, HOST_NV_GPR(R27)(r1)
+ lwz r28, HOST_NV_GPR(R28)(r1)
+ lwz r29, HOST_NV_GPR(R29)(r1)
+ lwz r30, HOST_NV_GPR(R30)(r1)
+ lwz r31, HOST_NV_GPR(R31)(r1)
/* Return to kvm_vcpu_run(). */
lwz r4, HOST_STACK_LR(r1)
+ lwz r5, HOST_CR(r1)
addi r1, r1, HOST_STACK_SIZE
mtlr r4
+ mtcr r5
/* r3 still contains the return code from kvmppc_handle_exit(). */
blr
@@ -301,46 +360,56 @@ _GLOBAL(__kvmppc_vcpu_run)
stw r3, HOST_RUN(r1)
mflr r3
stw r3, HOST_STACK_LR(r1)
+ mfcr r5
+ stw r5, HOST_CR(r1)
/* Save host non-volatile register state to stack. */
- stw r14, HOST_NV_GPR(r14)(r1)
- stw r15, HOST_NV_GPR(r15)(r1)
- stw r16, HOST_NV_GPR(r16)(r1)
- stw r17, HOST_NV_GPR(r17)(r1)
- stw r18, HOST_NV_GPR(r18)(r1)
- stw r19, HOST_NV_GPR(r19)(r1)
- stw r20, HOST_NV_GPR(r20)(r1)
- stw r21, HOST_NV_GPR(r21)(r1)
- stw r22, HOST_NV_GPR(r22)(r1)
- stw r23, HOST_NV_GPR(r23)(r1)
- stw r24, HOST_NV_GPR(r24)(r1)
- stw r25, HOST_NV_GPR(r25)(r1)
- stw r26, HOST_NV_GPR(r26)(r1)
- stw r27, HOST_NV_GPR(r27)(r1)
- stw r28, HOST_NV_GPR(r28)(r1)
- stw r29, HOST_NV_GPR(r29)(r1)
- stw r30, HOST_NV_GPR(r30)(r1)
- stw r31, HOST_NV_GPR(r31)(r1)
+ stw r14, HOST_NV_GPR(R14)(r1)
+ stw r15, HOST_NV_GPR(R15)(r1)
+ stw r16, HOST_NV_GPR(R16)(r1)
+ stw r17, HOST_NV_GPR(R17)(r1)
+ stw r18, HOST_NV_GPR(R18)(r1)
+ stw r19, HOST_NV_GPR(R19)(r1)
+ stw r20, HOST_NV_GPR(R20)(r1)
+ stw r21, HOST_NV_GPR(R21)(r1)
+ stw r22, HOST_NV_GPR(R22)(r1)
+ stw r23, HOST_NV_GPR(R23)(r1)
+ stw r24, HOST_NV_GPR(R24)(r1)
+ stw r25, HOST_NV_GPR(R25)(r1)
+ stw r26, HOST_NV_GPR(R26)(r1)
+ stw r27, HOST_NV_GPR(R27)(r1)
+ stw r28, HOST_NV_GPR(R28)(r1)
+ stw r29, HOST_NV_GPR(R29)(r1)
+ stw r30, HOST_NV_GPR(R30)(r1)
+ stw r31, HOST_NV_GPR(R31)(r1)
/* Load guest non-volatiles. */
- lwz r14, VCPU_GPR(r14)(r4)
- lwz r15, VCPU_GPR(r15)(r4)
- lwz r16, VCPU_GPR(r16)(r4)
- lwz r17, VCPU_GPR(r17)(r4)
- lwz r18, VCPU_GPR(r18)(r4)
- lwz r19, VCPU_GPR(r19)(r4)
- lwz r20, VCPU_GPR(r20)(r4)
- lwz r21, VCPU_GPR(r21)(r4)
- lwz r22, VCPU_GPR(r22)(r4)
- lwz r23, VCPU_GPR(r23)(r4)
- lwz r24, VCPU_GPR(r24)(r4)
- lwz r25, VCPU_GPR(r25)(r4)
- lwz r26, VCPU_GPR(r26)(r4)
- lwz r27, VCPU_GPR(r27)(r4)
- lwz r28, VCPU_GPR(r28)(r4)
- lwz r29, VCPU_GPR(r29)(r4)
- lwz r30, VCPU_GPR(r30)(r4)
- lwz r31, VCPU_GPR(r31)(r4)
+ lwz r14, VCPU_GPR(R14)(r4)
+ lwz r15, VCPU_GPR(R15)(r4)
+ lwz r16, VCPU_GPR(R16)(r4)
+ lwz r17, VCPU_GPR(R17)(r4)
+ lwz r18, VCPU_GPR(R18)(r4)
+ lwz r19, VCPU_GPR(R19)(r4)
+ lwz r20, VCPU_GPR(R20)(r4)
+ lwz r21, VCPU_GPR(R21)(r4)
+ lwz r22, VCPU_GPR(R22)(r4)
+ lwz r23, VCPU_GPR(R23)(r4)
+ lwz r24, VCPU_GPR(R24)(r4)
+ lwz r25, VCPU_GPR(R25)(r4)
+ lwz r26, VCPU_GPR(R26)(r4)
+ lwz r27, VCPU_GPR(R27)(r4)
+ lwz r28, VCPU_GPR(R28)(r4)
+ lwz r29, VCPU_GPR(R29)(r4)
+ lwz r30, VCPU_GPR(R30)(r4)
+ lwz r31, VCPU_GPR(R31)(r4)
+
+#ifdef CONFIG_SPE
+ /* save host SPEFSCR and load guest SPEFSCR */
+ mfspr r3, SPRN_SPEFSCR
+ stw r3, VCPU_HOST_SPEFSCR(r4)
+ lwz r3, VCPU_SPEFSCR(r4)
+ mtspr SPRN_SPEFSCR, r3
+#endif
lightweight_exit:
stw r2, HOST_R2(r1)
@@ -350,18 +419,23 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_FSL_BOOKE
+ lwz r3, VCPU_SHADOW_PID1(r4)
+ mtspr SPRN_PID1, r3
+#endif
+
#ifdef CONFIG_44x
iccci 0, 0 /* XXX hack */
#endif
/* Load some guest volatiles. */
- lwz r0, VCPU_GPR(r0)(r4)
- lwz r2, VCPU_GPR(r2)(r4)
- lwz r9, VCPU_GPR(r9)(r4)
- lwz r10, VCPU_GPR(r10)(r4)
- lwz r11, VCPU_GPR(r11)(r4)
- lwz r12, VCPU_GPR(r12)(r4)
- lwz r13, VCPU_GPR(r13)(r4)
+ lwz r0, VCPU_GPR(R0)(r4)
+ lwz r2, VCPU_GPR(R2)(r4)
+ lwz r9, VCPU_GPR(R9)(r4)
+ lwz r10, VCPU_GPR(R10)(r4)
+ lwz r11, VCPU_GPR(R11)(r4)
+ lwz r12, VCPU_GPR(R12)(r4)
+ lwz r13, VCPU_GPR(R13)(r4)
lwz r3, VCPU_LR(r4)
mtlr r3
lwz r3, VCPU_XER(r4)
@@ -373,23 +447,25 @@ lightweight_exit:
lwz r8, kvmppc_booke_handlers@l(r8)
mtspr SPRN_IVPR, r8
- /* Save vcpu pointer for the exception handlers. */
- mtspr SPRN_SPRG_WVCPU, r4
+ lwz r5, VCPU_SHARED(r4)
/* Can't switch the stack pointer until after IVPR is switched,
* because host interrupt handlers would get confused. */
- lwz r1, VCPU_GPR(r1)(r4)
-
- /* XXX handle USPRG0 */
- /* Host interrupt handlers may have clobbered these guest-readable
- * SPRGs, so we need to reload them here with the guest's values. */
- lwz r3, VCPU_SPRG4(r4)
+ lwz r1, VCPU_GPR(R1)(r4)
+
+ /*
+ * Host interrupt handlers may have clobbered these
+ * guest-readable SPRGs, or the guest kernel may have
+ * written directly to the shared area, so we
+ * need to reload them here with the guest's values.
+ */
+ PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
mtspr SPRN_SPRG4W, r3
- lwz r3, VCPU_SPRG5(r4)
+ PPC_LD(r3, VCPU_SHARED_SPRG5, r5)
mtspr SPRN_SPRG5W, r3
- lwz r3, VCPU_SPRG6(r4)
+ PPC_LD(r3, VCPU_SHARED_SPRG6, r5)
mtspr SPRN_SPRG6W, r3
- lwz r3, VCPU_SPRG7(r4)
+ PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
mtspr SPRN_SPRG7W, r3
#ifdef CONFIG_KVM_EXIT_TIMING
@@ -406,20 +482,17 @@ lightweight_exit:
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
+ lwz r5, VCPU_CR(r4)
+ lwz r6, VCPU_PC(r4)
+ lwz r7, VCPU_SHADOW_MSR(r4)
mtctr r3
- lwz r3, VCPU_CR(r4)
- mtcr r3
- lwz r5, VCPU_GPR(r5)(r4)
- lwz r6, VCPU_GPR(r6)(r4)
- lwz r7, VCPU_GPR(r7)(r4)
- lwz r8, VCPU_GPR(r8)(r4)
- lwz r3, VCPU_PC(r4)
- mtsrr0 r3
- lwz r3, VCPU_SHARED(r4)
- lwz r3, (VCPU_SHARED_MSR + 4)(r3)
- oris r3, r3, KVMPPC_MSR_MASK@h
- ori r3, r3, KVMPPC_MSR_MASK@l
- mtsrr1 r3
+ mtcr r5
+ mtsrr0 r6
+ mtsrr1 r7
+ lwz r5, VCPU_GPR(R5)(r4)
+ lwz r6, VCPU_GPR(R6)(r4)
+ lwz r7, VCPU_GPR(R7)(r4)
+ lwz r8, VCPU_GPR(R8)(r4)
/* Clear any debug events which occurred since we disabled MSR[DE].
* XXX This gives us a 3-instruction window in which a breakpoint
@@ -428,6 +501,52 @@ lightweight_exit:
ori r3, r3, 0xffff
mtspr SPRN_DBSR, r3
- lwz r3, VCPU_GPR(r3)(r4)
- lwz r4, VCPU_GPR(r4)(r4)
+ lwz r3, VCPU_GPR(R3)(r4)
+ lwz r4, VCPU_GPR(R4)(r4)
rfi
+
+ .data
+ .align 4
+ .globl kvmppc_booke_handler_addr
+kvmppc_booke_handler_addr:
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND
+KVM_HANDLER_END /*Always keep this in end*/
+
+#ifdef CONFIG_SPE
+_GLOBAL(kvmppc_save_guest_spe)
+ cmpi 0,r3,0
+ beqlr-
+ SAVE_32EVRS(0, r4, r3, VCPU_EVR)
+ evxor evr6, evr6, evr6
+ evmwumiaa evr6, evr6, evr6
+ li r4,VCPU_ACC
+ evstddx evr6, r4, r3 /* save acc */
+ blr
+
+_GLOBAL(kvmppc_load_guest_spe)
+ cmpi 0,r3,0
+ beqlr-
+ li r4,VCPU_ACC
+ evlddx evr6,r4,r3
+ evmra evr6,evr6 /* load acc */
+ REST_32EVRS(0, r4, r3, VCPU_EVR)
+ blr
+#endif
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
new file mode 100644
index 00000000000..a1712b818a5
--- /dev/null
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -0,0 +1,734 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ * Author: Scott Wood <scotwood@freescale.com>
+ * Author: Mihai Caraman <mihai.caraman@freescale.com>
+ *
+ * This file is derived from arch/powerpc/kvm/booke_interrupts.S
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/bitsperlong.h>
+#include <asm/thread_info.h>
+
+#ifdef CONFIG_64BIT
+#include <asm/exception-64e.h>
+#include <asm/hw_irq.h>
+#include <asm/irqflags.h>
+#else
+#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+#endif
+
+#define LONGBYTES (BITS_PER_LONG / 8)
+
+#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
+
+/* The host stack layout: */
+#define HOST_R1 0 /* Implied by stwu. */
+#define HOST_CALLEE_LR PPC_LR_STKOFF
+#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
+/*
+ * r2 is special: it holds 'current', and it made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option.
+ */
+#define HOST_R2 (HOST_RUN + LONGBYTES)
+#define HOST_CR (HOST_R2 + LONGBYTES)
+#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
+#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
+#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
+#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
+/* LR in caller stack frame. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
+
+#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
+#define NEED_DEAR 0x00000002 /* save faulting DEAR */
+#define NEED_ESR 0x00000004 /* save faulting ESR */
+
+/*
+ * On entry:
+ * r4 = vcpu, r5 = srr0, r6 = srr1
+ * saved in vcpu: cr, ctr, r3-r13
+ */
+.macro kvm_handler_common intno, srr0, flags
+ /* Restore host stack pointer */
+ PPC_STL r1, VCPU_GPR(R1)(r4)
+ PPC_STL r2, VCPU_GPR(R2)(r4)
+ PPC_LL r1, VCPU_HOST_STACK(r4)
+ PPC_LL r2, HOST_R2(r1)
+
+ mfspr r10, SPRN_PID
+ lwz r8, VCPU_HOST_PID(r4)
+ PPC_LL r11, VCPU_SHARED(r4)
+ PPC_STL r14, VCPU_GPR(R14)(r4) /* We need a non-volatile GPR. */
+ li r14, \intno
+
+ stw r10, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r8
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1: mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ bne- 1b
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+ oris r8, r6, MSR_CE@h
+ PPC_STD(r6, VCPU_SHARED_MSR, r11)
+ ori r8, r8, MSR_ME | MSR_RI
+ PPC_STL r5, VCPU_PC(r4)
+
+ /*
+ * Make sure CE/ME/RI are set (if appropriate for exception type)
+ * whether or not the guest had it set. Since mfmsr/mtmsr are
+ * somewhat expensive, skip in the common case where the guest
+ * had all these bits set (and thus they're still set if
+ * appropriate for the exception type).
+ */
+ cmpw r6, r8
+ beq 1f
+ mfmsr r7
+ .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
+ oris r7, r7, MSR_CE@h
+ .endif
+ .if \srr0 != SPRN_MCSRR0
+ ori r7, r7, MSR_ME | MSR_RI
+ .endif
+ mtmsr r7
+1:
+
+ .if \flags & NEED_EMU
+ /*
+ * This assumes you have external PID support.
+ * To support a bookehv CPU without external PID, you'll
+ * need to look up the TLB entry and create a temporary mapping.
+ *
+ * FIXME: we don't currently handle if the lwepx faults. PR-mode
+ * booke doesn't handle it either. Since Linux doesn't use
+ * broadcast tlbivax anymore, the only way this should happen is
+ * if the guest maps its memory execute-but-not-read, or if we
+ * somehow take a TLB miss in the middle of this entry code and
+ * evict the relevant entry. On e500mc, all kernel lowmem is
+ * bolted into TLB1 large page mappings, and we don't use
+ * broadcast invalidates, so we should not take a TLB miss here.
+ *
+ * Later we'll need to deal with faults here. Disallowing guest
+ * mappings that are execute-but-not-read could be an option on
+ * e500mc, but not on chips with an LRAT if it is used.
+ */
+
+ mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */
+ PPC_STL r15, VCPU_GPR(R15)(r4)
+ PPC_STL r16, VCPU_GPR(R16)(r4)
+ PPC_STL r17, VCPU_GPR(R17)(r4)
+ PPC_STL r18, VCPU_GPR(R18)(r4)
+ PPC_STL r19, VCPU_GPR(R19)(r4)
+ mr r8, r3
+ PPC_STL r20, VCPU_GPR(R20)(r4)
+ rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
+ PPC_STL r21, VCPU_GPR(R21)(r4)
+ rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
+ PPC_STL r22, VCPU_GPR(R22)(r4)
+ rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID
+ PPC_STL r23, VCPU_GPR(R23)(r4)
+ PPC_STL r24, VCPU_GPR(R24)(r4)
+ PPC_STL r25, VCPU_GPR(R25)(r4)
+ PPC_STL r26, VCPU_GPR(R26)(r4)
+ PPC_STL r27, VCPU_GPR(R27)(r4)
+ PPC_STL r28, VCPU_GPR(R28)(r4)
+ PPC_STL r29, VCPU_GPR(R29)(r4)
+ PPC_STL r30, VCPU_GPR(R30)(r4)
+ PPC_STL r31, VCPU_GPR(R31)(r4)
+ mtspr SPRN_EPLC, r8
+
+ /* disable preemption, so we are sure we hit the fixup handler */
+ CURRENT_THREAD_INFO(r8, r1)
+ li r7, 1
+ stw r7, TI_PREEMPT(r8)
+
+ isync
+
+ /*
+ * In case the read goes wrong, we catch it and write an invalid value
+ * in LAST_INST instead.
+ */
+1: lwepx r9, 0, r5
+2:
+.section .fixup, "ax"
+3: li r9, KVM_INST_FETCH_FAILED
+ b 2b
+.previous
+.section __ex_table,"a"
+ PPC_LONG_ALIGN
+ PPC_LONG 1b,3b
+.previous
+
+ mtspr SPRN_EPLC, r3
+ li r7, 0
+ stw r7, TI_PREEMPT(r8)
+ stw r9, VCPU_LAST_INST(r4)
+ .endif
+
+ .if \flags & NEED_ESR
+ mfspr r8, SPRN_ESR
+ PPC_STL r8, VCPU_FAULT_ESR(r4)
+ .endif
+
+ .if \flags & NEED_DEAR
+ mfspr r9, SPRN_DEAR
+ PPC_STL r9, VCPU_FAULT_DEAR(r4)
+ .endif
+
+ b kvmppc_resume_host
+.endm
+
+#ifdef CONFIG_64BIT
+/* Exception types */
+#define EX_GEN 1
+#define EX_GDBELL 2
+#define EX_DBG 3
+#define EX_MC 4
+#define EX_CRIT 5
+#define EX_TLB 6
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
+ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mr r11, r4
+ /*
+ * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
+ */
+ PPC_LL r4, PACACURRENT(r13)
+ PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
+ stw r10, VCPU_CR(r4)
+ PPC_STL r11, VCPU_GPR(R4)(r4)
+ PPC_STL r5, VCPU_GPR(R5)(r4)
+ PPC_STL r6, VCPU_GPR(R6)(r4)
+ PPC_STL r8, VCPU_GPR(R8)(r4)
+ PPC_STL r9, VCPU_GPR(R9)(r4)
+ .if \type == EX_TLB
+ PPC_LL r5, EX_TLB_R13(r12)
+ PPC_LL r6, EX_TLB_R10(r12)
+ PPC_LL r8, EX_TLB_R11(r12)
+ mfspr r12, \scratch
+ .else
+ mfspr r5, \scratch
+ PPC_LL r6, (\paca_ex + \ex_r10)(r13)
+ PPC_LL r8, (\paca_ex + \ex_r11)(r13)
+ .endif
+ PPC_STL r5, VCPU_GPR(R13)(r4)
+ PPC_STL r3, VCPU_GPR(R3)(r4)
+ PPC_STL r7, VCPU_GPR(R7)(r4)
+ PPC_STL r12, VCPU_GPR(R12)(r4)
+ PPC_STL r6, VCPU_GPR(R10)(r4)
+ PPC_STL r8, VCPU_GPR(R11)(r4)
+ mfctr r5
+ PPC_STL r5, VCPU_CTR(r4)
+ mfspr r5, \srr0
+ mfspr r6, \srr1
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+#define EX_PARAMS(type) \
+ EX_##type, \
+ SPRN_SPRG_##type##_SCRATCH, \
+ PACA_EX##type, \
+ EX_R10, \
+ EX_R11
+
+#define EX_PARAMS_TLB \
+ EX_TLB, \
+ SPRN_SPRG_GEN_SCRATCH, \
+ PACA_EXTLB, \
+ EX_TLB_R10, \
+ EX_TLB_R11
+
+kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
+ SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
+ SPRN_CSRR0, SPRN_CSRR1, 0
+/*
+ * Only bolted TLB miss exception handlers are supported for now
+ */
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
+ SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
+ SPRN_DSRR0, SPRN_DSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+#else
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
+ PPC_STL r3, VCPU_GPR(R3)(r11)
+ mfspr r3, SPRN_SPRG_RSCRATCH0
+ PPC_STL r4, VCPU_GPR(R4)(r11)
+ PPC_LL r4, THREAD_NORMSAVE(0)(r10)
+ PPC_STL r5, VCPU_GPR(R5)(r11)
+ stw r13, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(R10)(r11)
+ PPC_LL r3, THREAD_NORMSAVE(2)(r10)
+ PPC_STL r6, VCPU_GPR(R6)(r11)
+ PPC_STL r4, VCPU_GPR(R11)(r11)
+ mfspr r6, \srr1
+ PPC_STL r7, VCPU_GPR(R7)(r11)
+ PPC_STL r8, VCPU_GPR(R8)(r11)
+ PPC_STL r9, VCPU_GPR(R9)(r11)
+ PPC_STL r3, VCPU_GPR(R13)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(R12)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+.macro kvm_lvl_handler intno scratch srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mfspr r10, SPRN_SPRG_THREAD
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
+ PPC_STL r3, VCPU_GPR(R3)(r11)
+ mfspr r3, \scratch
+ PPC_STL r4, VCPU_GPR(R4)(r11)
+ PPC_LL r4, GPR9(r8)
+ PPC_STL r5, VCPU_GPR(R5)(r11)
+ stw r9, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(R8)(r11)
+ PPC_LL r3, GPR10(r8)
+ PPC_STL r6, VCPU_GPR(R6)(r11)
+ PPC_STL r4, VCPU_GPR(R9)(r11)
+ mfspr r6, \srr1
+ PPC_LL r4, GPR11(r8)
+ PPC_STL r7, VCPU_GPR(R7)(r11)
+ PPC_STL r3, VCPU_GPR(R10)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(R12)(r11)
+ PPC_STL r13, VCPU_GPR(R13)(r11)
+ PPC_STL r4, VCPU_GPR(R11)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
+ SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
+#endif
+
+/* Registers:
+ * SPRG_SCRATCH0: guest r10
+ * r4: vcpu pointer
+ * r11: vcpu->arch.shared
+ * r14: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+ /* Save remaining volatile guest register state to vcpu. */
+ mfspr r3, SPRN_VRSAVE
+ PPC_STL r0, VCPU_GPR(R0)(r4)
+ mflr r5
+ mfspr r6, SPRN_SPRG4
+ PPC_STL r5, VCPU_LR(r4)
+ mfspr r7, SPRN_SPRG5
+ stw r3, VCPU_VRSAVE(r4)
+#ifdef CONFIG_64BIT
+ PPC_LL r3, PACA_SPRG_VDSO(r13)
+#endif
+ PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
+ mfspr r8, SPRN_SPRG6
+ PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
+ mfspr r9, SPRN_SPRG7
+#ifdef CONFIG_64BIT
+ mtspr SPRN_SPRG_VDSO_WRITE, r3
+#endif
+ PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
+ mfxer r3
+ PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
+
+ /* save guest MAS registers and restore host mas4 & mas6 */
+ mfspr r5, SPRN_MAS0
+ PPC_STL r3, VCPU_XER(r4)
+ mfspr r6, SPRN_MAS1
+ stw r5, VCPU_SHARED_MAS0(r11)
+ mfspr r7, SPRN_MAS2
+ stw r6, VCPU_SHARED_MAS1(r11)
+ PPC_STD(r7, VCPU_SHARED_MAS2, r11)
+ mfspr r5, SPRN_MAS3
+ mfspr r6, SPRN_MAS4
+ stw r5, VCPU_SHARED_MAS7_3+4(r11)
+ mfspr r7, SPRN_MAS6
+ stw r6, VCPU_SHARED_MAS4(r11)
+ mfspr r5, SPRN_MAS7
+ lwz r6, VCPU_HOST_MAS4(r4)
+ stw r7, VCPU_SHARED_MAS6(r11)
+ lwz r8, VCPU_HOST_MAS6(r4)
+ mtspr SPRN_MAS4, r6
+ stw r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r8
+ /* Enable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH
+ mtspr SPRN_EPCR, r3
+ isync
+
+#ifdef CONFIG_64BIT
+ /*
+ * We enter with interrupts disabled in hardware, but
+ * we need to call RECONCILE_IRQ_STATE to ensure
+ * that the software state is kept in sync.
+ */
+ RECONCILE_IRQ_STATE(r3,r5)
+#endif
+
+ /* Switch to kernel stack and jump to handler. */
+ PPC_LL r3, HOST_RUN(r1)
+ mr r5, r14 /* intno */
+ mr r14, r4 /* Save vcpu pointer. */
+ bl kvmppc_handle_exit
+
+ /* Restore vcpu pointer and the nonvolatiles we used. */
+ mr r4, r14
+ PPC_LL r14, VCPU_GPR(R14)(r4)
+
+ andi. r5, r3, RESUME_FLAG_NV
+ beq skip_nv_load
+ PPC_LL r15, VCPU_GPR(R15)(r4)
+ PPC_LL r16, VCPU_GPR(R16)(r4)
+ PPC_LL r17, VCPU_GPR(R17)(r4)
+ PPC_LL r18, VCPU_GPR(R18)(r4)
+ PPC_LL r19, VCPU_GPR(R19)(r4)
+ PPC_LL r20, VCPU_GPR(R20)(r4)
+ PPC_LL r21, VCPU_GPR(R21)(r4)
+ PPC_LL r22, VCPU_GPR(R22)(r4)
+ PPC_LL r23, VCPU_GPR(R23)(r4)
+ PPC_LL r24, VCPU_GPR(R24)(r4)
+ PPC_LL r25, VCPU_GPR(R25)(r4)
+ PPC_LL r26, VCPU_GPR(R26)(r4)
+ PPC_LL r27, VCPU_GPR(R27)(r4)
+ PPC_LL r28, VCPU_GPR(R28)(r4)
+ PPC_LL r29, VCPU_GPR(R29)(r4)
+ PPC_LL r30, VCPU_GPR(R30)(r4)
+ PPC_LL r31, VCPU_GPR(R31)(r4)
+skip_nv_load:
+ /* Should we return to the guest? */
+ andi. r5, r3, RESUME_FLAG_HOST
+ beq lightweight_exit
+
+ srawi r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+ /* Not returning to guest. */
+ PPC_LL r5, HOST_STACK_LR(r1)
+ lwz r6, HOST_CR(r1)
+
+ /*
+ * We already saved guest volatile register state; now save the
+ * non-volatiles.
+ */
+
+ PPC_STL r15, VCPU_GPR(R15)(r4)
+ PPC_STL r16, VCPU_GPR(R16)(r4)
+ PPC_STL r17, VCPU_GPR(R17)(r4)
+ PPC_STL r18, VCPU_GPR(R18)(r4)
+ PPC_STL r19, VCPU_GPR(R19)(r4)
+ PPC_STL r20, VCPU_GPR(R20)(r4)
+ PPC_STL r21, VCPU_GPR(R21)(r4)
+ PPC_STL r22, VCPU_GPR(R22)(r4)
+ PPC_STL r23, VCPU_GPR(R23)(r4)
+ PPC_STL r24, VCPU_GPR(R24)(r4)
+ PPC_STL r25, VCPU_GPR(R25)(r4)
+ PPC_STL r26, VCPU_GPR(R26)(r4)
+ PPC_STL r27, VCPU_GPR(R27)(r4)
+ PPC_STL r28, VCPU_GPR(R28)(r4)
+ PPC_STL r29, VCPU_GPR(R29)(r4)
+ PPC_STL r30, VCPU_GPR(R30)(r4)
+ PPC_STL r31, VCPU_GPR(R31)(r4)
+
+ /* Load host non-volatile register state from host stack. */
+ PPC_LL r14, HOST_NV_GPR(R14)(r1)
+ PPC_LL r15, HOST_NV_GPR(R15)(r1)
+ PPC_LL r16, HOST_NV_GPR(R16)(r1)
+ PPC_LL r17, HOST_NV_GPR(R17)(r1)
+ PPC_LL r18, HOST_NV_GPR(R18)(r1)
+ PPC_LL r19, HOST_NV_GPR(R19)(r1)
+ PPC_LL r20, HOST_NV_GPR(R20)(r1)
+ PPC_LL r21, HOST_NV_GPR(R21)(r1)
+ PPC_LL r22, HOST_NV_GPR(R22)(r1)
+ PPC_LL r23, HOST_NV_GPR(R23)(r1)
+ PPC_LL r24, HOST_NV_GPR(R24)(r1)
+ PPC_LL r25, HOST_NV_GPR(R25)(r1)
+ PPC_LL r26, HOST_NV_GPR(R26)(r1)
+ PPC_LL r27, HOST_NV_GPR(R27)(r1)
+ PPC_LL r28, HOST_NV_GPR(R28)(r1)
+ PPC_LL r29, HOST_NV_GPR(R29)(r1)
+ PPC_LL r30, HOST_NV_GPR(R30)(r1)
+ PPC_LL r31, HOST_NV_GPR(R31)(r1)
+
+ /* Return to kvm_vcpu_run(). */
+ mtlr r5
+ mtcr r6
+ addi r1, r1, HOST_STACK_SIZE
+ /* r3 still contains the return code from kvmppc_handle_exit(). */
+ blr
+
+/* Registers:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+ stwu r1, -HOST_STACK_SIZE(r1)
+ PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+
+ /* Save host state to stack. */
+ PPC_STL r3, HOST_RUN(r1)
+ mflr r3
+ mfcr r5
+ PPC_STL r3, HOST_STACK_LR(r1)
+
+ stw r5, HOST_CR(r1)
+
+ /* Save host non-volatile register state to stack. */
+ PPC_STL r14, HOST_NV_GPR(R14)(r1)
+ PPC_STL r15, HOST_NV_GPR(R15)(r1)
+ PPC_STL r16, HOST_NV_GPR(R16)(r1)
+ PPC_STL r17, HOST_NV_GPR(R17)(r1)
+ PPC_STL r18, HOST_NV_GPR(R18)(r1)
+ PPC_STL r19, HOST_NV_GPR(R19)(r1)
+ PPC_STL r20, HOST_NV_GPR(R20)(r1)
+ PPC_STL r21, HOST_NV_GPR(R21)(r1)
+ PPC_STL r22, HOST_NV_GPR(R22)(r1)
+ PPC_STL r23, HOST_NV_GPR(R23)(r1)
+ PPC_STL r24, HOST_NV_GPR(R24)(r1)
+ PPC_STL r25, HOST_NV_GPR(R25)(r1)
+ PPC_STL r26, HOST_NV_GPR(R26)(r1)
+ PPC_STL r27, HOST_NV_GPR(R27)(r1)
+ PPC_STL r28, HOST_NV_GPR(R28)(r1)
+ PPC_STL r29, HOST_NV_GPR(R29)(r1)
+ PPC_STL r30, HOST_NV_GPR(R30)(r1)
+ PPC_STL r31, HOST_NV_GPR(R31)(r1)
+
+ /* Load guest non-volatiles. */
+ PPC_LL r14, VCPU_GPR(R14)(r4)
+ PPC_LL r15, VCPU_GPR(R15)(r4)
+ PPC_LL r16, VCPU_GPR(R16)(r4)
+ PPC_LL r17, VCPU_GPR(R17)(r4)
+ PPC_LL r18, VCPU_GPR(R18)(r4)
+ PPC_LL r19, VCPU_GPR(R19)(r4)
+ PPC_LL r20, VCPU_GPR(R20)(r4)
+ PPC_LL r21, VCPU_GPR(R21)(r4)
+ PPC_LL r22, VCPU_GPR(R22)(r4)
+ PPC_LL r23, VCPU_GPR(R23)(r4)
+ PPC_LL r24, VCPU_GPR(R24)(r4)
+ PPC_LL r25, VCPU_GPR(R25)(r4)
+ PPC_LL r26, VCPU_GPR(R26)(r4)
+ PPC_LL r27, VCPU_GPR(R27)(r4)
+ PPC_LL r28, VCPU_GPR(R28)(r4)
+ PPC_LL r29, VCPU_GPR(R29)(r4)
+ PPC_LL r30, VCPU_GPR(R30)(r4)
+ PPC_LL r31, VCPU_GPR(R31)(r4)
+
+
+lightweight_exit:
+ PPC_STL r2, HOST_R2(r1)
+
+ mfspr r3, SPRN_PID
+ stw r3, VCPU_HOST_PID(r4)
+ lwz r3, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r3
+
+ PPC_LL r11, VCPU_SHARED(r4)
+ /* Disable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ oris r3, r3, SPRN_EPCR_DMIUH@h
+ mtspr SPRN_EPCR, r3
+ isync
+ /* Save host mas4 and mas6 and load guest MAS registers */
+ mfspr r3, SPRN_MAS4
+ stw r3, VCPU_HOST_MAS4(r4)
+ mfspr r3, SPRN_MAS6
+ stw r3, VCPU_HOST_MAS6(r4)
+ lwz r3, VCPU_SHARED_MAS0(r11)
+ lwz r5, VCPU_SHARED_MAS1(r11)
+ PPC_LD(r6, VCPU_SHARED_MAS2, r11)
+ lwz r7, VCPU_SHARED_MAS7_3+4(r11)
+ lwz r8, VCPU_SHARED_MAS4(r11)
+ mtspr SPRN_MAS0, r3
+ mtspr SPRN_MAS1, r5
+ mtspr SPRN_MAS2, r6
+ mtspr SPRN_MAS3, r7
+ mtspr SPRN_MAS4, r8
+ lwz r3, VCPU_SHARED_MAS6(r11)
+ lwz r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r3
+ mtspr SPRN_MAS7, r5
+
+ /*
+ * Host interrupt handlers may have clobbered these guest-readable
+ * SPRGs, so we need to reload them here with the guest's values.
+ */
+ lwz r3, VCPU_VRSAVE(r4)
+ PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
+ mtspr SPRN_VRSAVE, r3
+ PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
+ mtspr SPRN_SPRG4W, r5
+ PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
+ mtspr SPRN_SPRG5W, r6
+ PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
+ mtspr SPRN_SPRG6W, r7
+ mtspr SPRN_SPRG7W, r8
+
+ /* Load some guest volatiles. */
+ PPC_LL r3, VCPU_LR(r4)
+ PPC_LL r5, VCPU_XER(r4)
+ PPC_LL r6, VCPU_CTR(r4)
+ lwz r7, VCPU_CR(r4)
+ PPC_LL r8, VCPU_PC(r4)
+ PPC_LD(r9, VCPU_SHARED_MSR, r11)
+ PPC_LL r0, VCPU_GPR(R0)(r4)
+ PPC_LL r1, VCPU_GPR(R1)(r4)
+ PPC_LL r2, VCPU_GPR(R2)(r4)
+ PPC_LL r10, VCPU_GPR(R10)(r4)
+ PPC_LL r11, VCPU_GPR(R11)(r4)
+ PPC_LL r12, VCPU_GPR(R12)(r4)
+ PPC_LL r13, VCPU_GPR(R13)(r4)
+ mtlr r3
+ mtxer r5
+ mtctr r6
+ mtsrr0 r8
+ mtsrr1 r9
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r9, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ bne 1b
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+ /*
+ * Don't execute any instruction which can change CR after
+ * below instruction.
+ */
+ mtcr r7
+
+ /* Finish loading guest volatiles and jump to guest. */
+ PPC_LL r5, VCPU_GPR(R5)(r4)
+ PPC_LL r6, VCPU_GPR(R6)(r4)
+ PPC_LL r7, VCPU_GPR(R7)(r4)
+ PPC_LL r8, VCPU_GPR(R8)(r4)
+ PPC_LL r9, VCPU_GPR(R9)(r4)
+
+ PPC_LL r3, VCPU_GPR(R3)(r4)
+ PPC_LL r4, VCPU_GPR(R4)(r4)
+ rfi
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e3768ee9b59..2e02ed849f3 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@@ -15,15 +15,289 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/tlbflush.h>
-#include <asm/kvm_e500.h>
#include <asm/kvm_ppc.h>
+#include "../mm/mmu_decl.h"
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+
+struct id {
+ unsigned long val;
+ struct id **pentry;
+};
+
+#define NUM_TIDS 256
+
+/*
+ * This table provide mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS [0..1]
+ * guestTID [0..255]
+ * guestPR [0..1]
+ * ID [1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+ struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provide reversed mappings of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+ struct id *entry[NUM_TIDS];
+};
+
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/* This variable keeps last used shadow ID on local core.
+ * The valid range of shadow ID is [1..255] */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
+/*
+ * Allocate a free shadow id and setup a valid sid mapping in given entry.
+ * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+ unsigned long sid;
+ int ret = -1;
+
+ sid = ++(__get_cpu_var(pcpu_last_used_sid));
+ if (sid < NUM_TIDS) {
+ __get_cpu_var(pcpu_sids).entry[sid] = entry;
+ entry->val = sid;
+ entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
+ ret = sid;
+ }
+
+ /*
+ * If sid == NUM_TIDS, we've run out of sids. We return -1, and
+ * the caller will invalidate everything and start over.
+ *
+ * sid > NUM_TIDS indicates a race, which we disable preemption to
+ * avoid.
+ */
+ WARN_ON(sid > NUM_TIDS);
+
+ return ret;
+}
+
+/*
+ * Check if given entry contain a valid shadow id mapping.
+ * An ID mapping is considered valid only if
+ * both vcpu and pcpu know this mapping.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+ if (entry && entry->val != 0 &&
+ __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
+ entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
+ return entry->val;
+ return -1;
+}
+
+/* Invalidate all id mappings on local core -- call with preempt disabled */
+static inline void local_sid_destroy_all(void)
+{
+ __get_cpu_var(pcpu_last_used_sid) = 0;
+ memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
+}
+
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
+ return vcpu_e500->idt;
+}
+
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->idt);
+ vcpu_e500->idt = NULL;
+}
+
+/* Map guest pid to shadow.
+ * We use PID to keep shadow of current guest non-zero PID,
+ * and use PID1 to keep shadow of guest zero PID.
+ * So that guest tlbe with TID=0 can be accessed at any time */
+static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ preempt_disable();
+ vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu),
+ get_cur_pid(&vcpu_e500->vcpu),
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu), 0,
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ preempt_enable();
+}
+
+/* Invalidate all mappings on vcpu */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* Invalidate one ID mapping on vcpu */
+static inline void kvmppc_e500_id_table_reset_one(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int as, int pid, int pr)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+
+ BUG_ON(as >= 2);
+ BUG_ON(pid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ idt->id[as][pid][pr].val = 0;
+ idt->id[as][pid][pr].pentry = NULL;
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
+ * This function first lookup if a valid mapping exists,
+ * if not, then creates a new one.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ int sid;
+
+ BUG_ON(as >= 2);
+ BUG_ON(gid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ sid = local_sid_lookup(&idt->id[as][gid][pr]);
+
+ while (sid <= 0) {
+ /* No mapping yet */
+ sid = local_sid_setup_one(&idt->id[as][gid][pr]);
+ if (sid <= 0) {
+ _tlbil_all();
+ local_sid_destroy_all();
+ }
+
+ /* Update shadow pid when mappings are changed */
+ if (!avoid_recursion)
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+
+ return sid;
+}
+
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ if (vcpu->arch.pid != pid) {
+ vcpu_e500->pid[0] = vcpu->arch.pid = pid;
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+}
+
+/* gtlbe must not be mapped by more than one host tlbe */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ unsigned int pr, tid, ts, pid;
+ u32 val, eaddr;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+
+ preempt_disable();
+
+ /* One guest ID may be mapped to two shadow IDs */
+ for (pr = 0; pr < 2; pr++) {
+ /*
+ * The shadow PID can have a valid mapping on at most one
+ * host CPU. In the common case, it will be valid on this
+ * CPU, in which case we do a local invalidation of the
+ * specific address.
+ *
+ * If the shadow PID is not valid on the current host CPU,
+ * we invalidate the entire shadow PID.
+ */
+ pid = local_sid_lookup(&idt->id[ts][tid][pr]);
+ if (pid <= 0) {
+ kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+ continue;
+ }
+
+ /*
+ * The guest is invalidating a 4K entry which is in a PID
+ * that has a valid shadow mapping on this host CPU. We
+ * search host TLB to invalidate it's shadow TLB entry,
+ * similar to __tlbil_va except that we need to look in AS1.
+ */
+ val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+
+ local_irq_restore(flags);
+ }
+
+ preempt_enable();
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+ /* Recalc shadow pid since MSR changes */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
+}
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
@@ -33,14 +307,22 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
{
- kvmppc_e500_tlb_load(vcpu, cpu);
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ /* Shadow PID may be expired on local core */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
{
- kvmppc_e500_tlb_put(vcpu);
+#ifdef CONFIG_SPE
+ if (vcpu->arch.shadow_msr & MSR_SPE)
+ kvmppc_vcpu_disable_spe(vcpu);
+#endif
+
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -55,6 +337,23 @@ int kvmppc_core_check_processor_compat(void)
return r;
}
+static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = get_entry(vcpu_e500, 1, 0);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = get_entry(vcpu_e500, 1, 1);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -63,40 +362,90 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Registers init */
vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu_e500->svr = mfspr(SPRN_SVR);
- /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
- vcpu->vcpu_id = 0;
+ vcpu->arch.cpu_type = KVM_CPU_E500V2;
return 0;
}
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr)
+static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
+ KVM_SREGS_E_PM;
+ sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+ sregs->u.e.impl.fsl.features = 0;
+ sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+ sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+ sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+ sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+ sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+ sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+ sregs->u.e.ivor_high[3] =
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+ return 0;
+}
+
+static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
+
+ if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+ vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+ vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+ vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+ }
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
- index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
- if (index < 0) {
- tr->valid = 0;
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
+
+ if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+ sregs->u.e.ivor_high[0];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+ sregs->u.e.ivor_high[1];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+ sregs->u.e.ivor_high[2];
}
- tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
+ if (sregs->u.e.features & KVM_SREGS_E_PM) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+ sregs->u.e.ivor_high[3];
+ }
- return 0;
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
+ unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
struct kvm_vcpu *vcpu;
@@ -113,9 +462,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ goto uninit_vcpu;
+
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ goto uninit_id;
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!vcpu->arch.shared)
@@ -125,6 +477,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_id:
+ kvmppc_e500_id_table_free(vcpu_e500);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_vcpu:
@@ -133,48 +487,93 @@ out:
return ERR_PTR(err);
}
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
free_page((unsigned long)vcpu->arch.shared);
- kvm_vcpu_uninit(vcpu);
kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvmppc_e500_id_table_free(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
+static int kvmppc_core_init_vm_e500(struct kvm *kvm)
+{
+ return 0;
+}
+
+static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
+{
+}
+
+static struct kvmppc_ops kvm_ops_e500 = {
+ .get_sregs = kvmppc_core_get_sregs_e500,
+ .set_sregs = kvmppc_core_set_sregs_e500,
+ .get_one_reg = kvmppc_get_one_reg_e500,
+ .set_one_reg = kvmppc_set_one_reg_e500,
+ .vcpu_load = kvmppc_core_vcpu_load_e500,
+ .vcpu_put = kvmppc_core_vcpu_put_e500,
+ .vcpu_create = kvmppc_core_vcpu_create_e500,
+ .vcpu_free = kvmppc_core_vcpu_free_e500,
+ .mmu_destroy = kvmppc_mmu_destroy_e500,
+ .init_vm = kvmppc_core_init_vm_e500,
+ .destroy_vm = kvmppc_core_destroy_vm_e500,
+ .emulate_op = kvmppc_core_emulate_op_e500,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
static int __init kvmppc_e500_init(void)
{
int r, i;
unsigned long ivor[3];
+ /* Process remaining handlers above the generic first 16 */
+ unsigned long *handler = &kvmppc_booke_handler_addr[16];
+ unsigned long handler_len;
unsigned long max_ivor = 0;
+ r = kvmppc_core_check_processor_compat();
+ if (r)
+ goto err_out;
+
r = kvmppc_booke_init();
if (r)
- return r;
+ goto err_out;
/* copy extra E500 exception handlers */
ivor[0] = mfspr(SPRN_IVOR32);
ivor[1] = mfspr(SPRN_IVOR33);
ivor[2] = mfspr(SPRN_IVOR34);
for (i = 0; i < 3; i++) {
- if (ivor[i] > max_ivor)
- max_ivor = ivor[i];
+ if (ivor[i] > ivor[max_ivor])
+ max_ivor = i;
+ handler_len = handler[i + 1] - handler[i];
memcpy((void *)kvmppc_booke_handlers + ivor[i],
- kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
- kvmppc_handler_len);
+ (void *)handler[i], handler_len);
}
- flush_icache_range(kvmppc_booke_handlers,
- kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+ handler_len = handler[max_ivor + 1] - handler[max_ivor];
+ flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
+ ivor[max_ivor] + handler_len);
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_e500.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_e500;
+
+err_out:
+ return r;
}
static void __exit kvmppc_e500_exit(void)
{
+ kvmppc_pr_ops = NULL;
kvmppc_booke_exit();
}
module_init(kvmppc_e500_init);
module_exit(kvmppc_e500_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
new file mode 100644
index 00000000000..a326178bdea
--- /dev/null
+++ b/arch/powerpc/kvm/e500.h
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu <yu.liu@freescale.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Ashish Kalra <ashish.kalra@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h and
+ * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
+ * Copyright IBM Corp. 2007-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef KVM_E500_H
+#define KVM_E500_H
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-book3e.h>
+#include <asm/tlb.h>
+
+enum vcpu_ftr {
+ VCPU_FTR_MMU_V2
+};
+
+#define E500_PID_NUM 3
+#define E500_TLB_NUM 2
+
+/* entry is mapped somewhere in host TLB */
+#define E500_TLB_VALID (1 << 31)
+/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
+#define E500_TLB_BITMAP (1 << 30)
+/* TLB1 entry is mapped by host TLB0 */
+#define E500_TLB_TLB0 (1 << 29)
+/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
+#define E500_TLB_MAS2_ATTR (0x7f)
+
+struct tlbe_ref {
+ pfn_t pfn; /* valid only for TLB0, except briefly */
+ unsigned int flags; /* E500_TLB_* */
+};
+
+struct tlbe_priv {
+ struct tlbe_ref ref;
+};
+
+#ifdef CONFIG_KVM_E500V2
+struct vcpu_id_table;
+#endif
+
+struct kvmppc_e500_tlb_params {
+ int entries, ways, sets;
+};
+
+struct kvmppc_vcpu_e500 {
+ struct kvm_vcpu vcpu;
+
+ /* Unmodified copy of the guest's TLB -- shared with host userspace. */
+ struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+ /* Starting entry number in gtlb_arch[] */
+ int gtlb_offset[E500_TLB_NUM];
+
+ /* KVM internal information associated with each guest TLB entry */
+ struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
+
+ struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
+ unsigned int gtlb_nv[E500_TLB_NUM];
+
+ unsigned int host_tlb1_nv;
+
+ u32 svr;
+ u32 l1csr0;
+ u32 l1csr1;
+ u32 hid0;
+ u32 hid1;
+ u64 mcar;
+
+ struct page **shared_tlb_pages;
+ int num_shared_tlb_pages;
+
+ u64 *g2h_tlb1_map;
+ unsigned int *h2g_tlb1_rmap;
+
+ /* Minimum and maximum address mapped my TLB1 */
+ unsigned long tlb1_min_eaddr;
+ unsigned long tlb1_max_eaddr;
+
+#ifdef CONFIG_KVM_E500V2
+ u32 pid[E500_PID_NUM];
+
+ /* vcpu id table */
+ struct vcpu_id_table *idt;
+#endif
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE 128
+#define KVM_E500_TLB0_WAY_NUM 2
+
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
+ ulong value);
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
+
+#ifdef CONFIG_KVM_E500V2
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion);
+#endif
+
+/* TLB helper functions */
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 7) & 0x1f;
+}
+
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas2 & MAS2_EPN;
+}
+
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1ULL << 10 << pgsize;
+}
+
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ u64 bytes = get_tlb_bytes(tlbe);
+ return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas7_3 & ~0xfffULL;
+}
+
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
+}
+
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & MSR_PR);
+}
+
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * Manual says that tlbsel has 2 bits wide.
+ * Since we only have two TLBs, only lower bit is used.
+ */
+ return (vcpu->arch.shared->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
+ return 0;
+#endif
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+static inline struct kvm_book3e_206_tlb_entry *get_entry(
+ struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+ int offset = vcpu_e500->gtlb_offset[tlbsel];
+ return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
+#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
+#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+#else
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+
+static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+
+ return vcpu_e500->pid[tidseld];
+}
+
+/* Force TS=1 for all guest mappings. */
+#define get_tlb_sts(gtlbe) (MAS1_TS)
+#endif /* !BOOKE_HV */
+
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+ enum vcpu_ftr ftr)
+{
+ bool has_ftr;
+ switch (ftr) {
+ case VCPU_FTR_MMU_V2:
+ has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+ break;
+ default:
+ return false;
+ }
+ return has_ftr;
+}
+
+#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8e3edfbc963..002d5176414 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@@ -14,27 +14,132 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
-#include <asm/kvm_e500.h>
+#include <asm/dbell.h>
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+#define XOP_DCBTLS 166
+#define XOP_MSGSND 206
+#define XOP_MSGCLR 238
#define XOP_TLBIVAX 786
#define XOP_TLBSX 914
#define XOP_TLBRE 946
#define XOP_TLBWE 978
+#define XOP_TLBILX 18
+#define XOP_EHPRIV 270
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
+#ifdef CONFIG_KVM_E500MC
+static int dbell2prio(ulong param)
+{
+ int msg = param & PPC_DBELL_TYPE_MASK;
+ int prio = -1;
+
+ switch (msg) {
+ case PPC_DBELL_TYPE(PPC_DBELL):
+ prio = BOOKE_IRQPRIO_DBELL;
+ break;
+ case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
+ prio = BOOKE_IRQPRIO_DBELL_CRIT;
+ break;
+ default:
+ break;
+ }
+
+ return prio;
+}
+
+static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(param);
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ clear_bit(prio, &vcpu->arch.pending_exceptions);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(rb);
+ int pir = param & PPC_DBELL_PIR_MASK;
+ int i;
+ struct kvm_vcpu *cvcpu;
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
+ int cpir = cvcpu->arch.shared->pir;
+ if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
+ set_bit(prio, &cvcpu->arch.pending_exceptions);
+ kvm_vcpu_kick(cvcpu);
+ }
+ }
+
+ return EMULATE_DONE;
+}
+#endif
+
+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int ra;
- int rb;
+
+ switch (get_oc(inst)) {
+ case EHPRIV_OC_DEBUG:
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = vcpu->arch.pc;
+ run->debug.arch.status = 0;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ emulated = EMULATE_EXIT_USER;
+ *advance = 0;
+ break;
+ default:
+ emulated = EMULATE_FAIL;
+ }
+ return emulated;
+}
+
+static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ /* Always fail to lock the cache */
+ vcpu_e500->l1csr0 |= L1CSR0_CUL;
+ return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rt = get_rt(inst);
+ gva_t ea;
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
+ case XOP_DCBTLS:
+ emulated = kvmppc_e500_emul_dcbtls(vcpu);
+ break;
+
+#ifdef CONFIG_KVM_E500MC
+ case XOP_MSGSND:
+ emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
+ break;
+
+ case XOP_MSGCLR:
+ emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
+ break;
+#endif
+
case XOP_TLBRE:
emulated = kvmppc_e500_emul_tlbre(vcpu);
break;
@@ -44,14 +149,25 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBSX:
- rb = get_rb(inst);
- emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
+ break;
+
+ case XOP_TLBILX: {
+ int type = rt & 0x3;
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
break;
+ }
case XOP_TLBIVAX:
- ra = get_ra(inst);
- rb = get_rb(inst);
- emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
+ break;
+
+ case XOP_EHPRIV:
+ emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
+ advance);
break;
default:
@@ -70,45 +186,64 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
- vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
- vcpu->arch.pid = spr_val;
+ kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
- vcpu_e500->pid[1] = spr_val; break;
+ if (spr_val != 0)
+ return EMULATE_FAIL;
+ vcpu_e500->pid[1] = spr_val;
+ break;
case SPRN_PID2:
- vcpu_e500->pid[2] = spr_val; break;
+ if (spr_val != 0)
+ return EMULATE_FAIL;
+ vcpu_e500->pid[2] = spr_val;
+ break;
case SPRN_MAS0:
- vcpu_e500->mas0 = spr_val; break;
+ vcpu->arch.shared->mas0 = spr_val;
+ break;
case SPRN_MAS1:
- vcpu_e500->mas1 = spr_val; break;
+ vcpu->arch.shared->mas1 = spr_val;
+ break;
case SPRN_MAS2:
- vcpu_e500->mas2 = spr_val; break;
+ vcpu->arch.shared->mas2 = spr_val;
+ break;
case SPRN_MAS3:
- vcpu_e500->mas3 = spr_val; break;
+ vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
+ vcpu->arch.shared->mas7_3 |= spr_val;
+ break;
case SPRN_MAS4:
- vcpu_e500->mas4 = spr_val; break;
+ vcpu->arch.shared->mas4 = spr_val;
+ break;
case SPRN_MAS6:
- vcpu_e500->mas6 = spr_val; break;
+ vcpu->arch.shared->mas6 = spr_val;
+ break;
case SPRN_MAS7:
- vcpu_e500->mas7 = spr_val; break;
+ vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
+ vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
+ break;
+#endif
case SPRN_L1CSR0:
vcpu_e500->l1csr0 = spr_val;
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
break;
case SPRN_L1CSR1:
- vcpu_e500->l1csr1 = spr_val; break;
+ vcpu_e500->l1csr1 = spr_val;
+ vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);
+ break;
case SPRN_HID0:
- vcpu_e500->hid0 = spr_val; break;
+ vcpu_e500->hid0 = spr_val;
+ break;
case SPRN_HID1:
- vcpu_e500->hid1 = spr_val; break;
+ vcpu_e500->hid1 = spr_val;
+ break;
case SPRN_MMUCSR0:
emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
@@ -128,75 +263,134 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
-
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
+ break;
+ case SPRN_IVOR37:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
+ break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
+ *spr_val = vcpu_e500->pid[0];
+ break;
case SPRN_PID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
+ *spr_val = vcpu_e500->pid[1];
+ break;
case SPRN_PID2:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
+ *spr_val = vcpu_e500->pid[2];
+ break;
case SPRN_MAS0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
+ *spr_val = vcpu->arch.shared->mas0;
+ break;
case SPRN_MAS1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
+ *spr_val = vcpu->arch.shared->mas1;
+ break;
case SPRN_MAS2:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
+ *spr_val = vcpu->arch.shared->mas2;
+ break;
case SPRN_MAS3:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+ *spr_val = (u32)vcpu->arch.shared->mas7_3;
+ break;
case SPRN_MAS4:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
+ *spr_val = vcpu->arch.shared->mas4;
+ break;
case SPRN_MAS6:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
+ *spr_val = vcpu->arch.shared->mas6;
+ break;
case SPRN_MAS7:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
-
+ *spr_val = vcpu->arch.shared->mas7_3 >> 32;
+ break;
+#endif
+ case SPRN_DECAR:
+ *spr_val = vcpu->arch.decar;
+ break;
case SPRN_TLB0CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[0];
+ break;
case SPRN_TLB1CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[1];
+ break;
+ case SPRN_TLB0PS:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ *spr_val = vcpu->arch.tlbps[0];
+ break;
+ case SPRN_TLB1PS:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ *spr_val = vcpu->arch.tlbps[1];
+ break;
case SPRN_L1CSR0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
+ *spr_val = vcpu_e500->l1csr0;
+ break;
case SPRN_L1CSR1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
+ *spr_val = vcpu_e500->l1csr1;
+ break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
+ *spr_val = vcpu_e500->hid0;
+ break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+ *spr_val = vcpu_e500->hid1;
+ break;
+ case SPRN_SVR:
+ *spr_val = vcpu_e500->svr;
+ break;
case SPRN_MMUCSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
+ *spr_val = 0;
+ break;
case SPRN_MMUCFG:
- kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
+ *spr_val = vcpu->arch.mmucfg;
+ break;
+ case SPRN_EPTCFG:
+ if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+ return EMULATE_FAIL;
+ /*
+ * Legacy Linux guests access EPTCFG register even if the E.PT
+ * category is disabled in the VM. Give them a chance to live.
+ */
+ *spr_val = vcpu->arch.eptcfg;
+ break;
/* extra exceptions */
case SPRN_IVOR32:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
case SPRN_IVOR33:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
break;
case SPRN_IVOR34:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
case SPRN_IVOR35:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ break;
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ break;
+ case SPRN_IVOR37:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
return emulated;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
new file mode 100644
index 00000000000..50860e919cb
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -0,0 +1,962 @@
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ * Scott Wood, scottwood@freescale.com
+ * Ashish Kalra, ashish.kalra@freescale.com
+ * Varun Sethi, varun.sethi@freescale.com
+ * Alexander Graf, agraf@suse.de
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <asm/kvm_ppc.h>
+
+#include "e500.h"
+#include "trace_booke.h"
+#include "timing.h"
+#include "e500_mmu_host.h"
+
+static inline unsigned int gtlb0_get_next_victim(
+ struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned int victim;
+
+ victim = vcpu_e500->gtlb_nv[0]++;
+ if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
+ vcpu_e500->gtlb_nv[0] = 0;
+
+ return victim;
+}
+
+static int tlb0_set_base(gva_t addr, int sets, int ways)
+{
+ int set_base;
+
+ set_base = (addr >> PAGE_SHIFT) & (sets - 1);
+ set_base *= ways;
+
+ return set_base;
+}
+
+static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+ return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,
+ vcpu_e500->gtlb_params[0].ways);
+}
+
+static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel = get_tlb_esel_bit(vcpu);
+
+ if (tlbsel == 0) {
+ esel &= vcpu_e500->gtlb_params[0].ways - 1;
+ esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2);
+ } else {
+ esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;
+ }
+
+ return esel;
+}
+
+/* Search the guest TLB for a matching entry. */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+ gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+ int size = vcpu_e500->gtlb_params[tlbsel].entries;
+ unsigned int set_base, offset;
+ int i;
+
+ if (tlbsel == 0) {
+ set_base = gtlb0_set_base(vcpu_e500, eaddr);
+ size = vcpu_e500->gtlb_params[0].ways;
+ } else {
+ if (eaddr < vcpu_e500->tlb1_min_eaddr ||
+ eaddr > vcpu_e500->tlb1_max_eaddr)
+ return -1;
+ set_base = 0;
+ }
+
+ offset = vcpu_e500->gtlb_offset[tlbsel];
+
+ for (i = 0; i < size; i++) {
+ struct kvm_book3e_206_tlb_entry *tlbe =
+ &vcpu_e500->gtlb_arch[offset + set_base + i];
+ unsigned int tid;
+
+ if (eaddr < get_tlb_eaddr(tlbe))
+ continue;
+
+ if (eaddr > get_tlb_end(tlbe))
+ continue;
+
+ tid = get_tlb_tid(tlbe);
+ if (tid && (tid != pid))
+ continue;
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ if (get_tlb_ts(tlbe) != as && as != -1)
+ continue;
+
+ return set_base + i;
+ }
+
+ return -1;
+}
+
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+ gva_t eaddr, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int victim, tsized;
+ int tlbsel;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
+ victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+ tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
+
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+ | MAS1_TID(get_tlbmiss_tid(vcpu))
+ | MAS1_TSIZE(tsized);
+ vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
+ | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
+ vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+ vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1)
+ | (get_cur_pid(vcpu) << 16)
+ | (as ? MAS6_SAS : 0);
+}
+
+static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int size = vcpu_e500->gtlb_params[1].entries;
+ unsigned int offset;
+ gva_t eaddr;
+ int i;
+
+ vcpu_e500->tlb1_min_eaddr = ~0UL;
+ vcpu_e500->tlb1_max_eaddr = 0;
+ offset = vcpu_e500->gtlb_offset[1];
+
+ for (i = 0; i < size; i++) {
+ struct kvm_book3e_206_tlb_entry *tlbe =
+ &vcpu_e500->gtlb_arch[offset + i];
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ eaddr = get_tlb_eaddr(tlbe);
+ vcpu_e500->tlb1_min_eaddr =
+ min(vcpu_e500->tlb1_min_eaddr, eaddr);
+
+ eaddr = get_tlb_end(tlbe);
+ vcpu_e500->tlb1_max_eaddr =
+ max(vcpu_e500->tlb1_max_eaddr, eaddr);
+ }
+}
+
+static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned long start, end, size;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ return vcpu_e500->tlb1_min_eaddr == start ||
+ vcpu_e500->tlb1_max_eaddr == end;
+}
+
+/* This function is supposed to be called for a adding a new valid tlb entry */
+static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned long start, end, size;
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ if (!get_tlb_v(gtlbe))
+ return;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start);
+ vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end);
+}
+
+static inline int kvmppc_e500_gtlbe_invalidate(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct kvm_book3e_206_tlb_entry *gtlbe =
+ get_entry(vcpu_e500, tlbsel, esel);
+
+ if (unlikely(get_tlb_iprot(gtlbe)))
+ return -1;
+
+ if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+
+ gtlbe->mas1 = 0;
+
+ return 0;
+}
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+ int esel;
+
+ if (value & MMUCSR0_TLB0FI)
+ for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
+ if (value & MMUCSR0_TLB1FI)
+ for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
+
+ /* Invalidate all host shadow mappings */
+ kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int ia;
+ int esel, tlbsel;
+
+ ia = (ea >> 2) & 0x1;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (ea >> 3) & 0x1;
+
+ if (ia) {
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries;
+ esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ } else {
+ ea &= 0xfffff000;
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+ get_cur_pid(vcpu), -1);
+ if (esel >= 0)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+
+ /* Invalidate all host shadow mappings */
+ kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+
+ return EMULATE_DONE;
+}
+
+static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+ int pid, int type)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+ int tid, esel;
+
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
+ tlbe = get_entry(vcpu_e500, tlbsel, esel);
+ tid = get_tlb_tid(tlbe);
+ if (type == 0 || tid == pid) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+ }
+}
+
+static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
+ gva_t ea)
+{
+ int tlbsel, esel;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
+ if (esel >= 0) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ break;
+ }
+ }
+}
+
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int pid = get_cur_spid(vcpu);
+
+ if (type == 0 || type == 1) {
+ tlbilx_all(vcpu_e500, 0, pid, type);
+ tlbilx_all(vcpu_e500, 1, pid, type);
+ } else if (type == 3) {
+ tlbilx_one(vcpu_e500, pid, ea);
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel, esel;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+ vcpu->arch.shared->mas0 &= ~MAS0_NV(~0);
+ vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = gtlbe->mas1;
+ vcpu->arch.shared->mas2 = gtlbe->mas2;
+ vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int as = !!get_cur_sas(vcpu);
+ unsigned int pid = get_cur_spid(vcpu);
+ int esel, tlbsel;
+ struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+ if (esel >= 0) {
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+ break;
+ }
+ }
+
+ if (gtlbe) {
+ esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1;
+
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = gtlbe->mas1;
+ vcpu->arch.shared->mas2 = gtlbe->mas2;
+ vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+ } else {
+ int victim;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1;
+ victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel)
+ | MAS0_ESEL(victim)
+ | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 =
+ (vcpu->arch.shared->mas6 & MAS6_SPID0)
+ | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+ | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
+ vcpu->arch.shared->mas2 &= MAS2_EPN;
+ vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
+ MAS2_ATTRIB_MASK;
+ vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
+ MAS3_U2 | MAS3_U3;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ int tlbsel, esel;
+ int recal = 0;
+ int idx;
+
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+ if (get_tlb_v(gtlbe)) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ if ((tlbsel == 1) &&
+ kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ recal = 1;
+ }
+
+ gtlbe->mas1 = vcpu->arch.shared->mas1;
+ gtlbe->mas2 = vcpu->arch.shared->mas2;
+ if (!(vcpu->arch.shared->msr & MSR_CM))
+ gtlbe->mas2 &= 0xffffffffUL;
+ gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
+
+ trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
+ gtlbe->mas2, gtlbe->mas7_3);
+
+ if (tlbsel == 1) {
+ /*
+ * If a valid tlb1 entry is overwritten then recalculate the
+ * min/max TLB1 map address range otherwise no need to look
+ * in tlb1 array.
+ */
+ if (recal)
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ else
+ kvmppc_set_tlb1map_range(vcpu, gtlbe);
+ }
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+ if (tlbe_is_host_safe(vcpu, gtlbe)) {
+ u64 eaddr = get_tlb_eaddr(gtlbe);
+ u64 raddr = get_tlb_raddr(gtlbe);
+
+ if (tlbsel == 0) {
+ gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+ gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ }
+
+ /* Premap the faulting page */
+ kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel));
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+ gva_t eaddr, unsigned int pid, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel, tlbsel;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+ if (esel >= 0)
+ return index_of(tlbsel, esel);
+ }
+
+ return -1;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+ if (index < 0) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ u64 pgmask;
+
+ gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+ pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
+{
+}
+
+/*****************************************/
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int i;
+
+ kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+ kfree(vcpu_e500->g2h_tlb1_map);
+ kfree(vcpu_e500->gtlb_priv[0]);
+ kfree(vcpu_e500->gtlb_priv[1]);
+
+ if (vcpu_e500->shared_tlb_pages) {
+ vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+ PAGE_SIZE)));
+
+ for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
+ set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
+ put_page(vcpu_e500->shared_tlb_pages[i]);
+ }
+
+ vcpu_e500->num_shared_tlb_pages = 0;
+
+ kfree(vcpu_e500->shared_tlb_pages);
+ vcpu_e500->shared_tlb_pages = NULL;
+ } else {
+ kfree(vcpu_e500->gtlb_arch);
+ }
+
+ vcpu_e500->gtlb_arch = NULL;
+}
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ sregs->u.e.mas0 = vcpu->arch.shared->mas0;
+ sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+ sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+ sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+ sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+ sregs->u.e.mas6 = vcpu->arch.shared->mas6;
+
+ sregs->u.e.mmucfg = vcpu->arch.mmucfg;
+ sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
+ sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
+ sregs->u.e.tlbcfg[2] = 0;
+ sregs->u.e.tlbcfg[3] = 0;
+}
+
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+ vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+ vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+ vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+ vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+ vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+ vcpu->arch.shared->mas6 = sregs->u.e.mas6;
+ }
+
+ return 0;
+}
+
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+
+ switch (id) {
+ case KVM_REG_PPC_MAS0:
+ *val = get_reg_val(id, vcpu->arch.shared->mas0);
+ break;
+ case KVM_REG_PPC_MAS1:
+ *val = get_reg_val(id, vcpu->arch.shared->mas1);
+ break;
+ case KVM_REG_PPC_MAS2:
+ *val = get_reg_val(id, vcpu->arch.shared->mas2);
+ break;
+ case KVM_REG_PPC_MAS7_3:
+ *val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+ break;
+ case KVM_REG_PPC_MAS4:
+ *val = get_reg_val(id, vcpu->arch.shared->mas4);
+ break;
+ case KVM_REG_PPC_MAS6:
+ *val = get_reg_val(id, vcpu->arch.shared->mas6);
+ break;
+ case KVM_REG_PPC_MMUCFG:
+ *val = get_reg_val(id, vcpu->arch.mmucfg);
+ break;
+ case KVM_REG_PPC_EPTCFG:
+ *val = get_reg_val(id, vcpu->arch.eptcfg);
+ break;
+ case KVM_REG_PPC_TLB0CFG:
+ case KVM_REG_PPC_TLB1CFG:
+ case KVM_REG_PPC_TLB2CFG:
+ case KVM_REG_PPC_TLB3CFG:
+ i = id - KVM_REG_PPC_TLB0CFG;
+ *val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+ break;
+ case KVM_REG_PPC_TLB0PS:
+ case KVM_REG_PPC_TLB1PS:
+ case KVM_REG_PPC_TLB2PS:
+ case KVM_REG_PPC_TLB3PS:
+ i = id - KVM_REG_PPC_TLB0PS;
+ *val = get_reg_val(id, vcpu->arch.tlbps[i]);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = 0;
+ long int i;
+
+ switch (id) {
+ case KVM_REG_PPC_MAS0:
+ vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS1:
+ vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS2:
+ vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS7_3:
+ vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS4:
+ vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MAS6:
+ vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+ break;
+ /* Only allow MMU registers to be set to the config supported by KVM */
+ case KVM_REG_PPC_MMUCFG: {
+ u32 reg = set_reg_val(id, *val);
+ if (reg != vcpu->arch.mmucfg)
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_EPTCFG: {
+ u32 reg = set_reg_val(id, *val);
+ if (reg != vcpu->arch.eptcfg)
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_TLB0CFG:
+ case KVM_REG_PPC_TLB1CFG:
+ case KVM_REG_PPC_TLB2CFG:
+ case KVM_REG_PPC_TLB3CFG: {
+ /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+ u32 reg = set_reg_val(id, *val);
+ i = id - KVM_REG_PPC_TLB0CFG;
+ if (reg != vcpu->arch.tlbcfg[i])
+ r = -EINVAL;
+ break;
+ }
+ case KVM_REG_PPC_TLB0PS:
+ case KVM_REG_PPC_TLB1PS:
+ case KVM_REG_PPC_TLB2PS:
+ case KVM_REG_PPC_TLB3PS: {
+ u32 reg = set_reg_val(id, *val);
+ i = id - KVM_REG_PPC_TLB0PS;
+ if (reg != vcpu->arch.tlbps[i])
+ r = -EINVAL;
+ break;
+ }
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_params *params)
+{
+ vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ if (params->tlb_sizes[0] <= 2048)
+ vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+ vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+ vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+ return 0;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_config_tlb *cfg)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_params params;
+ char *virt;
+ struct page **pages;
+ struct tlbe_priv *privs[2] = {};
+ u64 *g2h_bitmap = NULL;
+ size_t array_len;
+ u32 sets;
+ int num_pages, ret, i;
+
+ if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+ return -EINVAL;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+ sizeof(params)))
+ return -EFAULT;
+
+ if (params.tlb_sizes[1] > 64)
+ return -EINVAL;
+ if (params.tlb_ways[1] != params.tlb_sizes[1])
+ return -EINVAL;
+ if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+ return -EINVAL;
+ if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
+ return -EINVAL;
+
+ if (!is_power_of_2(params.tlb_ways[0]))
+ return -EINVAL;
+
+ sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+ if (!is_power_of_2(sets))
+ return -EINVAL;
+
+ array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+ array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+ if (cfg->array_len < array_len)
+ return -EINVAL;
+
+ num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+ cfg->array / PAGE_SIZE;
+ pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+ if (ret < 0)
+ goto err_pages;
+
+ if (ret != num_pages) {
+ num_pages = ret;
+ ret = -EFAULT;
+ goto err_put_page;
+ }
+
+ virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+ if (!virt) {
+ ret = -ENOMEM;
+ goto err_put_page;
+ }
+
+ privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+ GFP_KERNEL);
+ privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+ GFP_KERNEL);
+
+ if (!privs[0] || !privs[1]) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
+
+ g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
+ GFP_KERNEL);
+ if (!g2h_bitmap) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
+
+ free_gtlb(vcpu_e500);
+
+ vcpu_e500->gtlb_priv[0] = privs[0];
+ vcpu_e500->gtlb_priv[1] = privs[1];
+ vcpu_e500->g2h_tlb1_map = g2h_bitmap;
+
+ vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+ (virt + (cfg->array & (PAGE_SIZE - 1)));
+
+ vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
+ vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
+
+ vcpu_e500->gtlb_offset[0] = 0;
+ vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+ /* Update vcpu's MMU geometry based on SW_TLB input */
+ vcpu_mmu_geometry_update(vcpu, &params);
+
+ vcpu_e500->shared_tlb_pages = pages;
+ vcpu_e500->num_shared_tlb_pages = num_pages;
+
+ vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
+ vcpu_e500->gtlb_params[0].sets = sets;
+
+ vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
+ vcpu_e500->gtlb_params[1].sets = 1;
+
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ return 0;
+
+err_privs:
+ kfree(privs[0]);
+ kfree(privs[1]);
+
+err_put_page:
+ for (i = 0; i < num_pages; i++)
+ put_page(pages[i]);
+
+err_pages:
+ kfree(pages);
+ return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_dirty_tlb *dirty)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ kvmppc_core_flush_tlb(vcpu);
+ return 0;
+}
+
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+ struct kvmppc_e500_tlb_params *params)
+{
+ /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+ vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+ /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[0] |= params[0].entries;
+ vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params[1].entries;
+ vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+ if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+ vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
+ vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+ vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
+ /* Guest mmu emulation currently doesn't handle E.PT */
+ vcpu->arch.eptcfg = 0;
+ vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+ vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+ }
+
+ return 0;
+}
+
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
+ int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
+ int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
+ if (e500_mmu_host_init(vcpu_e500))
+ goto err;
+
+ vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
+ vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
+
+ vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
+ vcpu_e500->gtlb_params[0].sets =
+ KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+ vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
+ vcpu_e500->gtlb_params[1].sets = 1;
+
+ vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+ if (!vcpu_e500->gtlb_arch)
+ return -ENOMEM;
+
+ vcpu_e500->gtlb_offset[0] = 0;
+ vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
+
+ vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
+ vcpu_e500->gtlb_params[0].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->gtlb_priv[0])
+ goto err;
+
+ vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
+ vcpu_e500->gtlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->gtlb_priv[1])
+ goto err;
+
+ vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
+ vcpu_e500->gtlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->g2h_tlb1_map)
+ goto err;
+
+ vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
+
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ return 0;
+
+err:
+ free_gtlb(vcpu_e500);
+ return -1;
+}
+
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ free_gtlb(vcpu_e500);
+ e500_mmu_host_uninit(vcpu_e500);
+}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
new file mode 100644
index 00000000000..86903d3f5a0
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -0,0 +1,699 @@
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ * Scott Wood, scottwood@freescale.com
+ * Ashish Kalra, ashish.kalra@freescale.com
+ * Varun Sethi, varun.sethi@freescale.com
+ * Alexander Graf, agraf@suse.de
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <asm/kvm_ppc.h>
+
+#include "e500.h"
+#include "timing.h"
+#include "e500_mmu_host.h"
+
+#include "trace_booke.h"
+
+#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
+
+static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
+
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+ /* reserve one entry for magic page */
+ return host_tlb_params[1].entries - tlbcam_index - 1;
+}
+
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+ /* Mask off reserved bits. */
+ mas3 &= MAS3_ATTRIB_MASK;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ if (!usermode) {
+ /* Guest is in supervisor mode,
+ * so we need to translate guest
+ * supervisor permissions into user permissions. */
+ mas3 &= ~E500_TLB_USER_PERM_MASK;
+ mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
+ }
+ mas3 |= E500_TLB_SUPER_PERM_MASK;
+#endif
+ return mas3;
+}
+
+/*
+ * writing shadow tlb entry to host TLB
+ */
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+ uint32_t mas0)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS0, mas0);
+ mtspr(SPRN_MAS1, stlbe->mas1);
+ mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+ mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+ mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_MAS8, stlbe->mas8);
+#endif
+ asm volatile("isync; tlbwe" : : : "memory");
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ /* Must clear mas8 for other host tlbwe's */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+#endif
+ local_irq_restore(flags);
+
+ trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
+ stlbe->mas2, stlbe->mas7_3);
+}
+
+/*
+ * Acquire a mas0 with victim hint, as if we just took a TLB miss.
+ *
+ * We don't care about the address we're searching for, other than that it's
+ * in the right set and is not present in the TLB. Using a zero PID and a
+ * userspace address means we don't have to set and then restore MAS5, or
+ * calculate a proper MAS6 value.
+ */
+static u32 get_host_mas0(unsigned long eaddr)
+{
+ unsigned long flags;
+ u32 mas0;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS6, 0);
+ asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
+ mas0 = mfspr(SPRN_MAS0);
+ local_irq_restore(flags);
+
+ return mas0;
+}
+
+/* sesel is for tlb1 only */
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
+{
+ u32 mas0;
+
+ if (tlbsel == 0) {
+ mas0 = get_host_mas0(stlbe->mas2);
+ __write_host_tlbe(stlbe, mas0);
+ } else {
+ __write_host_tlbe(stlbe,
+ MAS0_TLBSEL(1) |
+ MAS0_ESEL(to_htlb1_esel(sesel)));
+ }
+}
+
+/* sesel is for tlb1 only */
+static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe,
+ int stlbsel, int sesel)
+{
+ int stid;
+
+ preempt_disable();
+ stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
+
+ stlbe->mas1 |= MAS1_TID(stid);
+ write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
+ preempt_enable();
+}
+
+#ifdef CONFIG_KVM_E500V2
+/* XXX should be a hook in the gva2hpa translation */
+void kvmppc_map_magic(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_entry magic;
+ ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+ unsigned int stid;
+ pfn_t pfn;
+
+ pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
+ get_page(pfn_to_page(pfn));
+
+ preempt_disable();
+ stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);
+
+ magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
+ MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
+ magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
+ magic.mas8 = 0;
+
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ preempt_enable();
+}
+#endif
+
+void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+ int esel)
+{
+ struct kvm_book3e_206_tlb_entry *gtlbe =
+ get_entry(vcpu_e500, tlbsel, esel);
+ struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref;
+
+ /* Don't bother with unmapped entries */
+ if (!(ref->flags & E500_TLB_VALID)) {
+ WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0),
+ "%s: flags %x\n", __func__, ref->flags);
+ WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]);
+ }
+
+ if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) {
+ u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
+ int hw_tlb_indx;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ while (tmp) {
+ hw_tlb_indx = __ilog2_u64(tmp & -tmp);
+ mtspr(SPRN_MAS0,
+ MAS0_TLBSEL(1) |
+ MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
+ mtspr(SPRN_MAS1, 0);
+ asm volatile("tlbwe");
+ vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
+ tmp &= tmp - 1;
+ }
+ mb();
+ vcpu_e500->g2h_tlb1_map[esel] = 0;
+ ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID);
+ local_irq_restore(flags);
+ }
+
+ if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) {
+ /*
+ * TLB1 entry is backed by 4k pages. This should happen
+ * rarely and is not worth optimizing. Invalidate everything.
+ */
+ kvmppc_e500_tlbil_all(vcpu_e500);
+ ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
+ }
+
+ /*
+ * If TLB entry is still valid then it's a TLB0 entry, and thus
+ * backed by at most one host tlbe per shadow pid
+ */
+ if (ref->flags & E500_TLB_VALID)
+ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
+
+ /* Mark the TLB as not backed by the host anymore */
+ ref->flags = 0;
+}
+
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
+}
+
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ pfn_t pfn, unsigned int wimg)
+{
+ ref->pfn = pfn;
+ ref->flags = E500_TLB_VALID;
+
+ /* Use guest supplied MAS2_G and MAS2_E */
+ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
+
+ /* Mark the page accessed */
+ kvm_set_pfn_accessed(pfn);
+
+ if (tlbe_is_writable(gtlbe))
+ kvm_set_pfn_dirty(pfn);
+}
+
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
+{
+ if (ref->flags & E500_TLB_VALID) {
+ /* FIXME: don't log bogus pfn for TLB1 */
+ trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
+ ref->flags = 0;
+ }
+}
+
+static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ if (vcpu_e500->g2h_tlb1_map)
+ memset(vcpu_e500->g2h_tlb1_map, 0,
+ sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
+ if (vcpu_e500->h2g_tlb1_rmap)
+ memset(vcpu_e500->h2g_tlb1_rmap, 0,
+ sizeof(unsigned int) * host_tlb_params[1].entries);
+}
+
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int tlbsel;
+ int i;
+
+ for (tlbsel = 0; tlbsel <= 1; tlbsel++) {
+ for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
+ struct tlbe_ref *ref =
+ &vcpu_e500->gtlb_priv[tlbsel][i].ref;
+ kvmppc_e500_ref_release(ref);
+ }
+ }
+}
+
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ kvmppc_e500_tlbil_all(vcpu_e500);
+ clear_tlb_privs(vcpu_e500);
+ clear_tlb1_bitmap(vcpu_e500);
+}
+
+/* TID must be supplied by the caller */
+static void kvmppc_e500_setup_stlbe(
+ struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ int tsize, struct tlbe_ref *ref, u64 gvaddr,
+ struct kvm_book3e_206_tlb_entry *stlbe)
+{
+ pfn_t pfn = ref->pfn;
+ u32 pr = vcpu->arch.shared->msr & MSR_PR;
+
+ BUG_ON(!(ref->flags & E500_TLB_VALID));
+
+ /* Force IPROT=0 for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
+ stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
+ stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
+#endif
+}
+
+static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
+ struct tlbe_ref *ref)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long pfn = 0; /* silence GCC warning */
+ unsigned long hva;
+ int pfnmap = 0;
+ int tsize = BOOK3E_PAGESZ_4K;
+ int ret = 0;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu_e500->vcpu.kvm;
+ unsigned long tsize_pages = 0;
+ pte_t *ptep;
+ unsigned int wimg = 0;
+ pgd_t *pgdir;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /*
+ * Translate guest physical to true physical, acquiring
+ * a page reference if it is normal, non-reserved memory.
+ *
+ * gfn_to_memslot() must succeed because otherwise we wouldn't
+ * have gotten this far. Eventually we should just pass the slot
+ * pointer through from the first lookup.
+ */
+ slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
+ hva = gfn_to_hva_memslot(slot, gfn);
+
+ if (tlbsel == 1) {
+ struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
+
+ vma = find_vma(current->mm, hva);
+ if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * This VMA is a physically contiguous region (e.g.
+ * /dev/mem) that bypasses normal Linux page
+ * management. Find the overlap between the
+ * vma and the memslot.
+ */
+
+ unsigned long start, end;
+ unsigned long slot_start, slot_end;
+
+ pfnmap = 1;
+
+ start = vma->vm_pgoff;
+ end = start +
+ ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+
+ pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
+
+ slot_start = pfn - (gfn - slot->base_gfn);
+ slot_end = slot_start + slot->npages;
+
+ if (start < slot_start)
+ start = slot_start;
+ if (end > slot_end)
+ end = slot_end;
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+ /*
+ * Now find the largest tsize (up to what the guest
+ * requested) that will cover gfn, stay within the
+ * range, and for which gfn and pfn are mutually
+ * aligned.
+ */
+
+ for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+ unsigned long gfn_start, gfn_end;
+ tsize_pages = 1 << (tsize - 2);
+
+ gfn_start = gfn & ~(tsize_pages - 1);
+ gfn_end = gfn_start + tsize_pages;
+
+ if (gfn_start + pfn - gfn < start)
+ continue;
+ if (gfn_end + pfn - gfn > end)
+ continue;
+ if ((gfn & (tsize_pages - 1)) !=
+ (pfn & (tsize_pages - 1)))
+ continue;
+
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pfn &= ~(tsize_pages - 1);
+ break;
+ }
+ } else if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_HUGETLB)) {
+ unsigned long psize = vma_kernel_pagesize(vma);
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * Take the largest page size that satisfies both host
+ * and guest mapping
+ */
+ tsize = min(__ilog2(psize) - 10, tsize);
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+ }
+
+ up_read(&current->mm->mmap_sem);
+ }
+
+ if (likely(!pfnmap)) {
+ tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (is_error_noslot_pfn(pfn)) {
+ if (printk_ratelimit())
+ pr_err("%s: real page not found for gfn %lx\n",
+ __func__, (long)gfn);
+ return -EINVAL;
+ }
+
+ /* Align guest and physical address to page map boundaries */
+ pfn &= ~(tsize_pages - 1);
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ }
+
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+
+ pgdir = vcpu_e500->vcpu.arch.pgdir;
+ ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
+ if (pte_present(*ptep))
+ wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+ else {
+ if (printk_ratelimit())
+ pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
+ __func__, (long)gfn, pfn);
+ ret = -EINVAL;
+ goto out;
+ }
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+
+ kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
+ ref, gvaddr, stlbe);
+
+ /* Clear i-cache for new pages */
+ kvmppc_mmu_flush_icache(pfn);
+
+out:
+ spin_unlock(&kvm->mmu_lock);
+
+ /* Drop refcount on page, so that mmu notifiers can clear it */
+ kvm_release_pfn_clean(pfn);
+
+ return ret;
+}
+
+/* XXX only map the one-one case, for now use TLB0 */
+static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel,
+ struct kvm_book3e_206_tlb_entry *stlbe)
+{
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ struct tlbe_ref *ref;
+ int stlbsel = 0;
+ int sesel = 0;
+ int r;
+
+ gtlbe = get_entry(vcpu_e500, 0, esel);
+ ref = &vcpu_e500->gtlb_priv[0][esel].ref;
+
+ r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+ gtlbe, 0, stlbe, ref);
+ if (r)
+ return r;
+
+ write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel);
+
+ return 0;
+}
+
+static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct tlbe_ref *ref,
+ int esel)
+{
+ unsigned int sesel = vcpu_e500->host_tlb1_nv++;
+
+ if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
+ vcpu_e500->host_tlb1_nv = 0;
+
+ if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
+ unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1;
+ vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
+ }
+
+ vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+ vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
+ vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1;
+ WARN_ON(!(ref->flags & E500_TLB_VALID));
+
+ return sesel;
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+/* For both one-one and one-to-many */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe, int esel)
+{
+ struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref;
+ int sesel;
+ int r;
+
+ r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe,
+ ref);
+ if (r)
+ return r;
+
+ /* Use TLB0 when we can only map a page with 4k */
+ if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) {
+ vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0;
+ write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0);
+ return 0;
+ }
+
+ /* Otherwise map into TLB1 */
+ sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel);
+ write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel);
+
+ return 0;
+}
+
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+ unsigned int index)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe_priv *priv;
+ struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
+ int tlbsel = tlbsel_of(index);
+ int esel = esel_of(index);
+
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+ switch (tlbsel) {
+ case 0:
+ priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
+
+ /* Triggers after clear_tlb_privs or on initial mapping */
+ if (!(priv->ref.flags & E500_TLB_VALID)) {
+ kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ } else {
+ kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+ &priv->ref, eaddr, &stlbe);
+ write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0);
+ }
+ break;
+
+ case 1: {
+ gfn_t gfn = gpaddr >> PAGE_SHIFT;
+ kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe,
+ esel);
+ break;
+ }
+
+ default:
+ BUG();
+ break;
+ }
+}
+
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ /*
+ * Flush all shadow tlb entries everywhere. This is slow, but
+ * we are 100% sure that we catch the to be unmapped page
+ */
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ /* kvm_unmap_hva flushes everything anyways */
+ kvm_unmap_hva(kvm, start);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
+int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
+ host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ /*
+ * This should never happen on real e500 hardware, but is
+ * architecturally possible -- e.g. in some weird nested
+ * virtualization case.
+ */
+ if (host_tlb_params[0].entries == 0 ||
+ host_tlb_params[1].entries == 0) {
+ pr_err("%s: need to know host tlb size\n", __func__);
+ return -ENODEV;
+ }
+
+ host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
+ TLBnCFG_ASSOC_SHIFT;
+ host_tlb_params[1].ways = host_tlb_params[1].entries;
+
+ if (!is_power_of_2(host_tlb_params[0].entries) ||
+ !is_power_of_2(host_tlb_params[0].ways) ||
+ host_tlb_params[0].entries < host_tlb_params[0].ways ||
+ host_tlb_params[0].ways == 0) {
+ pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
+ __func__, host_tlb_params[0].entries,
+ host_tlb_params[0].ways);
+ return -ENODEV;
+ }
+
+ host_tlb_params[0].sets =
+ host_tlb_params[0].entries / host_tlb_params[0].ways;
+ host_tlb_params[1].sets = 1;
+
+ vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
+ host_tlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->h2g_tlb1_rmap)
+ return -EINVAL;
+
+ return 0;
+}
+
+void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->h2g_tlb1_rmap);
+}
diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h
new file mode 100644
index 00000000000..7624835b76c
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu_host.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef KVM_E500_MMU_HOST_H
+#define KVM_E500_MMU_HOST_H
+
+void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+ int esel);
+
+int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#endif /* KVM_E500_MMU_HOST_H */
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
deleted file mode 100644
index d6d6d47a75a..00000000000
--- a/arch/powerpc/kvm/e500_tlb.c
+++ /dev/null
@@ -1,762 +0,0 @@
-/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, yu.liu@freescale.com
- *
- * Description:
- * This file is based on arch/powerpc/kvm/44x_tlb.c,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/highmem.h>
-#include <asm/kvm_ppc.h>
-#include <asm/kvm_e500.h>
-
-#include "../mm/mmu_decl.h"
-#include "e500_tlb.h"
-#include "trace.h"
-
-#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
-
-static unsigned int tlb1_entry_num;
-
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe *tlbe;
- int i, tlbsel;
-
- printk("| %8s | %8s | %8s | %8s | %8s |\n",
- "nr", "mas1", "mas2", "mas3", "mas7");
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- printk("Guest TLB%d:\n", tlbsel);
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
- tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
- if (tlbe->mas1 & MAS1_VALID)
- printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
- tlbsel, i, tlbe->mas1, tlbe->mas2,
- tlbe->mas3, tlbe->mas7);
- }
- }
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- printk("Shadow TLB%d:\n", tlbsel);
- for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
- tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
- if (tlbe->mas1 & MAS1_VALID)
- printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
- tlbsel, i, tlbe->mas1, tlbe->mas2,
- tlbe->mas3, tlbe->mas7);
- }
- }
-}
-
-static inline unsigned int tlb0_get_next_victim(
- struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- unsigned int victim;
-
- victim = vcpu_e500->guest_tlb_nv[0]++;
- if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
- vcpu_e500->guest_tlb_nv[0] = 0;
-
- return victim;
-}
-
-static inline unsigned int tlb1_max_shadow_size(void)
-{
- return tlb1_entry_num - tlbcam_index;
-}
-
-static inline int tlbe_is_writable(struct tlbe *tlbe)
-{
- return tlbe->mas3 & (MAS3_SW|MAS3_UW);
-}
-
-static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
-{
- /* Mask off reserved bits. */
- mas3 &= MAS3_ATTRIB_MASK;
-
- if (!usermode) {
- /* Guest is in supervisor mode,
- * so we need to translate guest
- * supervisor permissions into user permissions. */
- mas3 &= ~E500_TLB_USER_PERM_MASK;
- mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
- }
-
- return mas3 | E500_TLB_SUPER_PERM_MASK;
-}
-
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
-{
-#ifdef CONFIG_SMP
- return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
- return mas2 & MAS2_ATTRIB_MASK;
-#endif
-}
-
-/*
- * writing shadow tlb entry to host TLB
- */
-static inline void __write_host_tlbe(struct tlbe *stlbe)
-{
- mtspr(SPRN_MAS1, stlbe->mas1);
- mtspr(SPRN_MAS2, stlbe->mas2);
- mtspr(SPRN_MAS3, stlbe->mas3);
- mtspr(SPRN_MAS7, stlbe->mas7);
- __asm__ __volatile__ ("tlbwe\n" : : );
-}
-
-static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
- local_irq_disable();
- if (tlbsel == 0) {
- __write_host_tlbe(stlbe);
- } else {
- unsigned register mas0;
-
- mas0 = mfspr(SPRN_MAS0);
-
- mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
- __write_host_tlbe(stlbe);
-
- mtspr(SPRN_MAS0, mas0);
- }
- local_irq_enable();
-}
-
-void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int i;
- unsigned register mas0;
-
- /* Load all valid TLB1 entries to reduce guest tlb miss fault */
- local_irq_disable();
- mas0 = mfspr(SPRN_MAS0);
- for (i = 0; i < tlb1_max_shadow_size(); i++) {
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
-
- if (get_tlb_v(stlbe)) {
- mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
- | MAS0_ESEL(to_htlb1_esel(i)));
- __write_host_tlbe(stlbe);
- }
- }
- mtspr(SPRN_MAS0, mas0);
- local_irq_enable();
-}
-
-void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
-{
- _tlbil_all();
-}
-
-/* Search the guest TLB for a matching entry. */
-static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
- gva_t eaddr, int tlbsel, unsigned int pid, int as)
-{
- int i;
-
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
- struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
- unsigned int tid;
-
- if (eaddr < get_tlb_eaddr(tlbe))
- continue;
-
- if (eaddr > get_tlb_end(tlbe))
- continue;
-
- tid = get_tlb_tid(tlbe);
- if (tid && (tid != pid))
- continue;
-
- if (!get_tlb_v(tlbe))
- continue;
-
- if (get_tlb_ts(tlbe) != as && as != -1)
- continue;
-
- return i;
- }
-
- return -1;
-}
-
-static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
- struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
-
- if (page) {
- vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
-
- if (get_tlb_v(stlbe)) {
- if (tlbe_is_writable(stlbe))
- kvm_release_page_dirty(page);
- else
- kvm_release_page_clean(page);
- }
- }
-}
-
-static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
- stlbe->mas1 = 0;
- trace_kvm_stlb_inval(index_of(tlbsel, esel));
-}
-
-static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- gva_t eaddr, gva_t eend, u32 tid)
-{
- unsigned int pid = tid & 0xff;
- unsigned int i;
-
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
- struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
- unsigned int tid;
-
- if (!get_tlb_v(stlbe))
- continue;
-
- if (eend < get_tlb_eaddr(stlbe))
- continue;
-
- if (eaddr > get_tlb_end(stlbe))
- continue;
-
- tid = get_tlb_tid(stlbe);
- if (tid && (tid != pid))
- continue;
-
- kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
- write_host_tlbe(vcpu_e500, 1, i);
- }
-}
-
-static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
- unsigned int eaddr, int as)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- unsigned int victim, pidsel, tsized;
- int tlbsel;
-
- /* since we only have two TLBs, only lower bit is used. */
- tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
- victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
- pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
- tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
-
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
- vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
- | MAS1_TID(vcpu_e500->pid[pidsel])
- | MAS1_TSIZE(tsized);
- vcpu_e500->mas2 = (eaddr & MAS2_EPN)
- | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
- vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
- vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
- | (get_cur_pid(vcpu) << 16)
- | (as ? MAS6_SAS : 0);
- vcpu_e500->mas7 = 0;
-}
-
-static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
-{
- struct page *new_page;
- struct tlbe *stlbe;
- hpa_t hpaddr;
-
- stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
-
- /* Get reference to new page. */
- new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
- if (is_error_page(new_page)) {
- printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n",
- (long)gfn);
- kvm_release_page_clean(new_page);
- return;
- }
- hpaddr = page_to_phys(new_page);
-
- /* Drop reference to old page. */
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
-
- vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
-
- /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
- | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
- stlbe->mas2 = (gvaddr & MAS2_EPN)
- | e500_shadow_mas2_attrib(gtlbe->mas2,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas3 = (hpaddr & MAS3_RPN)
- | e500_shadow_mas3_attrib(gtlbe->mas3,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
-
- trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
- stlbe->mas3, stlbe->mas7);
-}
-
-/* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *gtlbe;
-
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
-
- kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
- get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
- gtlbe, tlbsel, esel);
-
- return esel;
-}
-
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-/* XXX for both one-one and one-to-many , for now use TLB1 */
-static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
-{
- unsigned int victim;
-
- victim = vcpu_e500->guest_tlb_nv[1]++;
-
- if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
- vcpu_e500->guest_tlb_nv[1] = 0;
-
- kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
-
- return victim;
-}
-
-/* Invalidate all guest kernel mappings when enter usermode,
- * so that when they fault back in they will get the
- * proper permission bits. */
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
-{
- if (usermode) {
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int i;
-
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i < tlb1_max_shadow_size(); i++)
- kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
-
- _tlbil_all();
- }
-}
-
-static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
-{
- struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
-
- if (unlikely(get_tlb_iprot(gtlbe)))
- return -1;
-
- if (tlbsel == 1) {
- kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
- get_tlb_end(gtlbe),
- get_tlb_tid(gtlbe));
- } else {
- kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
- }
-
- gtlbe->mas1 = 0;
-
- return 0;
-}
-
-int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
-{
- int esel;
-
- if (value & MMUCSR0_TLB0FI)
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
- kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
- if (value & MMUCSR0_TLB1FI)
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
- kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
-
- _tlbil_all();
-
- return EMULATE_DONE;
-}
-
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- unsigned int ia;
- int esel, tlbsel;
- gva_t ea;
-
- ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
-
- ia = (ea >> 2) & 0x1;
-
- /* since we only have two TLBs, only lower bit is used. */
- tlbsel = (ea >> 3) & 0x1;
-
- if (ia) {
- /* invalidate all entries */
- for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
- kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
- } else {
- ea &= 0xfffff000;
- esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
- get_cur_pid(vcpu), -1);
- if (esel >= 0)
- kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
- }
-
- _tlbil_all();
-
- return EMULATE_DONE;
-}
-
-int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int tlbsel, esel;
- struct tlbe *gtlbe;
-
- tlbsel = get_tlb_tlbsel(vcpu_e500);
- esel = get_tlb_esel(vcpu_e500, tlbsel);
-
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
- vcpu_e500->mas0 &= ~MAS0_NV(~0);
- vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
- vcpu_e500->mas1 = gtlbe->mas1;
- vcpu_e500->mas2 = gtlbe->mas2;
- vcpu_e500->mas3 = gtlbe->mas3;
- vcpu_e500->mas7 = gtlbe->mas7;
-
- return EMULATE_DONE;
-}
-
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int as = !!get_cur_sas(vcpu_e500);
- unsigned int pid = get_cur_spid(vcpu_e500);
- int esel, tlbsel;
- struct tlbe *gtlbe = NULL;
- gva_t ea;
-
- ea = kvmppc_get_gpr(vcpu, rb);
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
- if (esel >= 0) {
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
- break;
- }
- }
-
- if (gtlbe) {
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
- vcpu_e500->mas1 = gtlbe->mas1;
- vcpu_e500->mas2 = gtlbe->mas2;
- vcpu_e500->mas3 = gtlbe->mas3;
- vcpu_e500->mas7 = gtlbe->mas7;
- } else {
- int victim;
-
- /* since we only have two TLBs, only lower bit is used. */
- tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
- victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
-
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
- | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
- vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
- | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
- | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
- vcpu_e500->mas2 &= MAS2_EPN;
- vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
- vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
- vcpu_e500->mas7 = 0;
- }
-
- return EMULATE_DONE;
-}
-
-int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- u64 eaddr;
- u64 raddr;
- u32 tid;
- struct tlbe *gtlbe;
- int tlbsel, esel, stlbsel, sesel;
-
- tlbsel = get_tlb_tlbsel(vcpu_e500);
- esel = get_tlb_esel(vcpu_e500, tlbsel);
-
- gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
-
- if (get_tlb_v(gtlbe) && tlbsel == 1) {
- eaddr = get_tlb_eaddr(gtlbe);
- tid = get_tlb_tid(gtlbe);
- kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
- get_tlb_end(gtlbe), tid);
- }
-
- gtlbe->mas1 = vcpu_e500->mas1;
- gtlbe->mas2 = vcpu_e500->mas2;
- gtlbe->mas3 = vcpu_e500->mas3;
- gtlbe->mas7 = vcpu_e500->mas7;
-
- trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
- gtlbe->mas3, gtlbe->mas7);
-
- /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
- if (tlbe_is_host_safe(vcpu, gtlbe)) {
- switch (tlbsel) {
- case 0:
- /* TLB0 */
- gtlbe->mas1 &= ~MAS1_TSIZE(~0);
- gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
-
- stlbsel = 0;
- sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
-
- break;
-
- case 1:
- /* TLB1 */
- eaddr = get_tlb_eaddr(gtlbe);
- raddr = get_tlb_raddr(gtlbe);
-
- /* Create a 4KB mapping on the host.
- * If the guest wanted a large page,
- * only the first 4KB is mapped here and the rest
- * are mapped on the fly. */
- stlbsel = 1;
- sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
- raddr >> PAGE_SHIFT, gtlbe);
- break;
-
- default:
- BUG();
- }
- write_host_tlbe(vcpu_e500, stlbsel, sesel);
- }
-
- return EMULATE_DONE;
-}
-
-int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
-{
- unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
-
- return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
-}
-
-int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
-{
- unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
-
- return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
-}
-
-void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
-{
- unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
-
- kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
-}
-
-void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
-{
- unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
-
- kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
-}
-
-gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
- gva_t eaddr)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe *gtlbe =
- &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
- u64 pgmask = get_tlb_bytes(gtlbe) - 1;
-
- return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
-}
-
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int tlbsel, i;
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++)
- for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
- kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
-
- /* discard all guest mapping */
- _tlbil_all();
-}
-
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
- unsigned int index)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int tlbsel = tlbsel_of(index);
- int esel = esel_of(index);
- int stlbsel, sesel;
-
- switch (tlbsel) {
- case 0:
- stlbsel = 0;
- sesel = esel;
- break;
-
- case 1: {
- gfn_t gfn = gpaddr >> PAGE_SHIFT;
- struct tlbe *gtlbe
- = &vcpu_e500->guest_tlb[tlbsel][esel];
-
- stlbsel = 1;
- sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
- break;
- }
-
- default:
- BUG();
- break;
- }
- write_host_tlbe(vcpu_e500, stlbsel, sesel);
-}
-
-int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
- gva_t eaddr, unsigned int pid, int as)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int esel, tlbsel;
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
- if (esel >= 0)
- return index_of(tlbsel, esel);
- }
-
- return -1;
-}
-
-void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- struct tlbe *tlbe;
-
- /* Insert large initial mapping for guest. */
- tlbe = &vcpu_e500->guest_tlb[1][0];
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
- tlbe->mas2 = 0;
- tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
- tlbe->mas7 = 0;
-
- /* 4K map for serial output. Used by kernel wrapper. */
- tlbe = &vcpu_e500->guest_tlb[1][1];
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
- tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
- tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
- tlbe->mas7 = 0;
-}
-
-int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
-
- vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
- vcpu_e500->guest_tlb[0] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->guest_tlb[0] == NULL)
- goto err_out;
-
- vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
- vcpu_e500->shadow_tlb[0] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->shadow_tlb[0] == NULL)
- goto err_out_guest0;
-
- vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
- vcpu_e500->guest_tlb[1] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
- if (vcpu_e500->guest_tlb[1] == NULL)
- goto err_out_shadow0;
-
- vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
- vcpu_e500->shadow_tlb[1] =
- kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
- if (vcpu_e500->shadow_tlb[1] == NULL)
- goto err_out_guest1;
-
- vcpu_e500->shadow_pages[0] = (struct page **)
- kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->shadow_pages[0] == NULL)
- goto err_out_shadow1;
-
- vcpu_e500->shadow_pages[1] = (struct page **)
- kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
- if (vcpu_e500->shadow_pages[1] == NULL)
- goto err_out_page0;
-
- /* Init TLB configuration register */
- vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
- vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
- vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
- vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
-
- return 0;
-
-err_out_page0:
- kfree(vcpu_e500->shadow_pages[0]);
-err_out_shadow1:
- kfree(vcpu_e500->shadow_tlb[1]);
-err_out_guest1:
- kfree(vcpu_e500->guest_tlb[1]);
-err_out_shadow0:
- kfree(vcpu_e500->shadow_tlb[0]);
-err_out_guest0:
- kfree(vcpu_e500->guest_tlb[0]);
-err_out:
- return -1;
-}
-
-void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- kfree(vcpu_e500->shadow_pages[1]);
- kfree(vcpu_e500->shadow_pages[0]);
- kfree(vcpu_e500->shadow_tlb[1]);
- kfree(vcpu_e500->guest_tlb[1]);
- kfree(vcpu_e500->shadow_tlb[0]);
- kfree(vcpu_e500->guest_tlb[0]);
-}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
deleted file mode 100644
index 458946b4775..00000000000
--- a/arch/powerpc/kvm/e500_tlb.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, yu.liu@freescale.com
- *
- * Description:
- * This file is based on arch/powerpc/kvm/44x_tlb.h,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __KVM_E500_TLB_H__
-#define __KVM_E500_TLB_H__
-
-#include <linux/kvm_host.h>
-#include <asm/mmu-book3e.h>
-#include <asm/tlb.h>
-#include <asm/kvm_e500.h>
-
-#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
-#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
-#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
-
-#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
-#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
-#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
-
-#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
-#define KVM_E500_TLB1_SIZE 16
-
-#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
-#define tlbsel_of(index) ((index) >> 16)
-#define esel_of(index) ((index) & 0xFFFF)
-
-#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
-#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
-#define MAS2_ATTRIB_MASK \
- (MAS2_X0 | MAS2_X1)
-#define MAS3_ATTRIB_MASK \
- (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
- | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-
-extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
-extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
-extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
-extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
-extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
-
-/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
-{
- return (tlbe->mas1 >> 7) & 0x1f;
-}
-
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
-{
- return tlbe->mas2 & 0xfffff000;
-}
-
-static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
-{
- unsigned int pgsize = get_tlb_size(tlbe);
- return 1ULL << 10 << pgsize;
-}
-
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
-{
- u64 bytes = get_tlb_bytes(tlbe);
- return get_tlb_eaddr(tlbe) + bytes - 1;
-}
-
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
-{
- u64 rpn = tlbe->mas7;
- return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
-}
-
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
-{
- return (tlbe->mas1 >> 16) & 0xff;
-}
-
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
-{
- return (tlbe->mas1 >> 12) & 0x1;
-}
-
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
-{
- return (tlbe->mas1 >> 31) & 0x1;
-}
-
-static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
-{
- return (tlbe->mas1 >> 30) & 0x1;
-}
-
-static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.pid & 0xff;
-}
-
-static inline unsigned int get_cur_spid(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- return (vcpu_e500->mas6 >> 16) & 0xff;
-}
-
-static inline unsigned int get_cur_sas(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- return vcpu_e500->mas6 & 0x1;
-}
-
-static inline unsigned int get_tlb_tlbsel(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- /*
- * Manual says that tlbsel has 2 bits wide.
- * Since we only have two TLBs, only lower bit is used.
- */
- return (vcpu_e500->mas0 >> 28) & 0x1;
-}
-
-static inline unsigned int get_tlb_nv_bit(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- return vcpu_e500->mas0 & 0xfff;
-}
-
-static inline unsigned int get_tlb_esel_bit(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- return (vcpu_e500->mas0 >> 16) & 0xfff;
-}
-
-static inline unsigned int get_tlb_esel(
- const struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel)
-{
- unsigned int esel = get_tlb_esel_bit(vcpu_e500);
-
- if (tlbsel == 0) {
- esel &= KVM_E500_TLB0_WAY_NUM_MASK;
- esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
- << KVM_E500_TLB0_WAY_NUM_BIT;
- } else {
- esel &= KVM_E500_TLB1_SIZE - 1;
- }
-
- return esel;
-}
-
-static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct tlbe *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
new file mode 100644
index 00000000000..17e45627922
--- /dev/null
+++ b/arch/powerpc/kvm/e500mc.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Varun Sethi, <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/e500.c,
+ * by Yu Liu <yu.liu@freescale.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+#include "booke.h"
+#include "e500.h"
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
+{
+ enum ppc_dbell dbell_type;
+ unsigned long tag;
+
+ switch (type) {
+ case INT_CLASS_NONCRIT:
+ dbell_type = PPC_G_DBELL;
+ break;
+ case INT_CLASS_CRIT:
+ dbell_type = PPC_G_DBELL_CRIT;
+ break;
+ case INT_CLASS_MC:
+ dbell_type = PPC_G_DBELL_MC;
+ break;
+ default:
+ WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
+ return;
+ }
+
+
+ tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ mb();
+ ppc_msgsnd(dbell_type, 0, tag);
+}
+
+/* gtlbe must not be mapped by more than one host tlb entry */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned int tid, ts;
+ gva_t eaddr;
+ u32 val, lpid;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+ lpid = vcpu_e500->vcpu.kvm->arch.lpid;
+
+ /* We search the host TLB to invalidate its shadow TLB entry */
+ val = (tid << 16) | ts;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+
+ asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+ mtspr(SPRN_MAS5, 0);
+ /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+
+ local_irq_restore(flags);
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
+ asm volatile("tlbilxlpid");
+ mtspr(SPRN_MAS5, 0);
+ local_irq_restore(flags);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ vcpu->arch.pid = pid;
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+}
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
+
+static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+ mtspr(SPRN_GPIR, vcpu->vcpu_id);
+ mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ mtspr(SPRN_EPLC, vcpu->arch.eplc);
+ mtspr(SPRN_EPSC, vcpu->arch.epsc);
+
+ mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
+ mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
+ mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
+ mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
+ mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
+
+ mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
+ mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
+
+ mtspr(SPRN_GEPR, vcpu->arch.epr);
+ mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
+ mtspr(SPRN_GESR, vcpu->arch.shared->esr);
+
+ if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
+ __get_cpu_var(last_vcpu_on_cpu) != vcpu) {
+ kvmppc_e500_tlbil_all(vcpu_e500);
+ __get_cpu_var(last_vcpu_on_cpu) = vcpu;
+ }
+
+ kvmppc_load_guest_fp(vcpu);
+}
+
+static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.eplc = mfspr(SPRN_EPLC);
+ vcpu->arch.epsc = mfspr(SPRN_EPSC);
+
+ vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
+ vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
+ vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
+ vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
+
+ vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
+ vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
+
+ vcpu->arch.epr = mfspr(SPRN_GEPR);
+ vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
+ vcpu->arch.shared->esr = mfspr(SPRN_GESR);
+
+ vcpu->arch.oldpir = mfspr(SPRN_PIR);
+
+ kvmppc_booke_vcpu_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0)
+ r = 0;
+ else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
+ SPRN_EPCR_DUVD;
+#ifdef CONFIG_64BIT
+ vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
+#endif
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
+ vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
+
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu_e500->svr = mfspr(SPRN_SVR);
+
+ vcpu->arch.cpu_type = KVM_CPU_E500MC;
+
+ return 0;
+}
+
+static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
+ KVM_SREGS_E_PC;
+ sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+ sregs->u.e.impl.fsl.features = 0;
+ sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+ sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+ sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+
+ sregs->u.e.ivor_high[3] =
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
+
+ return kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
+
+ if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+ vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+ vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+ vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+ }
+
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ if (sregs->u.e.features & KVM_SREGS_E_PM) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+ sregs->u.e.ivor_high[3];
+ }
+
+ if (sregs->u.e.features & KVM_SREGS_E_PC) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
+ sregs->u.e.ivor_high[4];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
+ sregs->u.e.ivor_high[5];
+ }
+
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_e500) {
+ err = -ENOMEM;
+ goto out;
+ }
+ vcpu = &vcpu_e500->vcpu;
+
+ /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
+ vcpu->arch.oldpir = 0xffffffff;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = kvmppc_e500_tlb_init(vcpu_e500);
+ if (err)
+ goto uninit_vcpu;
+
+ vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.shared)
+ goto uninit_tlb;
+
+ return vcpu;
+
+uninit_tlb:
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+ return ERR_PTR(err);
+}
+
+static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared);
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
+{
+ int lpid;
+
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ return lpid;
+
+ kvm->arch.lpid = lpid;
+ return 0;
+}
+
+static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
+{
+ kvmppc_free_lpid(kvm->arch.lpid);
+}
+
+static struct kvmppc_ops kvm_ops_e500mc = {
+ .get_sregs = kvmppc_core_get_sregs_e500mc,
+ .set_sregs = kvmppc_core_set_sregs_e500mc,
+ .get_one_reg = kvmppc_get_one_reg_e500mc,
+ .set_one_reg = kvmppc_set_one_reg_e500mc,
+ .vcpu_load = kvmppc_core_vcpu_load_e500mc,
+ .vcpu_put = kvmppc_core_vcpu_put_e500mc,
+ .vcpu_create = kvmppc_core_vcpu_create_e500mc,
+ .vcpu_free = kvmppc_core_vcpu_free_e500mc,
+ .mmu_destroy = kvmppc_mmu_destroy_e500,
+ .init_vm = kvmppc_core_init_vm_e500mc,
+ .destroy_vm = kvmppc_core_destroy_vm_e500mc,
+ .emulate_op = kvmppc_core_emulate_op_e500,
+ .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+ .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
+static int __init kvmppc_e500mc_init(void)
+{
+ int r;
+
+ r = kvmppc_booke_init();
+ if (r)
+ goto err_out;
+
+ kvmppc_init_lpid(64);
+ kvmppc_claim_lpid(0); /* host */
+
+ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ if (r)
+ goto err_out;
+ kvm_ops_e500mc.owner = THIS_MODULE;
+ kvmppc_pr_ops = &kvm_ops_e500mc;
+
+err_out:
+ return r;
+}
+
+static void __exit kvmppc_e500mc_exit(void)
+{
+ kvmppc_pr_ops = NULL;
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500mc_init);
+module_exit(kvmppc_e500mc_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c64fd2909bb..da86d9ba347 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -22,96 +23,188 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
#include <asm/reg.h>
#include <asm/time.h>
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
#include "timing.h"
#include "trace.h"
-#define OP_TRAP 3
-#define OP_TRAP_64 2
-
-#define OP_31_XOP_LWZX 23
-#define OP_31_XOP_LBZX 87
-#define OP_31_XOP_STWX 151
-#define OP_31_XOP_STBX 215
-#define OP_31_XOP_LBZUX 119
-#define OP_31_XOP_STBUX 247
-#define OP_31_XOP_LHZX 279
-#define OP_31_XOP_LHZUX 311
-#define OP_31_XOP_MFSPR 339
-#define OP_31_XOP_LHAX 343
-#define OP_31_XOP_STHX 407
-#define OP_31_XOP_STHUX 439
-#define OP_31_XOP_MTSPR 467
-#define OP_31_XOP_DCBI 470
-#define OP_31_XOP_LWBRX 534
-#define OP_31_XOP_TLBSYNC 566
-#define OP_31_XOP_STWBRX 662
-#define OP_31_XOP_LHBRX 790
-#define OP_31_XOP_STHBRX 918
-
-#define OP_LWZ 32
-#define OP_LWZU 33
-#define OP_LBZ 34
-#define OP_LBZU 35
-#define OP_STW 36
-#define OP_STWU 37
-#define OP_STB 38
-#define OP_STBU 39
-#define OP_LHZ 40
-#define OP_LHZU 41
-#define OP_LHA 42
-#define OP_LHAU 43
-#define OP_STH 44
-#define OP_STHU 45
-
-#ifdef CONFIG_PPC_BOOK3S
-static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
-{
- return 1;
-}
-#else
-static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.tcr & TCR_DIE;
-}
-#endif
-
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
unsigned long dec_nsec;
+ unsigned long long dec_time;
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
#ifdef CONFIG_PPC_BOOK3S
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
/* POWER4+ triggers a dec interrupt if the value is < 0 */
if (vcpu->arch.dec & 0x80000000) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
kvmppc_core_queue_dec(vcpu);
return;
}
#endif
- if (kvmppc_dec_enabled(vcpu)) {
- /* The decrementer ticks at the same rate as the timebase, so
- * that's how we convert the guest DEC value to the number of
- * host ticks. */
-
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- dec_nsec = vcpu->arch.dec;
- dec_nsec *= 1000;
- dec_nsec /= tb_ticks_per_usec;
- hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
- HRTIMER_MODE_REL);
- vcpu->arch.dec_jiffies = get_tb();
- } else {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+#ifdef CONFIG_BOOKE
+ /* On BOOKE, DEC = 0 is as good as decrementer not enabled */
+ if (vcpu->arch.dec == 0)
+ return;
+#endif
+
+ /*
+ * The decrementer ticks at the same rate as the timebase, so
+ * that's how we convert the guest DEC value to the number of
+ * host ticks.
+ */
+
+ dec_time = vcpu->arch.dec;
+ /*
+ * Guest timebase ticks at the same frequency as host decrementer.
+ * So use the host decrementer calculations for decrementer emulation.
+ */
+ dec_time = dec_time << decrementer_clockevent.shift;
+ do_div(dec_time, decrementer_clockevent.mult);
+ dec_nsec = do_div(dec_time, NSEC_PER_SEC);
+ hrtimer_start(&vcpu->arch.dec_timer,
+ ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
+ vcpu->arch.dec_jiffies = get_tb();
+}
+
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+ u64 jd = tb - vcpu->arch.dec_jiffies;
+
+#ifdef CONFIG_BOOKE
+ if (vcpu->arch.dec < jd)
+ return 0;
+#endif
+
+ return vcpu->arch.dec - jd;
+}
+
+static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
+
+ switch (sprn) {
+ case SPRN_SRR0:
+ kvmppc_set_srr0(vcpu, spr_val);
+ break;
+ case SPRN_SRR1:
+ kvmppc_set_srr1(vcpu, spr_val);
+ break;
+
+ /* XXX We need to context-switch the timebase for
+ * watchdog and FIT. */
+ case SPRN_TBWL: break;
+ case SPRN_TBWU: break;
+
+ case SPRN_DEC:
+ vcpu->arch.dec = spr_val;
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ case SPRN_SPRG0:
+ kvmppc_set_sprg0(vcpu, spr_val);
+ break;
+ case SPRN_SPRG1:
+ kvmppc_set_sprg1(vcpu, spr_val);
+ break;
+ case SPRN_SPRG2:
+ kvmppc_set_sprg2(vcpu, spr_val);
+ break;
+ case SPRN_SPRG3:
+ kvmppc_set_sprg3(vcpu, spr_val);
+ break;
+
+ /* PIR can legally be written, but we ignore it */
+ case SPRN_PIR: break;
+
+ default:
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
+ spr_val);
+ if (emulated == EMULATE_FAIL)
+ printk(KERN_INFO "mtspr: unknown spr "
+ "0x%x\n", sprn);
+ break;
}
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+ ulong spr_val = 0;
+
+ switch (sprn) {
+ case SPRN_SRR0:
+ spr_val = kvmppc_get_srr0(vcpu);
+ break;
+ case SPRN_SRR1:
+ spr_val = kvmppc_get_srr1(vcpu);
+ break;
+ case SPRN_PVR:
+ spr_val = vcpu->arch.pvr;
+ break;
+ case SPRN_PIR:
+ spr_val = vcpu->vcpu_id;
+ break;
+
+ /* Note: mftb and TBRL/TBWL are user-accessible, so
+ * the guest can always access the real TB anyways.
+ * In fact, we probably will never see these traps. */
+ case SPRN_TBWL:
+ spr_val = get_tb() >> 32;
+ break;
+ case SPRN_TBWU:
+ spr_val = get_tb();
+ break;
+
+ case SPRN_SPRG0:
+ spr_val = kvmppc_get_sprg0(vcpu);
+ break;
+ case SPRN_SPRG1:
+ spr_val = kvmppc_get_sprg1(vcpu);
+ break;
+ case SPRN_SPRG2:
+ spr_val = kvmppc_get_sprg2(vcpu);
+ break;
+ case SPRN_SPRG3:
+ spr_val = kvmppc_get_sprg3(vcpu);
+ break;
+ /* Note: SPRG4-7 are user-readable, so we don't get
+ * a trap. */
+
+ case SPRN_DEC:
+ spr_val = kvmppc_get_dec(vcpu, get_tb());
+ break;
+ default:
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
+ &spr_val);
+ if (unlikely(emulated == EMULATE_FAIL)) {
+ printk(KERN_INFO "mfspr: unknown spr "
+ "0x%x\n", sprn);
+ }
+ break;
+ }
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, rt, spr_val);
+ kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+
+ return emulated;
}
/* XXX to do:
@@ -126,19 +219,16 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
* lmw
* stmw
*
- * XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
/* XXX Should probably auto-generate instruction decoding for a particular core
* from opcode tables in the future. */
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = kvmppc_get_last_inst(vcpu);
- u32 ea;
- int ra;
- int rb;
- int rs;
- int rt;
- int sprn;
+ int ra = get_ra(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int sprn = get_sprn(inst);
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
@@ -153,7 +243,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_TRAP_64:
kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
#else
- kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
+ kvmppc_core_queue_program(vcpu,
+ vcpu->arch.shared->esr | ESR_PTR);
#endif
advance = 0;
break;
@@ -161,210 +252,86 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 31:
switch (get_xop(inst)) {
+ case OP_31_XOP_TRAP:
+#ifdef CONFIG_64BIT
+ case OP_31_XOP_TRAP_64:
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+ kvmppc_core_queue_program(vcpu,
+ vcpu->arch.shared->esr | ESR_PTR);
+#endif
+ advance = 0;
+ break;
case OP_31_XOP_LWZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case OP_31_XOP_LBZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_31_XOP_LBZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_STWX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_31_XOP_STBX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_31_XOP_STBUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, rs, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_LHAX:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MFSPR:
- sprn = get_sprn(inst);
- rt = get_rt(inst);
-
- switch (sprn) {
- case SPRN_SRR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
- break;
- case SPRN_SRR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
- break;
- case SPRN_PVR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
- case SPRN_PIR:
- kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
- case SPRN_MSSSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
-
- /* Note: mftb and TBRL/TBWL are user-accessible, so
- * the guest can always access the real TB anyways.
- * In fact, we probably will never see these traps. */
- case SPRN_TBWL:
- kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
- case SPRN_TBWU:
- kvmppc_set_gpr(vcpu, rt, get_tb()); break;
-
- case SPRN_SPRG0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
- break;
- case SPRN_SPRG1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
- break;
- case SPRN_SPRG2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
- break;
- case SPRN_SPRG3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
- break;
- /* Note: SPRG4-7 are user-readable, so we don't get
- * a trap. */
-
- case SPRN_DEC:
- {
- u64 jd = get_tb() - vcpu->arch.dec_jiffies;
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
- pr_debug("mfDEC: %x - %llx = %lx\n",
- vcpu->arch.dec, jd,
- kvmppc_get_gpr(vcpu, rt));
- break;
- }
- default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
- if (emulated == EMULATE_FAIL) {
- printk("mfspr: unknown spr %x\n", sprn);
- kvmppc_set_gpr(vcpu, rt, 0);
- }
- break;
- }
+ emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
break;
case OP_31_XOP_STHX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_31_XOP_STHUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MTSPR:
- sprn = get_sprn(inst);
- rs = get_rs(inst);
- switch (sprn) {
- case SPRN_SRR0:
- vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
- break;
- case SPRN_SRR1:
- vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
- break;
-
- /* XXX We need to context-switch the timebase for
- * watchdog and FIT. */
- case SPRN_TBWL: break;
- case SPRN_TBWU: break;
-
- case SPRN_MSSSR0: break;
-
- case SPRN_DEC:
- vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
- kvmppc_emulate_dec(vcpu);
- break;
-
- case SPRN_SPRG0:
- vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
- break;
- case SPRN_SPRG1:
- vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
- break;
- case SPRN_SPRG2:
- vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
- break;
- case SPRN_SPRG3:
- vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
- break;
-
- default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
- if (emulated == EMULATE_FAIL)
- printk("mtspr: unknown spr %x\n", sprn);
- break;
- }
+ emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
break;
+ case OP_31_XOP_DCBST:
+ case OP_31_XOP_DCBF:
case OP_31_XOP_DCBI:
/* Do nothing. The guest is performing dcbi because
* hardware DMA is not snooped by the dcache, but
@@ -374,7 +341,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_LWBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
break;
@@ -382,25 +348,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_STWBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 0);
break;
case OP_31_XOP_LHBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
case OP_31_XOP_STHBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 0);
@@ -413,99 +370,92 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_LWZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case OP_LWZU:
- ra = get_ra(inst);
+ /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
+ case OP_LD:
rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ break;
+
+ case OP_LWZU:
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LBZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_LBZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STW:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
- case OP_STWU:
- ra = get_ra(inst);
+ /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
+ case OP_STD:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
+ 8, 1);
+ break;
+
+ case OP_STWU:
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STB:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_STBU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_LHZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHA:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_LHAU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STH:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_STHU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
default:
@@ -513,7 +463,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
if (emulated == EMULATE_FAIL) {
- emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+ &advance);
if (emulated == EMULATE_AGAIN) {
advance = 0;
} else if (emulated == EMULATE_FAIL) {
@@ -532,3 +483,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
return emulated;
}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
new file mode 100644
index 00000000000..5a9a10b9076
--- /dev/null
+++ b/arch/powerpc/kvm/irq.h
@@ -0,0 +1,20 @@
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+ ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+ ret = ret || (kvm->arch.xics != NULL);
+#endif
+ smp_rmb();
+ return ret;
+}
+
+#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 00000000000..b68d0dc9479
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1857 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ * 2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
+#define MAX_CPU 32
+#define MAX_SRC 256
+#define MAX_TMR 4
+#define MAX_IPI 4
+#define MAX_MSI 8
+#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID 0x03 /* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT (1 << 0)
+#define OPENPIC_FLAG_ILR (2 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE 0x40000
+#define OPENPIC_GLB_REG_START 0x0
+#define OPENPIC_GLB_REG_SIZE 0x10F0
+#define OPENPIC_TMR_REG_START 0x10F0
+#define OPENPIC_TMR_REG_SIZE 0x220
+#define OPENPIC_MSI_REG_START 0x1600
+#define OPENPIC_MSI_REG_SIZE 0x200
+#define OPENPIC_SUMMARY_REG_START 0x3800
+#define OPENPIC_SUMMARY_REG_SIZE 0x800
+#define OPENPIC_SRC_REG_START 0x10000
+#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START 0x20000
+#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+ int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+ .max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+ .max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT 16
+#define FRR_NCPU_SHIFT 8
+#define FRR_VID_SHIFT 0
+
+#define VID_REVISION_1_2 2
+#define VID_REVISION_1_3 3
+
+#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */
+
+#define GCR_RESET 0x80000000
+#define GCR_MODE_PASS 0x00000000
+#define GCR_MODE_MIXED 0x20000000
+#define GCR_MODE_PROXY 0x60000000
+
+#define TBCR_CI 0x80000000 /* count inhibit */
+#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT 31
+#define IDR_EP_MASK (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT 30
+#define IDR_CI1_SHIFT 29
+#define IDR_P1_SHIFT 1
+#define IDR_P0_SHIFT 0
+
+#define ILR_INTTGT_MASK 0x000000ff
+#define ILR_INTTGT_INT 0x00
+#define ILR_INTTGT_CINT 0x01 /* critical */
+#define ILR_INTTGT_MCP 0x02 /* machine check */
+#define NUM_OUTPUTS 3
+
+#define MSIIR_OFFSET 0x140
+#define MSIIR_SRS_SHIFT 29
+#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT 24
+#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT)
+
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+ return vcpu ? vcpu->arch.irq_cpu_id : -1;
+#else
+ /* XXX */
+ return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+ u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+ u32 *ptr, int idx);
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+ uint32_t val);
+
+enum irq_type {
+ IRQ_TYPE_NORMAL = 0,
+ IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */
+ IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+ /* Round up to the nearest 64 IRQs so that the queue length
+ * won't change when moving between 32 and 64 bit hosts.
+ */
+ unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+ int next;
+ int priority;
+};
+
+struct irq_source {
+ uint32_t ivpr; /* IRQ vector/priority register */
+ uint32_t idr; /* IRQ destination register */
+ uint32_t destmask; /* bitmap of CPU destinations */
+ int last_cpu;
+ int output; /* IRQ level, e.g. ILR_INTTGT_INT */
+ int pending; /* TRUE if IRQ is pending */
+ enum irq_type type;
+ bool level:1; /* level-triggered */
+ bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT 31
+#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT 30
+#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT 29
+#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT 23
+#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT 22
+#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP 0x80000000 /* external pin */
+#define IDR_CI 0x40000000 /* critical interrupt */
+
+struct irq_dest {
+ struct kvm_vcpu *vcpu;
+
+ int32_t ctpr; /* CPU current task priority */
+ struct irq_queue raised;
+ struct irq_queue servicing;
+
+ /* Count of IRQ sources asserting on non-INT outputs */
+ uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+#define MAX_MMIO_REGIONS 10
+
+struct openpic {
+ struct kvm *kvm;
+ struct kvm_device *dev;
+ struct kvm_io_device mmio;
+ const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
+ int num_mmio_regions;
+
+ gpa_t reg_base;
+ spinlock_t lock;
+
+ /* Behavior control */
+ struct fsl_mpic_info *fsl;
+ uint32_t model;
+ uint32_t flags;
+ uint32_t nb_irqs;
+ uint32_t vid;
+ uint32_t vir; /* Vendor identification register */
+ uint32_t vector_mask;
+ uint32_t tfrr_reset;
+ uint32_t ivpr_reset;
+ uint32_t idr_reset;
+ uint32_t brr1;
+ uint32_t mpic_mode_mask;
+
+ /* Global registers */
+ uint32_t frr; /* Feature reporting register */
+ uint32_t gcr; /* Global configuration register */
+ uint32_t pir; /* Processor initialization register */
+ uint32_t spve; /* Spurious vector register */
+ uint32_t tfrr; /* Timer frequency reporting register */
+ /* Source registers */
+ struct irq_source src[MAX_IRQ];
+ /* Local registers per output pin */
+ struct irq_dest dst[MAX_CPU];
+ uint32_t nb_cpus;
+ /* Timer registers */
+ struct {
+ uint32_t tccr; /* Global timer current count register */
+ uint32_t tbcr; /* Global timer base count register */
+ } timers[MAX_TMR];
+ /* Shared MSI registers */
+ struct {
+ uint32_t msir; /* Shared Message Signaled Interrupt Register */
+ } msi[MAX_MSI];
+ uint32_t max_irq;
+ uint32_t irq_ipi0;
+ uint32_t irq_tim0;
+ uint32_t irq_msi;
+};
+
+
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+ int output)
+{
+ struct kvm_interrupt irq = {
+ .irq = KVM_INTERRUPT_SET_LEVEL,
+ };
+
+ if (!dst->vcpu) {
+ pr_debug("%s: destination cpu %d does not exist\n",
+ __func__, (int)(dst - &opp->dst[0]));
+ return;
+ }
+
+ pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+ output);
+
+ if (output != ILR_INTTGT_INT) /* TODO */
+ return;
+
+ kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+ int output)
+{
+ if (!dst->vcpu) {
+ pr_debug("%s: destination cpu %d does not exist\n",
+ __func__, (int)(dst - &opp->dst[0]));
+ return;
+ }
+
+ pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+ output);
+
+ if (output != ILR_INTTGT_INT) /* TODO */
+ return;
+
+ kvmppc_core_dequeue_external(dst->vcpu);
+}
+
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+ set_bit(n_IRQ, q->queue);
+}
+
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+ clear_bit(n_IRQ, q->queue);
+}
+
+static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
+{
+ return test_bit(n_IRQ, q->queue);
+}
+
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+ int irq = -1;
+ int next = -1;
+ int priority = -1;
+
+ for (;;) {
+ irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
+ if (irq == opp->max_irq)
+ break;
+
+ pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+ irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
+
+ if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
+ next = irq;
+ priority = IVPR_PRIORITY(opp->src[irq].ivpr);
+ }
+ }
+
+ q->next = next;
+ q->priority = priority;
+}
+
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+ /* XXX: optimize */
+ IRQ_check(opp, q);
+
+ return q->next;
+}
+
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+ bool active, bool was_active)
+{
+ struct irq_dest *dst;
+ struct irq_source *src;
+ int priority;
+
+ dst = &opp->dst[n_CPU];
+ src = &opp->src[n_IRQ];
+
+ pr_debug("%s: IRQ %d active %d was %d\n",
+ __func__, n_IRQ, active, was_active);
+
+ if (src->output != ILR_INTTGT_INT) {
+ pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+ __func__, src->output, n_IRQ, active, was_active,
+ dst->outputs_active[src->output]);
+
+ /* On Freescale MPIC, critical interrupts ignore priority,
+ * IACK, EOI, etc. Before MPIC v4.1 they also ignore
+ * masking.
+ */
+ if (active) {
+ if (!was_active &&
+ dst->outputs_active[src->output]++ == 0) {
+ pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+ __func__, src->output, n_CPU, n_IRQ);
+ mpic_irq_raise(opp, dst, src->output);
+ }
+ } else {
+ if (was_active &&
+ --dst->outputs_active[src->output] == 0) {
+ pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+ __func__, src->output, n_CPU, n_IRQ);
+ mpic_irq_lower(opp, dst, src->output);
+ }
+ }
+
+ return;
+ }
+
+ priority = IVPR_PRIORITY(src->ivpr);
+
+ /* Even if the interrupt doesn't have enough priority,
+ * it is still raised, in case ctpr is lowered later.
+ */
+ if (active)
+ IRQ_setbit(&dst->raised, n_IRQ);
+ else
+ IRQ_resetbit(&dst->raised, n_IRQ);
+
+ IRQ_check(opp, &dst->raised);
+
+ if (active && priority <= dst->ctpr) {
+ pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+ __func__, n_IRQ, priority, dst->ctpr, n_CPU);
+ active = 0;
+ }
+
+ if (active) {
+ if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+ priority <= dst->servicing.priority) {
+ pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+ __func__, n_IRQ, dst->servicing.next, n_CPU);
+ } else {
+ pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+ __func__, n_CPU, n_IRQ, dst->raised.next);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+ } else {
+ IRQ_get_next(opp, &dst->servicing);
+ if (dst->raised.priority > dst->ctpr &&
+ dst->raised.priority > dst->servicing.priority) {
+ pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+ __func__, n_IRQ, dst->raised.next,
+ dst->raised.priority, dst->ctpr,
+ dst->servicing.priority, n_CPU);
+ /* IRQ line stays asserted */
+ } else {
+ pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+ __func__, n_IRQ, dst->ctpr,
+ dst->servicing.priority, n_CPU);
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+ }
+ }
+}
+
+/* update pic state because registers for n_IRQ have changed value */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+ struct irq_source *src;
+ bool active, was_active;
+ int i;
+
+ src = &opp->src[n_IRQ];
+ active = src->pending;
+
+ if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+ /* Interrupt source is disabled */
+ pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+ active = false;
+ }
+
+ was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+ /*
+ * We don't have a similar check for already-active because
+ * ctpr may have changed and we need to withdraw the interrupt.
+ */
+ if (!active && !was_active) {
+ pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+ return;
+ }
+
+ if (active)
+ src->ivpr |= IVPR_ACTIVITY_MASK;
+ else
+ src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+ if (src->destmask == 0) {
+ /* No target */
+ pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+ return;
+ }
+
+ if (src->destmask == (1 << src->last_cpu)) {
+ /* Only one CPU is allowed to receive this IRQ */
+ IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+ } else if (!(src->ivpr & IVPR_MODE_MASK)) {
+ /* Directed delivery mode */
+ for (i = 0; i < opp->nb_cpus; i++) {
+ if (src->destmask & (1 << i)) {
+ IRQ_local_pipe(opp, i, n_IRQ, active,
+ was_active);
+ }
+ }
+ } else {
+ /* Distributed delivery mode */
+ for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+ if (i == opp->nb_cpus)
+ i = 0;
+
+ if (src->destmask & (1 << i)) {
+ IRQ_local_pipe(opp, i, n_IRQ, active,
+ was_active);
+ src->last_cpu = i;
+ break;
+ }
+ }
+ }
+}
+
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+ struct openpic *opp = opaque;
+ struct irq_source *src;
+
+ if (n_IRQ >= MAX_IRQ) {
+ WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+ return;
+ }
+
+ src = &opp->src[n_IRQ];
+ pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+ n_IRQ, level, src->ivpr);
+ if (src->level) {
+ /* level-sensitive irq */
+ src->pending = level;
+ openpic_update_irq(opp, n_IRQ);
+ } else {
+ /* edge-sensitive irq */
+ if (level) {
+ src->pending = 1;
+ openpic_update_irq(opp, n_IRQ);
+ }
+
+ if (src->output != ILR_INTTGT_INT) {
+ /* Edge-triggered interrupts shouldn't be used
+ * with non-INT delivery, but just in case,
+ * try to make it do something sane rather than
+ * cause an interrupt storm. This is close to
+ * what you'd probably see happen in real hardware.
+ */
+ src->pending = 0;
+ openpic_update_irq(opp, n_IRQ);
+ }
+ }
+}
+
+static void openpic_reset(struct openpic *opp)
+{
+ int i;
+
+ opp->gcr = GCR_RESET;
+ /* Initialise controller registers */
+ opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+ (opp->vid << FRR_VID_SHIFT);
+
+ opp->pir = 0;
+ opp->spve = -1 & opp->vector_mask;
+ opp->tfrr = opp->tfrr_reset;
+ /* Initialise IRQ sources */
+ for (i = 0; i < opp->max_irq; i++) {
+ opp->src[i].ivpr = opp->ivpr_reset;
+
+ switch (opp->src[i].type) {
+ case IRQ_TYPE_NORMAL:
+ opp->src[i].level =
+ !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+ break;
+
+ case IRQ_TYPE_FSLINT:
+ opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+ break;
+
+ case IRQ_TYPE_FSLSPECIAL:
+ break;
+ }
+
+ write_IRQreg_idr(opp, i, opp->idr_reset);
+ }
+ /* Initialise IRQ destinations */
+ for (i = 0; i < MAX_CPU; i++) {
+ opp->dst[i].ctpr = 15;
+ memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+ opp->dst[i].raised.next = -1;
+ memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+ opp->dst[i].servicing.next = -1;
+ }
+ /* Initialise timers */
+ for (i = 0; i < MAX_TMR; i++) {
+ opp->timers[i].tccr = 0;
+ opp->timers[i].tbcr = TBCR_CI;
+ }
+ /* Go out of RESET state */
+ opp->gcr = 0;
+}
+
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+ return opp->src[n_IRQ].idr;
+}
+
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+ if (opp->flags & OPENPIC_FLAG_ILR)
+ return opp->src[n_IRQ].output;
+
+ return 0xffffffff;
+}
+
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+ return opp->src[n_IRQ].ivpr;
+}
+
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ struct irq_source *src = &opp->src[n_IRQ];
+ uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+ uint32_t crit_mask = 0;
+ uint32_t mask = normal_mask;
+ int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+ int i;
+
+ if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+ crit_mask = mask << crit_shift;
+ mask |= crit_mask | IDR_EP;
+ }
+
+ src->idr = val & mask;
+ pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+ if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+ if (src->idr & crit_mask) {
+ if (src->idr & normal_mask) {
+ pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+ __func__);
+ }
+
+ src->output = ILR_INTTGT_CINT;
+ src->nomask = true;
+ src->destmask = 0;
+
+ for (i = 0; i < opp->nb_cpus; i++) {
+ int n_ci = IDR_CI0_SHIFT - i;
+
+ if (src->idr & (1UL << n_ci))
+ src->destmask |= 1UL << i;
+ }
+ } else {
+ src->output = ILR_INTTGT_INT;
+ src->nomask = false;
+ src->destmask = src->idr & normal_mask;
+ }
+ } else {
+ src->destmask = src->idr;
+ }
+}
+
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ if (opp->flags & OPENPIC_FLAG_ILR) {
+ struct irq_source *src = &opp->src[n_IRQ];
+
+ src->output = val & ILR_INTTGT_MASK;
+ pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
+ src->output);
+
+ /* TODO: on MPIC v4.0 only, set nomask for non-INT */
+ }
+}
+
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+ uint32_t val)
+{
+ uint32_t mask;
+
+ /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+ * the polarity bit is read-only on internal interrupts.
+ */
+ mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+ IVPR_POLARITY_MASK | opp->vector_mask;
+
+ /* ACTIVITY bit is read-only */
+ opp->src[n_IRQ].ivpr =
+ (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+ /* For FSL internal interrupts, The sense bit is reserved and zero,
+ * and the interrupt is always level-triggered. Timers and IPIs
+ * have no sense or polarity bits, and are edge-triggered.
+ */
+ switch (opp->src[n_IRQ].type) {
+ case IRQ_TYPE_NORMAL:
+ opp->src[n_IRQ].level =
+ !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+ break;
+
+ case IRQ_TYPE_FSLINT:
+ opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+ break;
+
+ case IRQ_TYPE_FSLSPECIAL:
+ opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+ break;
+ }
+
+ openpic_update_irq(opp, n_IRQ);
+ pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+ opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+ if (val & GCR_RESET) {
+ openpic_reset(opp);
+ return;
+ }
+
+ opp->gcr &= ~opp->mpic_mode_mask;
+ opp->gcr |= val & opp->mpic_mode_mask;
+}
+
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int err = 0;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ switch (addr) {
+ case 0x00: /* Block Revision Register1 (BRR1) is Readonly */
+ break;
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ case 0x80:
+ case 0x90:
+ case 0xA0:
+ case 0xB0:
+ err = openpic_cpu_write_internal(opp, addr, val,
+ get_current_cpu());
+ break;
+ case 0x1000: /* FRR */
+ break;
+ case 0x1020: /* GCR */
+ openpic_gcr_write(opp, val);
+ break;
+ case 0x1080: /* VIR */
+ break;
+ case 0x1090: /* PIR */
+ /*
+ * This register is used to reset a CPU core --
+ * let userspace handle it.
+ */
+ err = -ENXIO;
+ break;
+ case 0x10A0: /* IPI_IVPR */
+ case 0x10B0:
+ case 0x10C0:
+ case 0x10D0: {
+ int idx;
+ idx = (addr - 0x10A0) >> 4;
+ write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+ break;
+ }
+ case 0x10E0: /* SPVE */
+ opp->spve = val & opp->vector_mask;
+ break;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ u32 retval;
+ int err = 0;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ goto out;
+
+ switch (addr) {
+ case 0x1000: /* FRR */
+ retval = opp->frr;
+ retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+ break;
+ case 0x1020: /* GCR */
+ retval = opp->gcr;
+ break;
+ case 0x1080: /* VIR */
+ retval = opp->vir;
+ break;
+ case 0x1090: /* PIR */
+ retval = 0x00000000;
+ break;
+ case 0x00: /* Block Revision Register1 (BRR1) */
+ retval = opp->brr1;
+ break;
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ case 0x80:
+ case 0x90:
+ case 0xA0:
+ case 0xB0:
+ err = openpic_cpu_read_internal(opp, addr,
+ &retval, get_current_cpu());
+ break;
+ case 0x10A0: /* IPI_IVPR */
+ case 0x10B0:
+ case 0x10C0:
+ case 0x10D0:
+ {
+ int idx;
+ idx = (addr - 0x10A0) >> 4;
+ retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+ }
+ break;
+ case 0x10E0: /* SPVE */
+ retval = opp->spve;
+ break;
+ default:
+ break;
+ }
+
+out:
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return err;
+}
+
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx;
+
+ addr += 0x10f0;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ if (addr == 0x10f0) {
+ /* TFRR */
+ opp->tfrr = val;
+ return 0;
+ }
+
+ idx = (addr >> 6) & 0x3;
+ addr = addr & 0x30;
+
+ switch (addr & 0x30) {
+ case 0x00: /* TCCR */
+ break;
+ case 0x10: /* TBCR */
+ if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+ (val & TBCR_CI) == 0 &&
+ (opp->timers[idx].tbcr & TBCR_CI) != 0)
+ opp->timers[idx].tccr &= ~TCCR_TOG;
+
+ opp->timers[idx].tbcr = val;
+ break;
+ case 0x20: /* TVPR */
+ write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+ break;
+ case 0x30: /* TDR */
+ write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t retval = -1;
+ int idx;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ if (addr & 0xF)
+ goto out;
+
+ idx = (addr >> 6) & 0x3;
+ if (addr == 0x0) {
+ /* TFRR */
+ retval = opp->tfrr;
+ goto out;
+ }
+
+ switch (addr & 0x30) {
+ case 0x00: /* TCCR */
+ retval = opp->timers[idx].tccr;
+ break;
+ case 0x10: /* TBCR */
+ retval = opp->timers[idx].tbcr;
+ break;
+ case 0x20: /* TIPV */
+ retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+ break;
+ case 0x30: /* TIDE (TIDR) */
+ retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+ break;
+ }
+
+out:
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx;
+
+ pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+ addr = addr & 0xffff;
+ idx = addr >> 5;
+
+ switch (addr & 0x1f) {
+ case 0x00:
+ write_IRQreg_ivpr(opp, idx, val);
+ break;
+ case 0x10:
+ write_IRQreg_idr(opp, idx, val);
+ break;
+ case 0x18:
+ write_IRQreg_ilr(opp, idx, val);
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t retval;
+ int idx;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+
+ addr = addr & 0xffff;
+ idx = addr >> 5;
+
+ switch (addr & 0x1f) {
+ case 0x00:
+ retval = read_IRQreg_ivpr(opp, idx);
+ break;
+ case 0x10:
+ retval = read_IRQreg_idr(opp, idx);
+ break;
+ case 0x18:
+ retval = read_IRQreg_ilr(opp, idx);
+ break;
+ }
+
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+ int idx = opp->irq_msi;
+ int srs, ibs;
+
+ pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return 0;
+
+ switch (addr) {
+ case MSIIR_OFFSET:
+ srs = val >> MSIIR_SRS_SHIFT;
+ idx += srs;
+ ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+ opp->msi[srs].msir |= 1 << ibs;
+ openpic_set_irq(opp, idx, 1);
+ break;
+ default:
+ /* most registers are read-only, thus ignored */
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+ uint32_t r = 0;
+ int i, srs;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+ if (addr & 0xF)
+ return -ENXIO;
+
+ srs = addr >> 4;
+
+ switch (addr) {
+ case 0x00:
+ case 0x10:
+ case 0x20:
+ case 0x30:
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70: /* MSIRs */
+ r = opp->msi[srs].msir;
+ /* Clear on read */
+ opp->msi[srs].msir = 0;
+ openpic_set_irq(opp, opp->irq_msi + srs, 0);
+ break;
+ case 0x120: /* MSISR */
+ for (i = 0; i < MAX_MSI; i++)
+ r |= (opp->msi[i].msir ? 1 : 0) << i;
+ break;
+ }
+
+ pr_debug("%s: => 0x%08x\n", __func__, r);
+ *ptr = r;
+ return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ uint32_t r = 0;
+
+ pr_debug("%s: addr %#llx\n", __func__, addr);
+
+ /* TODO: EISR/EIMR */
+
+ *ptr = r;
+ return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+ pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+ /* TODO: EISR/EIMR */
+ return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+ u32 val, int idx)
+{
+ struct openpic *opp = opaque;
+ struct irq_source *src;
+ struct irq_dest *dst;
+ int s_IRQ, n_IRQ;
+
+ pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+ addr, val);
+
+ if (idx < 0)
+ return 0;
+
+ if (addr & 0xF)
+ return 0;
+
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+ case 0x40: /* IPIDR */
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ idx = (addr - 0x40) >> 4;
+ /* we use IDE as mask which CPUs to deliver the IPI to still. */
+ opp->src[opp->irq_ipi0 + idx].destmask |= val;
+ openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+ openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+ break;
+ case 0x80: /* CTPR */
+ dst->ctpr = val & 0x0000000F;
+
+ pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+ __func__, idx, dst->ctpr, dst->raised.priority,
+ dst->servicing.priority);
+
+ if (dst->raised.priority <= dst->ctpr) {
+ pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+ __func__, idx);
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+ } else if (dst->raised.priority > dst->servicing.priority) {
+ pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+ __func__, idx, dst->raised.next);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+
+ break;
+ case 0x90: /* WHOAMI */
+ /* Read-only register */
+ break;
+ case 0xA0: /* IACK */
+ /* Read-only register */
+ break;
+ case 0xB0: { /* EOI */
+ int notify_eoi;
+
+ pr_debug("EOI\n");
+ s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+ if (s_IRQ < 0) {
+ pr_debug("%s: EOI with no interrupt in service\n",
+ __func__);
+ break;
+ }
+
+ IRQ_resetbit(&dst->servicing, s_IRQ);
+ /* Notify listeners that the IRQ is over */
+ notify_eoi = s_IRQ;
+ /* Set up next servicing IRQ */
+ s_IRQ = IRQ_get_next(opp, &dst->servicing);
+ /* Check queued interrupts. */
+ n_IRQ = IRQ_get_next(opp, &dst->raised);
+ src = &opp->src[n_IRQ];
+ if (n_IRQ != -1 &&
+ (s_IRQ == -1 ||
+ IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+ pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+ idx, n_IRQ);
+ mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+ }
+
+ spin_unlock(&opp->lock);
+ kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+ spin_lock(&opp->lock);
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+ struct openpic *opp = opaque;
+
+ return openpic_cpu_write_internal(opp, addr, val,
+ (addr & 0x1f000) >> 12);
+}
+
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+ int cpu)
+{
+ struct irq_source *src;
+ int retval, irq;
+
+ pr_debug("Lower OpenPIC INT output\n");
+ mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+ irq = IRQ_get_next(opp, &dst->raised);
+ pr_debug("IACK: irq=%d\n", irq);
+
+ if (irq == -1)
+ /* No more interrupt pending */
+ return opp->spve;
+
+ src = &opp->src[irq];
+ if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+ !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+ pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+ __func__, irq, dst->ctpr, src->ivpr);
+ openpic_update_irq(opp, irq);
+ retval = opp->spve;
+ } else {
+ /* IRQ enter servicing state */
+ IRQ_setbit(&dst->servicing, irq);
+ retval = IVPR_VECTOR(opp, src->ivpr);
+ }
+
+ if (!src->level) {
+ /* edge-sensitive IRQ */
+ src->ivpr &= ~IVPR_ACTIVITY_MASK;
+ src->pending = 0;
+ IRQ_resetbit(&dst->raised, irq);
+ }
+
+ if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+ src->destmask &= ~(1 << cpu);
+ if (src->destmask && !src->level) {
+ /* trigger on CPUs that didn't know about it yet */
+ openpic_set_irq(opp, irq, 1);
+ openpic_set_irq(opp, irq, 0);
+ /* if all CPUs knew about it, set active bit again */
+ src->ivpr |= IVPR_ACTIVITY_MASK;
+ }
+ }
+
+ return retval;
+}
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+ struct openpic *opp = vcpu->arch.mpic;
+ int cpu = vcpu->arch.irq_cpu_id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+
+ if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+ kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+ spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+ u32 *ptr, int idx)
+{
+ struct openpic *opp = opaque;
+ struct irq_dest *dst;
+ uint32_t retval;
+
+ pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+ retval = 0xFFFFFFFF;
+
+ if (idx < 0)
+ goto out;
+
+ if (addr & 0xF)
+ goto out;
+
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+ case 0x80: /* CTPR */
+ retval = dst->ctpr;
+ break;
+ case 0x90: /* WHOAMI */
+ retval = idx;
+ break;
+ case 0xA0: /* IACK */
+ retval = openpic_iack(opp, dst, idx);
+ break;
+ case 0xB0: /* EOI */
+ retval = 0;
+ break;
+ default:
+ break;
+ }
+ pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+ *ptr = retval;
+ return 0;
+}
+
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+ struct openpic *opp = opaque;
+
+ return openpic_cpu_read_internal(opp, addr, ptr,
+ (addr & 0x1f000) >> 12);
+}
+
+struct mem_reg {
+ int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+ int (*write)(void *opaque, gpa_t addr, u32 val);
+ gpa_t start_addr;
+ int size;
+};
+
+static const struct mem_reg openpic_gbl_mmio = {
+ .write = openpic_gbl_write,
+ .read = openpic_gbl_read,
+ .start_addr = OPENPIC_GLB_REG_START,
+ .size = OPENPIC_GLB_REG_SIZE,
+};
+
+static const struct mem_reg openpic_tmr_mmio = {
+ .write = openpic_tmr_write,
+ .read = openpic_tmr_read,
+ .start_addr = OPENPIC_TMR_REG_START,
+ .size = OPENPIC_TMR_REG_SIZE,
+};
+
+static const struct mem_reg openpic_cpu_mmio = {
+ .write = openpic_cpu_write,
+ .read = openpic_cpu_read,
+ .start_addr = OPENPIC_CPU_REG_START,
+ .size = OPENPIC_CPU_REG_SIZE,
+};
+
+static const struct mem_reg openpic_src_mmio = {
+ .write = openpic_src_write,
+ .read = openpic_src_read,
+ .start_addr = OPENPIC_SRC_REG_START,
+ .size = OPENPIC_SRC_REG_SIZE,
+};
+
+static const struct mem_reg openpic_msi_mmio = {
+ .read = openpic_msi_read,
+ .write = openpic_msi_write,
+ .start_addr = OPENPIC_MSI_REG_START,
+ .size = OPENPIC_MSI_REG_SIZE,
+};
+
+static const struct mem_reg openpic_summary_mmio = {
+ .read = openpic_summary_read,
+ .write = openpic_summary_write,
+ .start_addr = OPENPIC_SUMMARY_REG_START,
+ .size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
+{
+ if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
+ WARN(1, "kvm mpic: too many mmio regions\n");
+ return;
+ }
+
+ opp->mmio_regions[opp->num_mmio_regions++] = mr;
+}
+
+static void fsl_common_init(struct openpic *opp)
+{
+ int i;
+ int virq = MAX_SRC;
+
+ add_mmio_region(opp, &openpic_msi_mmio);
+ add_mmio_region(opp, &openpic_summary_mmio);
+
+ opp->vid = VID_REVISION_1_2;
+ opp->vir = VIR_GENERIC;
+ opp->vector_mask = 0xFFFF;
+ opp->tfrr_reset = 0;
+ opp->ivpr_reset = IVPR_MASK_MASK;
+ opp->idr_reset = 1 << 0;
+ opp->max_irq = MAX_IRQ;
+
+ opp->irq_ipi0 = virq;
+ virq += MAX_IPI;
+ opp->irq_tim0 = virq;
+ virq += MAX_TMR;
+
+ BUG_ON(virq > MAX_IRQ);
+
+ opp->irq_msi = 224;
+
+ for (i = 0; i < opp->fsl->max_ext; i++)
+ opp->src[i].level = false;
+
+ /* Internal interrupts, including message and MSI */
+ for (i = 16; i < MAX_SRC; i++) {
+ opp->src[i].type = IRQ_TYPE_FSLINT;
+ opp->src[i].level = true;
+ }
+
+ /* timers and IPIs */
+ for (i = MAX_SRC; i < virq; i++) {
+ opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+ opp->src[i].level = false;
+ }
+}
+
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+ int i;
+
+ for (i = 0; i < opp->num_mmio_regions; i++) {
+ const struct mem_reg *mr = opp->mmio_regions[i];
+
+ if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+ continue;
+
+ return mr->read(opp, addr - mr->start_addr, ptr);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+ int i;
+
+ for (i = 0; i < opp->num_mmio_regions; i++) {
+ const struct mem_reg *mr = opp->mmio_regions[i];
+
+ if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+ continue;
+
+ return mr->write(opp, addr - mr->start_addr, val);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+ int len, void *ptr)
+{
+ struct openpic *opp = container_of(this, struct openpic, mmio);
+ int ret;
+ union {
+ u32 val;
+ u8 bytes[4];
+ } u;
+
+ if (addr & (len - 1)) {
+ pr_debug("%s: bad alignment %llx/%d\n",
+ __func__, addr, len);
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&opp->lock);
+ ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+ spin_unlock_irq(&opp->lock);
+
+ /*
+ * Technically only 32-bit accesses are allowed, but be nice to
+ * people dumping registers a byte at a time -- it works in real
+ * hardware (reads only, not writes).
+ */
+ if (len == 4) {
+ *(u32 *)ptr = u.val;
+ pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+ __func__, addr, ret, u.val);
+ } else if (len == 1) {
+ *(u8 *)ptr = u.bytes[addr & 3];
+ pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+ __func__, addr, ret, u.bytes[addr & 3]);
+ } else {
+ pr_debug("%s: bad length %d\n", __func__, len);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+ int len, const void *ptr)
+{
+ struct openpic *opp = container_of(this, struct openpic, mmio);
+ int ret;
+
+ if (len != 4) {
+ pr_debug("%s: bad length %d\n", __func__, len);
+ return -EOPNOTSUPP;
+ }
+ if (addr & 3) {
+ pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irq(&opp->lock);
+ ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+ *(const u32 *)ptr);
+ spin_unlock_irq(&opp->lock);
+
+ pr_debug("%s: addr %llx ret %d val %x\n",
+ __func__, addr, ret, *(const u32 *)ptr);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+ .read = kvm_mpic_read,
+ .write = kvm_mpic_write,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+ kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+ kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+ opp->reg_base, OPENPIC_REG_SIZE,
+ &opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+ kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+ u64 base;
+
+ if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+ return -EFAULT;
+
+ if (base & 0x3ffff) {
+ pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+ __func__, base);
+ return -EINVAL;
+ }
+
+ if (base == opp->reg_base)
+ return 0;
+
+ mutex_lock(&opp->kvm->slots_lock);
+
+ unmap_mmio(opp);
+ opp->reg_base = base;
+
+ pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+ __func__, base);
+
+ if (base == 0)
+ goto out;
+
+ map_mmio(opp);
+
+out:
+ mutex_unlock(&opp->kvm->slots_lock);
+ return 0;
+}
+
+#define ATTR_SET 0
+#define ATTR_GET 1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+ int ret;
+
+ if (addr & 3)
+ return -ENXIO;
+
+ spin_lock_irq(&opp->lock);
+
+ if (type == ATTR_SET)
+ ret = kvm_mpic_write_internal(opp, addr, *val);
+ else
+ ret = kvm_mpic_read_internal(opp, addr, val);
+
+ spin_unlock_irq(&opp->lock);
+
+ pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+ return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct openpic *opp = dev->private;
+ u32 attr32;
+
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ return set_base_addr(opp, attr);
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ if (get_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ return -EINVAL;
+
+ if (get_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ if (attr32 != 0 && attr32 != 1)
+ return -EINVAL;
+
+ spin_lock_irq(&opp->lock);
+ openpic_set_irq(opp, attr->attr, attr32);
+ spin_unlock_irq(&opp->lock);
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ struct openpic *opp = dev->private;
+ u64 attr64;
+ u32 attr32;
+ int ret;
+
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ mutex_lock(&opp->kvm->slots_lock);
+ attr64 = opp->reg_base;
+ mutex_unlock(&opp->kvm->slots_lock);
+
+ if (copy_to_user((u64 __user *)(long)attr->addr,
+ &attr64, sizeof(u64)))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+ if (ret)
+ return ret;
+
+ if (put_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return 0;
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ return -EINVAL;
+
+ spin_lock_irq(&opp->lock);
+ attr32 = opp->src[attr->attr].pending;
+ spin_unlock_irq(&opp->lock);
+
+ if (put_user(attr32, (u32 __user *)(long)attr->addr))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_MPIC_GRP_MISC:
+ switch (attr->attr) {
+ case KVM_DEV_MPIC_BASE_ADDR:
+ return 0;
+ }
+
+ break;
+
+ case KVM_DEV_MPIC_GRP_REGISTER:
+ return 0;
+
+ case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+ if (attr->attr > MAX_SRC)
+ break;
+
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+ struct openpic *opp = dev->private;
+
+ dev->kvm->arch.mpic = NULL;
+ kfree(opp);
+ kfree(dev);
+}
+
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+ struct kvm_irq_routing_entry *routing;
+
+ /* Create a nop default map, so that dereferencing it still works */
+ routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
+ if (!routing)
+ return -ENOMEM;
+
+ kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+ kfree(routing);
+ return 0;
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+ struct openpic *opp;
+ int ret;
+
+ /* We only support one MPIC at a time for now */
+ if (dev->kvm->arch.mpic)
+ return -EINVAL;
+
+ opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+ if (!opp)
+ return -ENOMEM;
+
+ dev->private = opp;
+ opp->kvm = dev->kvm;
+ opp->dev = dev;
+ opp->model = type;
+ spin_lock_init(&opp->lock);
+
+ add_mmio_region(opp, &openpic_gbl_mmio);
+ add_mmio_region(opp, &openpic_tmr_mmio);
+ add_mmio_region(opp, &openpic_src_mmio);
+ add_mmio_region(opp, &openpic_cpu_mmio);
+
+ switch (opp->model) {
+ case KVM_DEV_TYPE_FSL_MPIC_20:
+ opp->fsl = &fsl_mpic_20;
+ opp->brr1 = 0x00400200;
+ opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+ opp->nb_irqs = 80;
+ opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+ fsl_common_init(opp);
+
+ break;
+
+ case KVM_DEV_TYPE_FSL_MPIC_42:
+ opp->fsl = &fsl_mpic_42;
+ opp->brr1 = 0x00400402;
+ opp->flags |= OPENPIC_FLAG_ILR;
+ opp->nb_irqs = 196;
+ opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+ fsl_common_init(opp);
+
+ break;
+
+ default:
+ ret = -ENODEV;
+ goto err;
+ }
+
+ ret = mpic_set_default_irq_routing(opp);
+ if (ret)
+ goto err;
+
+ openpic_reset(opp);
+
+ smp_wmb();
+ dev->kvm->arch.mpic = opp;
+
+ return 0;
+
+err:
+ kfree(opp);
+ return ret;
+}
+
+struct kvm_device_ops kvm_mpic_ops = {
+ .name = "kvm-mpic",
+ .create = mpic_create,
+ .destroy = mpic_destroy,
+ .set_attr = mpic_set_attr,
+ .get_attr = mpic_get_attr,
+ .has_attr = mpic_has_attr,
+};
+
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+ u32 cpu)
+{
+ struct openpic *opp = dev->private;
+ int ret = 0;
+
+ if (dev->ops != &kvm_mpic_ops)
+ return -EPERM;
+ if (opp->kvm != vcpu->kvm)
+ return -EPERM;
+ if (cpu < 0 || cpu >= MAX_CPU)
+ return -EPERM;
+
+ spin_lock_irq(&opp->lock);
+
+ if (opp->dst[cpu].vcpu) {
+ ret = -EEXIST;
+ goto out;
+ }
+ if (vcpu->arch.irq_type) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ opp->dst[cpu].vcpu = vcpu;
+ opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+ vcpu->arch.mpic = opp;
+ vcpu->arch.irq_cpu_id = cpu;
+ vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+ /* This might need to be changed if GCR gets extended */
+ if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+ vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+ spin_unlock_irq(&opp->lock);
+ return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+ BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+ opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Return value:
+ * < 0 Interrupt was ignored (masked or not delivered for other reasons)
+ * = 0 Interrupt was coalesced (previous irq is still pending)
+ * > 0 Number of CPUs interrupt was delivered to
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ u32 irq = e->irqchip.pin;
+ struct openpic *opp = kvm->arch.mpic;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+ openpic_set_irq(opp, irq, level);
+ spin_unlock_irqrestore(&opp->lock, flags);
+
+ /* All code paths we care about don't check for the return value */
+ return 0;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+ struct openpic *opp = kvm->arch.mpic;
+ unsigned long flags;
+
+ spin_lock_irqsave(&opp->lock, flags);
+
+ /*
+ * XXX We ignore the target address for now, as we only support
+ * a single MSI bank.
+ */
+ openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
+ spin_unlock_irqrestore(&opp->lock, flags);
+
+ /* All code paths we care about don't check for the return value */
+ return 0;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int r = -EINVAL;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ e->set = mpic_set_irq;
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin;
+ if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+ goto out;
+ rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ break;
+ default:
+ goto out;
+ }
+
+ r = 0;
+out:
+ return r;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 38f756f2505..61c738ab128 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -21,26 +21,130 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
-#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/module.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
+#include <asm/cputhreads.h>
+#include <asm/irqflags.h>
#include "timing.h"
+#include "irq.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions);
+ return !!(v->arch.pending_exceptions) ||
+ v->requests;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ WARN_ON(irqs_disabled());
+ hard_irq_disable();
+
+ while (true) {
+ if (need_resched()) {
+ local_irq_enable();
+ cond_resched();
+ hard_irq_disable();
+ continue;
+ }
+
+ if (signal_pending(current)) {
+ kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ break;
+ }
+
+ vcpu->mode = IN_GUEST_MODE;
+
+ /*
+ * Reading vcpu->requests must happen after setting vcpu->mode,
+ * so we don't miss a request because the requester sees
+ * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+ * before next entering the guest (and thus doesn't IPI).
+ */
+ smp_mb();
+
+ if (vcpu->requests) {
+ /* Make sure we process requests preemptable */
+ local_irq_enable();
+ trace_kvm_check_requests(vcpu);
+ r = kvmppc_core_check_requests(vcpu);
+ hard_irq_disable();
+ if (r > 0)
+ continue;
+ break;
+ }
+
+ if (kvmppc_core_prepare_to_enter(vcpu)) {
+ /* interrupts got enabled in between, so we
+ are back at square 1 */
+ continue;
+ }
+
+ kvm_guest_enter();
+ return 1;
+ }
+
+ /* return to host */
+ local_irq_enable();
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+ int i;
+
+ shared->sprg0 = swab64(shared->sprg0);
+ shared->sprg1 = swab64(shared->sprg1);
+ shared->sprg2 = swab64(shared->sprg2);
+ shared->sprg3 = swab64(shared->sprg3);
+ shared->srr0 = swab64(shared->srr0);
+ shared->srr1 = swab64(shared->srr1);
+ shared->dar = swab64(shared->dar);
+ shared->msr = swab64(shared->msr);
+ shared->dsisr = swab32(shared->dsisr);
+ shared->int_pending = swab32(shared->int_pending);
+ for (i = 0; i < ARRAY_SIZE(shared->sr); i++)
+ shared->sr[i] = swab32(shared->sr[i]);
}
+#endif
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
@@ -52,7 +156,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
unsigned long r2 = 0;
- if (!(vcpu->arch.shared->msr & MSR_SF)) {
+ if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
/* 32 bit mode */
param1 &= 0xffffffff;
param2 &= 0xffffffff;
@@ -61,26 +165,52 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
switch (nr) {
- case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+ case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
{
- vcpu->arch.magic_page_pa = param1;
- vcpu->arch.magic_page_ea = param2;
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+ /* Book3S can be little endian, find it out here */
+ int shared_big_endian = true;
+ if (vcpu->arch.intr_msr & MSR_LE)
+ shared_big_endian = false;
+ if (shared_big_endian != vcpu->arch.shared_big_endian)
+ kvmppc_swab_shared(vcpu);
+ vcpu->arch.shared_big_endian = shared_big_endian;
+#endif
+
+ if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
+ /*
+ * Older versions of the Linux magic page code had
+ * a bug where they would map their trampoline code
+ * NX. If that's the case, remove !PR NX capability.
+ */
+ vcpu->arch.disable_kernel_nx = true;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+
+ vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
+ vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
- r2 = KVM_MAGIC_FEAT_SR;
+ r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
- r = HC_EV_SUCCESS;
+ r = EV_SUCCESS;
break;
}
- case HC_VENDOR_KVM | KVM_HC_FEATURES:
- r = HC_EV_SUCCESS;
-#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */
+ case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+ r = EV_SUCCESS;
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
+ /* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
#endif
/* Second return value is in r4 */
break;
+ case EV_HCALL_TOKEN(EV_IDLE):
+ r = EV_SUCCESS;
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ break;
default:
- r = HC_EV_UNIMPLEMENTED;
+ r = EV_UNIMPLEMENTED;
break;
}
@@ -88,6 +218,36 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
return r;
}
+EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
+
+int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
+{
+ int r = false;
+
+ /* We have to know what CPU to virtualize */
+ if (!vcpu->arch.pvr)
+ goto out;
+
+ /* PAPR only works with book3s_64 */
+ if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
+ goto out;
+
+ /* HV KVM can only do PAPR mode for now */
+ if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
+ goto out;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ if (!cpu_has_feature(CPU_FTR_EMB_HV))
+ goto out;
+#endif
+
+ r = true;
+
+out:
+ vcpu->arch.sane = r;
+ return r ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
@@ -116,11 +276,13 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = RESUME_HOST;
break;
default:
- BUG();
+ WARN_ON(1);
+ r = RESUME_GUEST;
}
return r;
}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
int kvm_arch_hardware_enable(void *garbage)
{
@@ -145,18 +307,40 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = kvmppc_core_check_processor_compat();
}
-struct kvm *kvm_arch_create_vm(void)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- struct kvm *kvm;
-
- kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
- if (!kvm)
- return ERR_PTR(-ENOMEM);
-
- return kvm;
+ struct kvmppc_ops *kvm_ops = NULL;
+ /*
+ * if we have both HV and PR enabled, default is HV
+ */
+ if (type == 0) {
+ if (kvmppc_hv_ops)
+ kvm_ops = kvmppc_hv_ops;
+ else
+ kvm_ops = kvmppc_pr_ops;
+ if (!kvm_ops)
+ goto err_out;
+ } else if (type == KVM_VM_PPC_HV) {
+ if (!kvmppc_hv_ops)
+ goto err_out;
+ kvm_ops = kvmppc_hv_ops;
+ } else if (type == KVM_VM_PPC_PR) {
+ if (!kvmppc_pr_ops)
+ goto err_out;
+ kvm_ops = kvmppc_pr_ops;
+ } else
+ goto err_out;
+
+ if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+ return -ENOENT;
+
+ kvm->arch.kvm_ops = kvm_ops;
+ return kvmppc_core_init_vm(kvm);
+err_out:
+ return -EINVAL;
}
-static void kvmppc_free_vcpus(struct kvm *kvm)
+void kvm_arch_destroy_vm(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
@@ -169,38 +353,127 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
+
+ kvmppc_core_destroy_vm(kvm);
+
mutex_unlock(&kvm->lock);
-}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
+ /* drop the module reference */
+ module_put(kvm->arch.kvm_ops->owner);
}
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
{
- kvmppc_free_vcpus(kvm);
- kvm_free_physmem(kvm);
- cleanup_srcu_struct(&kvm->srcu);
- kfree(kvm);
}
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
+ /* FIXME!!
+ * Should some of this be vm ioctl ? is it possible now ?
+ */
+ int hv_enabled = kvmppc_hv_ops ? 1 : 0;
switch (ext) {
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_SREGS:
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
+ case KVM_CAP_PPC_EPR:
+#else
case KVM_CAP_PPC_SEGSTATE:
- case KVM_CAP_PPC_PAIRED_SINGLES:
+ case KVM_CAP_PPC_HIOR:
+ case KVM_CAP_PPC_PAPR:
+#endif
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ r = 1;
+ break;
+ case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
- r = 1;
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ case KVM_CAP_SW_TLB:
+#endif
+ /* We support this only for PR */
+ r = !hv_enabled;
break;
+#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
+#endif
+#ifdef CONFIG_KVM_MPIC
+ case KVM_CAP_IRQ_MPIC:
+ r = 1;
+ break;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_PPC_ALLOC_HTAB:
+ case KVM_CAP_PPC_RTAS:
+ case KVM_CAP_PPC_FIXUP_HCALL:
+#ifdef CONFIG_KVM_XICS
+ case KVM_CAP_IRQ_XICS:
+#endif
+ r = 1;
+ break;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_SMT:
+ if (hv_enabled)
+ r = threads_per_subcore;
+ else
+ r = 0;
+ break;
+ case KVM_CAP_PPC_RMA:
+ r = hv_enabled;
+ /* PPC970 requires an RMA */
+ if (r && cpu_has_feature(CPU_FTR_ARCH_201))
+ r = 2;
+ break;
+#endif
+ case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (hv_enabled)
+ r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+ else
+ r = 0;
+#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ r = 1;
+#else
+ r = 0;
+#endif
+ break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_HTAB_FD:
+ r = hv_enabled;
+ break;
+#endif
+ case KVM_CAP_NR_VCPUS:
+ /*
+ * Recommending a number of CPUs is somewhat arbitrary; we
+ * return the number of present CPUs for -HV (since a host
+ * will have secondary threads "offline"), and for other KVM
+ * implementations just count online CPUs.
+ */
+ if (hv_enabled)
+ r = num_present_cpus();
+ else
+ r = num_online_cpus();
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CAP_PPC_GET_SMMU_INFO:
+ r = 1;
+ break;
+#endif
default:
r = 0;
break;
@@ -215,37 +488,64 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ kvmppc_core_free_memslot(kvm, free, dont);
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return kvmppc_core_create_memslot(kvm, slot, npages);
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_memory_slot old,
- struct kvm_userspace_memory_region *mem,
- int user_alloc)
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
- return 0;
+ return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
{
- return;
+ kvmppc_core_commit_memory_region(kvm, mem, old);
}
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
{
+ kvmppc_core_flush_memslot(kvm, slot);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
vcpu = kvmppc_core_vcpu_create(kvm, id);
- if (!IS_ERR(vcpu))
+ if (!IS_ERR(vcpu)) {
+ vcpu->arch.wqp = &vcpu->wq;
kvmppc_create_vcpu_debugfs(vcpu, id);
+ }
return vcpu;
}
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
@@ -253,6 +553,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
+
+ switch (vcpu->arch.irq_type) {
+ case KVMPPC_IRQ_MPIC:
+ kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+ break;
+ case KVMPPC_IRQ_XICS:
+ kvmppc_xics_free_icp(vcpu);
+ break;
+ }
+
kvmppc_core_vcpu_free(vcpu);
}
@@ -266,18 +576,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-static void kvmppc_decrementer_func(unsigned long data)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
- kvmppc_core_queue_dec(vcpu);
-
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
- }
-}
-
/*
* low level hrtimer wake routine. Because this runs in hardirq context
* we schedule a tasklet to do the real work.
@@ -294,32 +592,47 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = ~(u64)0;
- return 0;
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+ ret = kvmppc_subarch_vcpu_init(vcpu);
+ return ret;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+#ifdef CONFIG_BOOKE
+ /*
+ * vrsave (formerly usprg0) isn't used by Linux, but may
+ * be used by the guest.
+ *
+ * On non-booke this is associated with Altivec and
+ * is handled by code in book3s.c.
+ */
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_core_vcpu_put(vcpu);
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
-{
- return -EINVAL;
+#ifdef CONFIG_BOOKE
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
@@ -372,20 +685,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
- switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
- case KVM_REG_GPR:
+ switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
+ case KVM_MMIO_REG_GPR:
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
break;
- case KVM_REG_FPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_FPR:
+ VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
break;
#ifdef CONFIG_PPC_BOOK3S
- case KVM_REG_QPR:
- vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_QPR:
+ vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
- case KVM_REG_FQPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
- vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_FQPR:
+ VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
+ vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#endif
default:
@@ -394,8 +707,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
}
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int rt, unsigned int bytes, int is_bigendian)
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian)
{
+ int idx, ret;
+ int is_bigendian;
+
+ if (kvmppc_need_byteswap(vcpu)) {
+ /* Default endianness is "little endian". */
+ is_bigendian = !is_default_endian;
+ } else {
+ /* Default endianness is "big endian". */
+ is_bigendian = is_default_endian;
+ }
+
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
run->mmio.len);
@@ -411,25 +736,50 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_is_write = 0;
vcpu->arch.mmio_sign_extend = 0;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!ret) {
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_handle_load);
/* Same as above, but sign extends */
int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int rt, unsigned int bytes, int is_bigendian)
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian)
{
int r;
- r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
vcpu->arch.mmio_sign_extend = 1;
+ r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
return r;
}
int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
- u64 val, unsigned int bytes, int is_bigendian)
+ u64 val, unsigned int bytes, int is_default_endian)
{
void *data = run->mmio.data;
+ int idx, ret;
+ int is_bigendian;
+
+ if (kvmppc_need_byteswap(vcpu)) {
+ /* Default endianness is "little endian". */
+ is_bigendian = !is_default_endian;
+ } else {
+ /* Default endianness is "big endian". */
+ is_bigendian = is_default_endian;
+ }
if (bytes > sizeof(run->mmio.data)) {
printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -459,8 +809,21 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (!ret) {
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
+EXPORT_SYMBOL_GPL(kvmppc_handle_store);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
@@ -485,15 +848,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
for (i = 0; i < 32; i++)
kvmppc_set_gpr(vcpu, i, gprs[i]);
vcpu->arch.osi_needed = 0;
- }
+ } else if (vcpu->arch.hcall_needed) {
+ int i;
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
+ for (i = 0; i < 9; ++i)
+ kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
+ vcpu->arch.hcall_needed = 0;
+#ifdef CONFIG_BOOKE
+ } else if (vcpu->arch.epr_needed) {
+ kvmppc_set_epr(vcpu, run->epr.epr);
+ vcpu->arch.epr_needed = 0;
+#endif
+ }
- local_irq_disable();
- kvm_guest_enter();
- r = __kvmppc_vcpu_run(run, vcpu);
- kvm_guest_exit();
- local_irq_enable();
+ r = kvmppc_vcpu_run(run, vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -503,16 +872,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
- if (irq->irq == KVM_INTERRUPT_UNSET)
- kvmppc_core_dequeue_external(vcpu, irq);
- else
- kvmppc_core_queue_external(vcpu, irq);
-
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
+ if (irq->irq == KVM_INTERRUPT_UNSET) {
+ kvmppc_core_dequeue_external(vcpu);
+ return 0;
}
+ kvmppc_core_queue_external(vcpu, irq);
+
+ kvm_vcpu_kick(vcpu);
+
return 0;
}
@@ -529,11 +897,82 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.osi_enabled = true;
break;
+ case KVM_CAP_PPC_PAPR:
+ r = 0;
+ vcpu->arch.papr_enabled = true;
+ break;
+ case KVM_CAP_PPC_EPR:
+ r = 0;
+ if (cap->args[0])
+ vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+ else
+ vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
+ r = 0;
+ vcpu->arch.watchdog_enabled = true;
+ break;
+#endif
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ case KVM_CAP_SW_TLB: {
+ struct kvm_config_tlb cfg;
+ void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+ r = -EFAULT;
+ if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+ break;
+
+ r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+ break;
+ }
+#endif
+#ifdef CONFIG_KVM_MPIC
+ case KVM_CAP_IRQ_MPIC: {
+ struct fd f;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ f = fdget(cap->args[0]);
+ if (!f.file)
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(f.file);
+ if (dev)
+ r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+ fdput(f);
+ break;
+ }
+#endif
+#ifdef CONFIG_KVM_XICS
+ case KVM_CAP_IRQ_XICS: {
+ struct fd f;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ f = fdget(cap->args[0]);
+ if (!f.file)
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(f.file);
+ if (dev)
+ r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+
+ fdput(f);
+ break;
+ }
+#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
}
+ if (!r)
+ r = kvmppc_sanity_check(vcpu);
+
return r;
}
@@ -575,6 +1014,31 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG:
+ {
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ goto out;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
+
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ case KVM_DIRTY_TLB: {
+ struct kvm_dirty_tlb dirty;
+ r = -EFAULT;
+ if (copy_from_user(&dirty, argp, sizeof(dirty)))
+ goto out;
+ r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+ break;
+ }
+#endif
default:
r = -EINVAL;
}
@@ -583,11 +1047,23 @@ out:
return r;
}
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
{
+ u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+ u32 inst_sc1 = 0x44000022;
+ pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
+ pvinfo->hcall[1] = cpu_to_be32(inst_nop);
+ pvinfo->hcall[2] = cpu_to_be32(inst_nop);
+ pvinfo->hcall[3] = cpu_to_be32(inst_nop);
+#else
u32 inst_lis = 0x3c000000;
u32 inst_ori = 0x60000000;
- u32 inst_nop = 0x60000000;
u32 inst_sc = 0x44000002;
u32 inst_imm_mask = 0xffff;
@@ -600,17 +1076,33 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
* sc
* nop
*/
- pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
- pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
- pvinfo->hcall[2] = inst_sc;
- pvinfo->hcall[3] = inst_nop;
+ pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
+ pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
+ pvinfo->hcall[2] = cpu_to_be32(inst_sc);
+ pvinfo->hcall[3] = cpu_to_be32(inst_nop);
+#endif
+
+ pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
return 0;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+ bool line_status)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level,
+ line_status);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
+ struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
long r;
@@ -626,14 +1118,83 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CREATE_SPAPR_TCE: {
+ struct kvm_create_spapr_tce create_tce;
+
+ r = -EFAULT;
+ if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
+ goto out;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+ goto out;
+ }
+ case KVM_PPC_GET_SMMU_INFO: {
+ struct kvm_ppc_smmu_info info;
+ struct kvm *kvm = filp->private_data;
+
+ memset(&info, 0, sizeof(info));
+ r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
+ if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+ case KVM_PPC_RTAS_DEFINE_TOKEN: {
+ struct kvm *kvm = filp->private_data;
+
+ r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+ break;
+ }
+ default: {
+ struct kvm *kvm = filp->private_data;
+ r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+ }
+#else /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;
+#endif
}
-
out:
return r;
}
+static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+static unsigned long nr_lpids;
+
+long kvmppc_alloc_lpid(void)
+{
+ long lpid;
+
+ do {
+ lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
+ if (lpid >= nr_lpids) {
+ pr_err("%s: No LPIDs free\n", __func__);
+ return -ENOMEM;
+ }
+ } while (test_and_set_bit(lpid, lpid_inuse));
+
+ return lpid;
+}
+EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
+
+void kvmppc_claim_lpid(long lpid)
+{
+ set_bit(lpid, lpid_inuse);
+}
+EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
+
+void kvmppc_free_lpid(long lpid)
+{
+ clear_bit(lpid, lpid_inuse);
+}
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
+
+void kvmppc_init_lpid(unsigned long nr_lpids_param)
+{
+ nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
+ memset(lpid_inuse, 0, sizeof(lpid_inuse));
+}
+EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
+
int kvm_arch_init(void *opaque)
{
return 0;
@@ -641,4 +1202,5 @@ int kvm_arch_init(void *opaque)
void kvm_arch_exit(void)
{
+
}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index a021f5827a3..07b6110a4bb 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
{
int i;
- /* pause guest execution to avoid concurrent updates */
- mutex_lock(&vcpu->mutex);
+ /* Take a lock to avoid concurrent updates */
+ mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.last_exit_type = 0xDEAD;
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -49,21 +49,14 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
vcpu->arch.timing_exit.tv64 = 0;
vcpu->arch.timing_last_enter.tv64 = 0;
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
}
static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
{
u64 old;
- do_div(duration, tb_ticks_per_usec);
- if (unlikely(duration > 0xFFFFFFFF)) {
- printk(KERN_ERR"%s - duration too big -> overflow"
- " duration %lld type %d exit #%d\n",
- __func__, duration, type,
- vcpu->arch.timing_count_type[type]);
- return;
- }
+ mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.timing_count_type[type]++;
@@ -93,6 +86,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
vcpu->arch.timing_min_duration[type] = duration;
if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
vcpu->arch.timing_max_duration[type] = duration;
+
+ mutex_unlock(&vcpu->arch.exit_timing_lock);
}
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@@ -147,17 +142,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
{
struct kvm_vcpu *vcpu = m->private;
int i;
+ u64 min, max, sum, sum_quad;
seq_printf(m, "%s", "type count min max sum sum_squared\n");
+
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+ min = vcpu->arch.timing_min_duration[i];
+ do_div(min, tb_ticks_per_usec);
+ max = vcpu->arch.timing_max_duration[i];
+ do_div(max, tb_ticks_per_usec);
+ sum = vcpu->arch.timing_sum_duration[i];
+ do_div(sum, tb_ticks_per_usec);
+ sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+ do_div(sum_quad, tb_ticks_per_usec);
+
seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
kvm_exit_names[i],
vcpu->arch.timing_count_type[i],
- vcpu->arch.timing_min_duration[i],
- vcpu->arch.timing_max_duration[i],
- vcpu->arch.timing_sum_duration[i],
- vcpu->arch.timing_sum_quad_duration[i]);
+ min,
+ max,
+ sum,
+ sum_quad);
+
}
return 0;
}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 8167d42a776..bf191e72b2d 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
case SIGNAL_EXITS:
vcpu->stat.signal_exits++;
break;
+ case DBELL_EXITS:
+ vcpu->stat.dbell_exits++;
+ break;
+ case GDBELL_EXITS:
+ vcpu->stat.gdbell_exits++;
+ break;
}
}
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 3aca1b042b8..2e0e67ef354 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -98,245 +98,24 @@ TRACE_EVENT(kvm_gtlb_write,
__entry->word1, __entry->word2)
);
-
-/*************************************************************************
- * Book3S trace points *
- *************************************************************************/
-
-#ifdef CONFIG_PPC_BOOK3S
-
-TRACE_EVENT(kvm_book3s_exit,
- TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
- TP_ARGS(exit_nr, vcpu),
+TRACE_EVENT(kvm_check_requests,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
TP_STRUCT__entry(
- __field( unsigned int, exit_nr )
- __field( unsigned long, pc )
- __field( unsigned long, msr )
- __field( unsigned long, dar )
- __field( unsigned long, srr1 )
+ __field( __u32, cpu_nr )
+ __field( __u32, requests )
),
TP_fast_assign(
- __entry->exit_nr = exit_nr;
- __entry->pc = kvmppc_get_pc(vcpu);
- __entry->dar = kvmppc_get_fault_dar(vcpu);
- __entry->msr = vcpu->arch.shared->msr;
- __entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->requests = vcpu->requests;
),
- TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
- __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
- __entry->srr1)
+ TP_printk("vcpu=%x requests=%x",
+ __entry->cpu_nr, __entry->requests)
);
-TRACE_EVENT(kvm_book3s_reenter,
- TP_PROTO(int r, struct kvm_vcpu *vcpu),
- TP_ARGS(r, vcpu),
-
- TP_STRUCT__entry(
- __field( unsigned int, r )
- __field( unsigned long, pc )
- ),
-
- TP_fast_assign(
- __entry->r = r;
- __entry->pc = kvmppc_get_pc(vcpu);
- ),
-
- TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
-);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
-TRACE_EVENT(kvm_book3s_64_mmu_map,
- TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
- struct kvmppc_pte *orig_pte),
- TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
-
- TP_STRUCT__entry(
- __field( unsigned char, flag_w )
- __field( unsigned char, flag_x )
- __field( unsigned long, eaddr )
- __field( unsigned long, hpteg )
- __field( unsigned long, va )
- __field( unsigned long long, vpage )
- __field( unsigned long, hpaddr )
- ),
-
- TP_fast_assign(
- __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
- __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
- __entry->eaddr = orig_pte->eaddr;
- __entry->hpteg = hpteg;
- __entry->va = va;
- __entry->vpage = orig_pte->vpage;
- __entry->hpaddr = hpaddr;
- ),
-
- TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
- __entry->flag_w, __entry->flag_x, __entry->eaddr,
- __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
-);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-TRACE_EVENT(kvm_book3s_mmu_map,
- TP_PROTO(struct hpte_cache *pte),
- TP_ARGS(pte),
-
- TP_STRUCT__entry(
- __field( u64, host_va )
- __field( u64, pfn )
- __field( ulong, eaddr )
- __field( u64, vpage )
- __field( ulong, raddr )
- __field( int, flags )
- ),
-
- TP_fast_assign(
- __entry->host_va = pte->host_va;
- __entry->pfn = pte->pfn;
- __entry->eaddr = pte->pte.eaddr;
- __entry->vpage = pte->pte.vpage;
- __entry->raddr = pte->pte.raddr;
- __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
- (pte->pte.may_write ? 0x2 : 0) |
- (pte->pte.may_execute ? 0x1 : 0);
- ),
-
- TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_va, __entry->pfn, __entry->eaddr,
- __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_invalidate,
- TP_PROTO(struct hpte_cache *pte),
- TP_ARGS(pte),
-
- TP_STRUCT__entry(
- __field( u64, host_va )
- __field( u64, pfn )
- __field( ulong, eaddr )
- __field( u64, vpage )
- __field( ulong, raddr )
- __field( int, flags )
- ),
-
- TP_fast_assign(
- __entry->host_va = pte->host_va;
- __entry->pfn = pte->pfn;
- __entry->eaddr = pte->pte.eaddr;
- __entry->vpage = pte->pte.vpage;
- __entry->raddr = pte->pte.raddr;
- __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
- (pte->pte.may_write ? 0x2 : 0) |
- (pte->pte.may_execute ? 0x1 : 0);
- ),
-
- TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
- __entry->host_va, __entry->pfn, __entry->eaddr,
- __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_flush,
- TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
- unsigned long long p2),
- TP_ARGS(type, vcpu, p1, p2),
-
- TP_STRUCT__entry(
- __field( int, count )
- __field( unsigned long long, p1 )
- __field( unsigned long long, p2 )
- __field( const char *, type )
- ),
-
- TP_fast_assign(
- __entry->count = vcpu->arch.hpte_cache_count;
- __entry->p1 = p1;
- __entry->p2 = p2;
- __entry->type = type;
- ),
-
- TP_printk("Flush %d %sPTEs: %llx - %llx",
- __entry->count, __entry->type, __entry->p1, __entry->p2)
-);
-
-TRACE_EVENT(kvm_book3s_slb_found,
- TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
- TP_ARGS(gvsid, hvsid),
-
- TP_STRUCT__entry(
- __field( unsigned long long, gvsid )
- __field( unsigned long long, hvsid )
- ),
-
- TP_fast_assign(
- __entry->gvsid = gvsid;
- __entry->hvsid = hvsid;
- ),
-
- TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_fail,
- TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
- TP_ARGS(sid_map_mask, gvsid),
-
- TP_STRUCT__entry(
- __field( unsigned short, sid_map_mask )
- __field( unsigned long long, gvsid )
- ),
-
- TP_fast_assign(
- __entry->sid_map_mask = sid_map_mask;
- __entry->gvsid = gvsid;
- ),
-
- TP_printk("%x/%x: %llx", __entry->sid_map_mask,
- SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_map,
- TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
- unsigned long long hvsid),
- TP_ARGS(sid_map_mask, gvsid, hvsid),
-
- TP_STRUCT__entry(
- __field( unsigned short, sid_map_mask )
- __field( unsigned long long, guest_vsid )
- __field( unsigned long long, host_vsid )
- ),
-
- TP_fast_assign(
- __entry->sid_map_mask = sid_map_mask;
- __entry->guest_vsid = gvsid;
- __entry->host_vsid = hvsid;
- ),
-
- TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
- __entry->guest_vsid, __entry->host_vsid)
-);
-
-TRACE_EVENT(kvm_book3s_slbmte,
- TP_PROTO(u64 slb_vsid, u64 slb_esid),
- TP_ARGS(slb_vsid, slb_esid),
-
- TP_STRUCT__entry(
- __field( u64, slb_vsid )
- __field( u64, slb_esid )
- ),
-
- TP_fast_assign(
- __entry->slb_vsid = slb_vsid;
- __entry->slb_esid = slb_esid;
- ),
-
- TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
-);
-
-#endif /* CONFIG_PPC_BOOK3S */
-
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
new file mode 100644
index 00000000000..f7537cf26ce
--- /dev/null
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -0,0 +1,177 @@
+#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_BOOKE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_booke
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
+#define kvm_trace_symbol_exit \
+ {0, "CRITICAL"}, \
+ {1, "MACHINE_CHECK"}, \
+ {2, "DATA_STORAGE"}, \
+ {3, "INST_STORAGE"}, \
+ {4, "EXTERNAL"}, \
+ {5, "ALIGNMENT"}, \
+ {6, "PROGRAM"}, \
+ {7, "FP_UNAVAIL"}, \
+ {8, "SYSCALL"}, \
+ {9, "AP_UNAVAIL"}, \
+ {10, "DECREMENTER"}, \
+ {11, "FIT"}, \
+ {12, "WATCHDOG"}, \
+ {13, "DTLB_MISS"}, \
+ {14, "ITLB_MISS"}, \
+ {15, "DEBUG"}, \
+ {32, "SPE_UNAVAIL"}, \
+ {33, "SPE_FP_DATA"}, \
+ {34, "SPE_FP_ROUND"}, \
+ {35, "PERFORMANCE_MONITOR"}, \
+ {36, "DOORBELL"}, \
+ {37, "DOORBELL_CRITICAL"}, \
+ {38, "GUEST_DBELL"}, \
+ {39, "GUEST_DBELL_CRIT"}, \
+ {40, "HV_SYSCALL"}, \
+ {41, "HV_PRIV"}
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas8 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas8 = mas8;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas8, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_ref_release,
+ TP_PROTO(__u64 pfn, __u32 flags),
+ TP_ARGS(pfn, flags),
+
+ TP_STRUCT__entry(
+ __field( __u64, pfn )
+ __field( __u32, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("pfn=%llx flags=%x",
+ __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+ TP_ARGS(vcpu, priority),
+
+ TP_STRUCT__entry(
+ __field( __u32, cpu_nr )
+ __field( __u32, priority )
+ __field( unsigned long, pending )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->priority = priority;
+ __entry->pending = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("vcpu=%x prio=%x pending=%lx",
+ __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644
index 00000000000..e1357cd8dc1
--- /dev/null
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -0,0 +1,297 @@
+
+#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_PR_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_pr
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+
+TRACE_EVENT(kvm_book3s_reenter,
+ TP_PROTO(int r, struct kvm_vcpu *vcpu),
+ TP_ARGS(r, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, r )
+ __field( unsigned long, pc )
+ ),
+
+ TP_fast_assign(
+ __entry->r = r;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ ),
+
+ TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+ TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+ struct kvmppc_pte *orig_pte),
+ TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+ TP_STRUCT__entry(
+ __field( unsigned char, flag_w )
+ __field( unsigned char, flag_x )
+ __field( unsigned long, eaddr )
+ __field( unsigned long, hpteg )
+ __field( unsigned long, va )
+ __field( unsigned long long, vpage )
+ __field( unsigned long, hpaddr )
+ ),
+
+ TP_fast_assign(
+ __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+ __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
+ __entry->eaddr = orig_pte->eaddr;
+ __entry->hpteg = hpteg;
+ __entry->va = va;
+ __entry->vpage = orig_pte->vpage;
+ __entry->hpaddr = hpaddr;
+ ),
+
+ TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+ __entry->flag_w, __entry->flag_x, __entry->eaddr,
+ __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_vpn )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_vpn = pte->host_vpn;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+ TP_PROTO(struct hpte_cache *pte),
+ TP_ARGS(pte),
+
+ TP_STRUCT__entry(
+ __field( u64, host_vpn )
+ __field( u64, pfn )
+ __field( ulong, eaddr )
+ __field( u64, vpage )
+ __field( ulong, raddr )
+ __field( int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->host_vpn = pte->host_vpn;
+ __entry->pfn = pte->pfn;
+ __entry->eaddr = pte->pte.eaddr;
+ __entry->vpage = pte->pte.vpage;
+ __entry->raddr = pte->pte.raddr;
+ __entry->flags = (pte->pte.may_read ? 0x4 : 0) |
+ (pte->pte.may_write ? 0x2 : 0) |
+ (pte->pte.may_execute ? 0x1 : 0);
+ ),
+
+ TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+ __entry->host_vpn, __entry->pfn, __entry->eaddr,
+ __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,
+ TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+ unsigned long long p2),
+ TP_ARGS(type, vcpu, p1, p2),
+
+ TP_STRUCT__entry(
+ __field( int, count )
+ __field( unsigned long long, p1 )
+ __field( unsigned long long, p2 )
+ __field( const char *, type )
+ ),
+
+ TP_fast_assign(
+ __entry->count = to_book3s(vcpu)->hpte_cache_count;
+ __entry->p1 = p1;
+ __entry->p2 = p2;
+ __entry->type = type;
+ ),
+
+ TP_printk("Flush %d %sPTEs: %llx - %llx",
+ __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+ TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+ TP_ARGS(gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned long long, gvsid )
+ __field( unsigned long long, hvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->gvsid = gvsid;
+ __entry->hvsid = hvsid;
+ ),
+
+ TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+ TP_ARGS(sid_map_mask, gvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, gvsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->gvsid = gvsid;
+ ),
+
+ TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+ SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+ TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+ unsigned long long hvsid),
+ TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, sid_map_mask )
+ __field( unsigned long long, guest_vsid )
+ __field( unsigned long long, host_vsid )
+ ),
+
+ TP_fast_assign(
+ __entry->sid_map_mask = sid_map_mask;
+ __entry->guest_vsid = gvsid;
+ __entry->host_vsid = hvsid;
+ ),
+
+ TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+ __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+ TP_PROTO(u64 slb_vsid, u64 slb_esid),
+ TP_ARGS(slb_vsid, slb_esid),
+
+ TP_STRUCT__entry(
+ __field( u64, slb_vsid )
+ __field( u64, slb_esid )
+ ),
+
+ TP_fast_assign(
+ __entry->slb_vsid = slb_vsid;
+ __entry->slb_esid = slb_esid;
+ ),
+
+ TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+ __field( unsigned long, srr1 )
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = kvmppc_get_msr(vcpu);
+ __entry->srr1 = vcpu->arch.shadow_srr1;
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+ " | srr1=0x%lx"
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+ __entry->srr1,
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>