Diffstat (limited to 'arch/powerpc/kvm'): 46 files changed, 5382 insertions, 2562 deletions
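
Before the per-file hunks: the series below routes Book3S common code through a per-VM ops table (kvm->arch.kvm_ops / struct kvmppc_ops) so the PR and HV backends can be built as separate modules (kvm-pr.ko, kvm-hv.ko). The following standalone C sketch is illustrative only and is not part of the commit; all type and function names in it (kvmppc_ops_demo, kvm_ops_pr_demo, core_vcpu_load, etc.) are simplified stand-ins for the kernel structures changed in the diff.

	/*
	 * Illustrative sketch only -- not from the commit.  Models the
	 * ops-table indirection: common code calls through a per-VM table
	 * of function pointers, so different backends can supply their own
	 * vcpu_load/vcpu_put implementations.
	 */
	#include <stdio.h>

	struct vcpu;				/* stand-in for struct kvm_vcpu */

	struct kvmppc_ops_demo {
		const char *name;
		void (*vcpu_load)(struct vcpu *v, int cpu);
		void (*vcpu_put)(struct vcpu *v);
	};

	struct vcpu {
		const struct kvmppc_ops_demo *ops;	/* like kvm->arch.kvm_ops */
	};

	static void pr_vcpu_load(struct vcpu *v, int cpu)
	{
		printf("%s: vcpu load on cpu %d\n", v->ops->name, cpu);
	}

	static void pr_vcpu_put(struct vcpu *v)
	{
		printf("%s: vcpu put\n", v->ops->name);
	}

	static const struct kvmppc_ops_demo kvm_ops_pr_demo = {
		.name      = "kvm-pr (demo)",
		.vcpu_load = pr_vcpu_load,
		.vcpu_put  = pr_vcpu_put,
	};

	/* Common code sees only the ops table, as book3s.c does after this series. */
	static void core_vcpu_load(struct vcpu *v, int cpu)
	{
		v->ops->vcpu_load(v, cpu);
	}

	int main(void)
	{
		struct vcpu v = { .ops = &kvm_ops_pr_demo };

		core_vcpu_load(&v, 0);
		v.ops->vcpu_put(&v);
		return 0;
	}

The actual diff follows.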
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 2f5c6b6d687..9cb4b0a3603 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -21,6 +21,8 @@  #include <linux/slab.h>  #include <linux/err.h>  #include <linux/export.h> +#include <linux/module.h> +#include <linux/miscdevice.h>  #include <asm/reg.h>  #include <asm/cputable.h> @@ -31,13 +33,13 @@  #include "44x_tlb.h"  #include "booke.h" -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)  {  	kvmppc_booke_vcpu_load(vcpu, cpu);  	kvmppc_44x_tlb_load(vcpu);  } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)  {  	kvmppc_44x_tlb_put(vcpu);  	kvmppc_booke_vcpu_put(vcpu); @@ -114,29 +116,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,  	return 0;  } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, +				      struct kvm_sregs *sregs)  { -	kvmppc_get_sregs_ivor(vcpu, sregs); +	return kvmppc_get_sregs_ivor(vcpu, sregs);  } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, +				     struct kvm_sregs *sregs)  {  	return kvmppc_set_sregs_ivor(vcpu, sregs);  } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, -			union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, +				  union kvmppc_one_reg *val)  {  	return -EINVAL;  } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, -		       union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, +				  union kvmppc_one_reg *val)  {  	return -EINVAL;  } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, +						    unsigned int id)  {  	struct kvmppc_vcpu_44x *vcpu_44x;  	struct kvm_vcpu *vcpu; @@ -167,7 +172,7 @@ out:  	return ERR_PTR(err);  } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); @@ -176,30 +181,57 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)  	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);  } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_44x(struct kvm *kvm)  {  	return 0;  } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)  {  } +static struct kvmppc_ops kvm_ops_44x = { +	.get_sregs = kvmppc_core_get_sregs_44x, +	.set_sregs = kvmppc_core_set_sregs_44x, +	.get_one_reg = kvmppc_get_one_reg_44x, +	.set_one_reg = kvmppc_set_one_reg_44x, +	.vcpu_load   = kvmppc_core_vcpu_load_44x, +	.vcpu_put    = kvmppc_core_vcpu_put_44x, +	.vcpu_create = kvmppc_core_vcpu_create_44x, +	.vcpu_free   = kvmppc_core_vcpu_free_44x, +	.mmu_destroy  = kvmppc_mmu_destroy_44x, +	.init_vm = kvmppc_core_init_vm_44x, +	.destroy_vm = kvmppc_core_destroy_vm_44x, +	.emulate_op = kvmppc_core_emulate_op_44x, +	.emulate_mtspr = kvmppc_core_emulate_mtspr_44x, +	.emulate_mfspr = kvmppc_core_emulate_mfspr_44x, +}; +  static int __init kvmppc_44x_init(void)  {  	int r;  	r = kvmppc_booke_init();  	if (r) -		return r; +		goto err_out; + +	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); +	if (r) +		goto err_out; +	kvm_ops_44x.owner = THIS_MODULE; +	kvmppc_pr_ops = &kvm_ops_44x; -	return 
kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); +err_out: +	return r;  }  static void __exit kvmppc_44x_exit(void)  { +	kvmppc_pr_ops = NULL;  	kvmppc_booke_exit();  }  module_init(kvmppc_44x_init);  module_exit(kvmppc_44x_exit); +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 35ec0a8547d..92c9ab4bcfe 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)  	return EMULATE_DONE;  } -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, -                           unsigned int inst, int *advance) +int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, +			       unsigned int inst, int *advance)  {  	int emulated = EMULATE_DONE;  	int dcrn = get_dcrn(inst); @@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  	return emulated;  } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  {  	int emulated = EMULATE_DONE; @@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  	return emulated;  } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  {  	int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ed038544814..0deef1082e0 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,  	trace_kvm_stlb_inval(stlb_index);  } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);  	int i; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ffaef2cb101..d6a53b95de9 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -34,17 +34,20 @@ config KVM_BOOK3S_64_HANDLER  	bool  	select KVM_BOOK3S_HANDLER -config KVM_BOOK3S_PR +config KVM_BOOK3S_PR_POSSIBLE  	bool  	select KVM_MMIO  	select MMU_NOTIFIER +config KVM_BOOK3S_HV_POSSIBLE +	bool +  config KVM_BOOK3S_32  	tristate "KVM support for PowerPC book3s_32 processors"  	depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT  	select KVM  	select KVM_BOOK3S_32_HANDLER -	select KVM_BOOK3S_PR +	select KVM_BOOK3S_PR_POSSIBLE  	---help---  	  Support running unmodified book3s_32 guest kernels  	  in virtual machines on book3s_32 host processors. @@ -59,6 +62,7 @@ config KVM_BOOK3S_64  	depends on PPC_BOOK3S_64  	select KVM_BOOK3S_64_HANDLER  	select KVM +	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE  	---help---  	  Support running unmodified book3s_64 and book3s_32 guest kernels  	  in virtual machines on book3s_64 host processors. @@ -69,8 +73,10 @@ config KVM_BOOK3S_64  	  If unsure, say N.  config KVM_BOOK3S_64_HV -	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" +	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"  	depends on KVM_BOOK3S_64 +	depends on !CPU_LITTLE_ENDIAN +	select KVM_BOOK3S_HV_POSSIBLE  	select MMU_NOTIFIER  	select CMA  	---help--- @@ -89,9 +95,20 @@ config KVM_BOOK3S_64_HV  	  If unsure, say N.  
config KVM_BOOK3S_64_PR -	def_bool y -	depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV -	select KVM_BOOK3S_PR +	tristate "KVM support without using hypervisor mode in host" +	depends on KVM_BOOK3S_64 +	select KVM_BOOK3S_PR_POSSIBLE +	---help--- +	  Support running guest kernels in virtual machines on processors +	  without using hypervisor mode in the host, by running the +	  guest in user mode (problem state) and emulating all +	  privileged instructions and registers. + +	  This is not as fast as using hypervisor mode, but works on +	  machines where hypervisor mode is not available or not usable, +	  and can emulate processors that are different from the host +	  processor, including emulating 32-bit processors on a 64-bit +	  host.  config KVM_BOOKE_HV  	bool diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 6646c952c5e..ce569b6bf4d 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -53,41 +53,51 @@ kvm-e500mc-objs := \  	e500_emulate.o  kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ -	$(KVM)/coalesced_mmio.o \ +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ +	book3s_64_vio_hv.o + +kvm-pr-y := \  	fpu.o \  	book3s_paired_singles.o \  	book3s_pr.o \  	book3s_pr_papr.o \ -	book3s_64_vio_hv.o \  	book3s_emulate.o \  	book3s_interrupts.o \  	book3s_mmu_hpte.o \  	book3s_64_mmu_host.o \  	book3s_64_mmu.o \  	book3s_32_mmu.o -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ + +ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +kvm-book3s_64-module-objs := \ +	$(KVM)/coalesced_mmio.o + +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \  	book3s_rmhandlers.o +endif -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ +kvm-hv-y += \  	book3s_hv.o \  	book3s_hv_interrupts.o \  	book3s_64_mmu_hv.o +  kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \  	book3s_hv_rm_xics.o -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + +ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \  	book3s_hv_rmhandlers.o \  	book3s_hv_rm_mmu.o \ -	book3s_64_vio_hv.o \  	book3s_hv_ras.o \  	book3s_hv_builtin.o \  	book3s_hv_cma.o \  	$(kvm-book3s_64-builtin-xics-objs-y) +endif  kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \  	book3s_xics.o -kvm-book3s_64-module-objs := \ +kvm-book3s_64-module-objs += \  	$(KVM)/kvm_main.o \  	$(KVM)/eventfd.o \  	powerpc.o \ @@ -123,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o  obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o  obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o +obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o +obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o +  obj-y += $(kvm-book3s_64-builtin-objs-y) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 700df6f1d32..c254c27f240 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -18,6 +18,8 @@  #include <linux/err.h>  #include <linux/export.h>  #include <linux/slab.h> +#include <linux/module.h> +#include <linux/miscdevice.h>  #include <asm/reg.h>  #include <asm/cputable.h> @@ -34,6 +36,7 @@  #include <linux/vmalloc.h>  #include <linux/highmem.h> +#include "book3s.h"  #include "trace.h"  #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -69,10 +72,54 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)  {  } +static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) +{ +	if (!is_kvmppc_hv_enabled(vcpu->kvm)) +		return to_book3s(vcpu)->hior; +	return 0; +} + +static inline void 
kvmppc_update_int_pending(struct kvm_vcpu *vcpu, +			unsigned long pending_now, unsigned long old_pending) +{ +	if (is_kvmppc_hv_enabled(vcpu->kvm)) +		return; +	if (pending_now) +		kvmppc_set_int_pending(vcpu, 1); +	else if (old_pending) +		kvmppc_set_int_pending(vcpu, 0); +} + +static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) +{ +	ulong crit_raw; +	ulong crit_r1; +	bool crit; + +	if (is_kvmppc_hv_enabled(vcpu->kvm)) +		return false; + +	crit_raw = kvmppc_get_critical(vcpu); +	crit_r1 = kvmppc_get_gpr(vcpu, 1); + +	/* Truncate crit indicators in 32 bit mode */ +	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) { +		crit_raw &= 0xffffffff; +		crit_r1 &= 0xffffffff; +	} + +	/* Critical section when crit == r1 */ +	crit = (crit_raw == crit_r1); +	/* ... and we're in supervisor mode */ +	crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR); + +	return crit; +} +  void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)  { -	vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); -	vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; +	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); +	kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);  	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);  	vcpu->arch.mmu.reset_msr(vcpu);  } @@ -98,6 +145,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)  	case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;		break;  	case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC;		break;  	case 0xf40: prio = BOOK3S_IRQPRIO_VSX;			break; +	case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL;		break;  	default:    prio = BOOK3S_IRQPRIO_MAX;			break;  	} @@ -126,28 +174,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)  	printk(KERN_INFO "Queueing interrupt %x\n", vec);  #endif  } - +EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);  void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)  {  	/* might as well deliver this straight away */  	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);  } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);  void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)  {  	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);  } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);  int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)  {  	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);  } +EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);  void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)  {  	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);  } +EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);  void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,                                  struct kvm_interrupt *irq) @@ -174,12 +226,12 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)  	switch (priority) {  	case BOOK3S_IRQPRIO_DECREMENTER: -		deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; +		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;  		vec = BOOK3S_INTERRUPT_DECREMENTER;  		break;  	case BOOK3S_IRQPRIO_EXTERNAL:  	case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: -		deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; +		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;  		vec = BOOK3S_INTERRUPT_EXTERNAL;  		break;  	case BOOK3S_IRQPRIO_SYSTEM_RESET: @@ -224,6 +276,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)  	case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:  		vec = BOOK3S_INTERRUPT_PERFMON;  		break; +	case BOOK3S_IRQPRIO_FAC_UNAVAIL: +		vec = BOOK3S_INTERRUPT_FAC_UNAVAIL; +		break;  	default:  		
deliver = 0;  		printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); @@ -285,12 +340,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)  	return 0;  } +EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +			bool *writable)  {  	ulong mp_pa = vcpu->arch.magic_page_pa; -	if (!(vcpu->arch.shared->msr & MSR_SF)) +	if (!(kvmppc_get_msr(vcpu) & MSR_SF))  		mp_pa = (uint32_t)mp_pa;  	/* Magic page override */ @@ -302,20 +359,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)  		pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;  		get_page(pfn_to_page(pfn)); +		if (writable) +			*writable = true;  		return pfn;  	} -	return gfn_to_pfn(vcpu->kvm, gfn); +	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);  } +EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);  static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, -			 struct kvmppc_pte *pte) +			bool iswrite, struct kvmppc_pte *pte)  { -	int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); +	int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));  	int r;  	if (relocated) { -		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); +		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);  	} else {  		pte->eaddr = eaddr;  		pte->raddr = eaddr & KVM_PAM; @@ -361,7 +421,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,  	vcpu->stat.st++; -	if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) +	if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))  		return -ENOENT;  	*eaddr = pte.raddr; @@ -374,6 +434,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,  	return EMULATE_DONE;  } +EXPORT_SYMBOL_GPL(kvmppc_st);  int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,  		      bool data) @@ -383,7 +444,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,  	vcpu->stat.ld++; -	if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) +	if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))  		goto nopte;  	*eaddr = pte.raddr; @@ -404,6 +465,7 @@ nopte:  mmio:  	return EMULATE_DO_MMIO;  } +EXPORT_SYMBOL_GPL(kvmppc_ld);  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)  { @@ -419,6 +481,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)  {  } +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, +				  struct kvm_sregs *sregs) +{ +	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, +				  struct kvm_sregs *sregs) +{ +	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); +} +  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  {  	int i; @@ -428,18 +502,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  	regs->ctr = kvmppc_get_ctr(vcpu);  	regs->lr = kvmppc_get_lr(vcpu);  	regs->xer = kvmppc_get_xer(vcpu); -	regs->msr = vcpu->arch.shared->msr; -	regs->srr0 = vcpu->arch.shared->srr0; -	regs->srr1 = vcpu->arch.shared->srr1; +	regs->msr = kvmppc_get_msr(vcpu); +	regs->srr0 = kvmppc_get_srr0(vcpu); +	regs->srr1 = kvmppc_get_srr1(vcpu);  	regs->pid = vcpu->arch.pid; -	regs->sprg0 = vcpu->arch.shared->sprg0; -	regs->sprg1 = vcpu->arch.shared->sprg1; -	regs->sprg2 = vcpu->arch.shared->sprg2; -	regs->sprg3 = vcpu->arch.shared->sprg3; -	regs->sprg4 = vcpu->arch.shared->sprg4; -	regs->sprg5 = vcpu->arch.shared->sprg5; -	regs->sprg6 = vcpu->arch.shared->sprg6; 
-	regs->sprg7 = vcpu->arch.shared->sprg7; +	regs->sprg0 = kvmppc_get_sprg0(vcpu); +	regs->sprg1 = kvmppc_get_sprg1(vcpu); +	regs->sprg2 = kvmppc_get_sprg2(vcpu); +	regs->sprg3 = kvmppc_get_sprg3(vcpu); +	regs->sprg4 = kvmppc_get_sprg4(vcpu); +	regs->sprg5 = kvmppc_get_sprg5(vcpu); +	regs->sprg6 = kvmppc_get_sprg6(vcpu); +	regs->sprg7 = kvmppc_get_sprg7(vcpu);  	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)  		regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -457,16 +531,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  	kvmppc_set_lr(vcpu, regs->lr);  	kvmppc_set_xer(vcpu, regs->xer);  	kvmppc_set_msr(vcpu, regs->msr); -	vcpu->arch.shared->srr0 = regs->srr0; -	vcpu->arch.shared->srr1 = regs->srr1; -	vcpu->arch.shared->sprg0 = regs->sprg0; -	vcpu->arch.shared->sprg1 = regs->sprg1; -	vcpu->arch.shared->sprg2 = regs->sprg2; -	vcpu->arch.shared->sprg3 = regs->sprg3; -	vcpu->arch.shared->sprg4 = regs->sprg4; -	vcpu->arch.shared->sprg5 = regs->sprg5; -	vcpu->arch.shared->sprg6 = regs->sprg6; -	vcpu->arch.shared->sprg7 = regs->sprg7; +	kvmppc_set_srr0(vcpu, regs->srr0); +	kvmppc_set_srr1(vcpu, regs->srr1); +	kvmppc_set_sprg0(vcpu, regs->sprg0); +	kvmppc_set_sprg1(vcpu, regs->sprg1); +	kvmppc_set_sprg2(vcpu, regs->sprg2); +	kvmppc_set_sprg3(vcpu, regs->sprg3); +	kvmppc_set_sprg4(vcpu, regs->sprg4); +	kvmppc_set_sprg5(vcpu, regs->sprg5); +	kvmppc_set_sprg6(vcpu, regs->sprg6); +	kvmppc_set_sprg7(vcpu, regs->sprg7);  	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)  		kvmppc_set_gpr(vcpu, i, regs->gpr[i]); @@ -495,23 +569,22 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	if (size > sizeof(val))  		return -EINVAL; -	r = kvmppc_get_one_reg(vcpu, reg->id, &val); - +	r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);  	if (r == -EINVAL) {  		r = 0;  		switch (reg->id) {  		case KVM_REG_PPC_DAR: -			val = get_reg_val(reg->id, vcpu->arch.shared->dar); +			val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));  			break;  		case KVM_REG_PPC_DSISR: -			val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); +			val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));  			break;  		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:  			i = reg->id - KVM_REG_PPC_FPR0; -			val = get_reg_val(reg->id, vcpu->arch.fpr[i]); +			val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));  			break;  		case KVM_REG_PPC_FPSCR: -			val = get_reg_val(reg->id, vcpu->arch.fpscr); +			val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);  			break;  #ifdef CONFIG_ALTIVEC  		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -519,16 +592,30 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  				r = -ENXIO;  				break;  			} -			val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; +			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];  			break;  		case KVM_REG_PPC_VSCR:  			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {  				r = -ENXIO;  				break;  			} -			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); +			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); +			break; +		case KVM_REG_PPC_VRSAVE: +			val = get_reg_val(reg->id, vcpu->arch.vrsave);  			break;  #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +		case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: +			if (cpu_has_feature(CPU_FTR_VSX)) { +				long int i = reg->id - KVM_REG_PPC_VSR0; +				val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; +				val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; +			} else { +				r = -ENXIO; +			} +			break; +#endif /* CONFIG_VSX */  		case KVM_REG_PPC_DEBUG_INST: {  			u32 opcode = INS_TW;  			r = copy_to_user((u32 __user *)(long)reg->addr, @@ -544,6 +631,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  			val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));  			break;  #endif /* CONFIG_KVM_XICS */ +		case KVM_REG_PPC_FSCR: +			val = get_reg_val(reg->id, vcpu->arch.fscr); +			break; +		case KVM_REG_PPC_TAR: +			val = get_reg_val(reg->id, vcpu->arch.tar); +			break; +		case KVM_REG_PPC_EBBHR: +			val = get_reg_val(reg->id, vcpu->arch.ebbhr); +			break; +		case KVM_REG_PPC_EBBRR: +			val = get_reg_val(reg->id, vcpu->arch.ebbrr); +			break; +		case KVM_REG_PPC_BESCR: +			val = get_reg_val(reg->id, vcpu->arch.bescr); +			break;  		default:  			r = -EINVAL;  			break; @@ -572,23 +674,22 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))  		return -EFAULT; -	r = kvmppc_set_one_reg(vcpu, reg->id, &val); - +	r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);  	if (r == -EINVAL) {  		r = 0;  		switch (reg->id) {  		case KVM_REG_PPC_DAR: -			vcpu->arch.shared->dar = set_reg_val(reg->id, val); +			kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));  			break;  		case KVM_REG_PPC_DSISR: -			vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); +			kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));  			break;  		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:  			i = reg->id - KVM_REG_PPC_FPR0; -			vcpu->arch.fpr[i] = set_reg_val(reg->id, val); +			VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);  			break;  		case KVM_REG_PPC_FPSCR: -			vcpu->arch.fpscr = set_reg_val(reg->id, val); +			vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);  			break;  #ifdef CONFIG_ALTIVEC  		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -596,16 +697,34 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  				r = -ENXIO;  				break;  			} -			vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; +			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;  			break;  		case KVM_REG_PPC_VSCR:  			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {  				r = -ENXIO;  				break;  			} -			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); +			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); +			break; +		case KVM_REG_PPC_VRSAVE: +			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { +				r = -ENXIO; +				break; +			} +			vcpu->arch.vrsave = set_reg_val(reg->id, val);  			break;  #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +		case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: +			if (cpu_has_feature(CPU_FTR_VSX)) { +				long int i = reg->id - KVM_REG_PPC_VSR0; +				vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; +				vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; +			} else { +				r = -ENXIO; +			} +			break; +#endif /* CONFIG_VSX */  #ifdef CONFIG_KVM_XICS  		case KVM_REG_PPC_ICP_STATE:  			if (!vcpu->arch.icp) { @@ -616,6 +735,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  						set_reg_val(reg->id, val));  			break;  #endif /* CONFIG_KVM_XICS */ +		case KVM_REG_PPC_FSCR: +			vcpu->arch.fscr = set_reg_val(reg->id, val); +			break; +		case KVM_REG_PPC_TAR: +			vcpu->arch.tar = set_reg_val(reg->id, val); +			break; +		case KVM_REG_PPC_EBBHR: +			vcpu->arch.ebbhr = set_reg_val(reg->id, val); +			break; +		case KVM_REG_PPC_EBBRR: +			vcpu->arch.ebbrr = set_reg_val(reg->id, val); +			break; +		case KVM_REG_PPC_BESCR: +			vcpu->arch.bescr = set_reg_val(reg->id, val); +			break;  		default:  			r = -EINVAL;  			break; @@ -625,6 +759,27 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	return r;  } +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ +	vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr); + +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ +	return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); +} +  int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,                                    struct kvm_translation *tr)  { @@ -644,3 +799,147 @@ void kvmppc_decrementer_func(unsigned long data)  	kvmppc_core_queue_dec(vcpu);  	kvm_vcpu_kick(vcpu);  } + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ +	return kvm->arch.kvm_ops->vcpu_create(kvm, id); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); +} + +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +{ +	return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ +	return kvm->arch.kvm_ops->get_dirty_log(kvm, log); +} + +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, +			      struct kvm_memory_slot *dont) +{ +	kvm->arch.kvm_ops->free_memslot(free, dont); +} + +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, +			       unsigned long npages) +{ +	return kvm->arch.kvm_ops->create_memslot(slot, npages); +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ +	kvm->arch.kvm_ops->flush_memslot(kvm, memslot); +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, +				struct kvm_memory_slot *memslot, +				struct kvm_userspace_memory_region *mem) +{ +	return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, +				struct kvm_userspace_memory_region *mem, +				const struct kvm_memory_slot *old) +{ +	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ +	return kvm->arch.kvm_ops->unmap_hva(kvm, hva); +} +EXPORT_SYMBOL_GPL(kvm_unmap_hva); + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ +	return 
kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ +	return kvm->arch.kvm_ops->age_hva(kvm, hva); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ +	return kvm->arch.kvm_ops->test_age_hva(kvm, hva); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ +	kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + +#ifdef CONFIG_PPC64 +	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); +	INIT_LIST_HEAD(&kvm->arch.rtas_tokens); +#endif + +	return kvm->arch.kvm_ops->init_vm(kvm); +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +	kvm->arch.kvm_ops->destroy_vm(kvm); + +#ifdef CONFIG_PPC64 +	kvmppc_rtas_tokens_free(kvm); +	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); +#endif +} + +int kvmppc_core_check_processor_compat(void) +{ +	/* +	 * We always return 0 for book3s. We check +	 * for compatability while loading the HV +	 * or PR module +	 */ +	return 0; +} + +static int kvmppc_book3s_init(void) +{ +	int r; + +	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); +	if (r) +		return r; +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +	r = kvmppc_book3s_init_pr(); +#endif +	return r; + +} + +static void kvmppc_book3s_exit(void) +{ +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +	kvmppc_book3s_exit_pr(); +#endif +	kvm_exit(); +} + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); + +/* On 32bit this is our one and only kernel module */ +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); +#endif diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h new file mode 100644 index 00000000000..4bf956cf94d --- /dev/null +++ b/arch/powerpc/kvm/book3s.h @@ -0,0 +1,34 @@ +/* + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. 
+ * + */ + +#ifndef __POWERPC_KVM_BOOK3S_H__ +#define __POWERPC_KVM_BOOK3S_H__ + +extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, +					 struct kvm_memory_slot *memslot); +extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, +				  unsigned long end); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); + +extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +				     unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, +					int sprn, ulong spr_val); +extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, +					int sprn, ulong *spr_val); +extern int kvmppc_book3s_init_pr(void); +extern void kvmppc_book3s_exit_pr(void); + +#endif diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index c8cefdd15fd..93503bbdae4 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -84,13 +84,14 @@ static inline bool sr_nx(u32 sr_raw)  }  static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, -					  struct kvmppc_pte *pte, bool data); +					  struct kvmppc_pte *pte, bool data, +					  bool iswrite);  static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  					     u64 *vsid);  static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)  { -	return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; +	return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);  }  static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,  	u64 vsid;  	struct kvmppc_pte pte; -	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) +	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))  		return pte.vpage;  	kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)  	kvmppc_set_msr(vcpu, 0);  } -static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, +static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,  				      u32 sre, gva_t eaddr,  				      bool primary)  { +	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	u32 page, hash, pteg, htabmask;  	hva_t r; @@ -129,10 +131,10 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3  	pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;  	dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", -		kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, +		kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,  		sr_vsid(sre)); -	r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); +	r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);  	if (kvm_is_error_hva(r))  		return r;  	return r | (pteg & ~PAGE_MASK); @@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)  }  static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, -					  struct kvmppc_pte *pte, bool data) +					  struct kvmppc_pte *pte, bool data, +					  bool iswrite)  {  	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	struct kvmppc_bat *bat; @@ 
-157,7 +160,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,  		else  			bat = &vcpu_book3s->ibat[i]; -		if (vcpu->arch.shared->msr & MSR_PR) { +		if (kvmppc_get_msr(vcpu) & MSR_PR) {  			if (!bat->vp)  				continue;  		} else { @@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,  				printk(KERN_INFO "BAT is not readable!\n");  				continue;  			} -			if (!pte->may_write) { -				/* let's treat r/o BATs as not-readable for now */ +			if (iswrite && !pte->may_write) {  				dprintk_pte("BAT is read-only!\n");  				continue;  			} @@ -201,12 +203,12 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,  static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,  				     struct kvmppc_pte *pte, bool data, -				     bool primary) +				     bool iswrite, bool primary)  { -	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	u32 sre;  	hva_t ptegp;  	u32 pteg[16]; +	u32 pte0, pte1;  	u32 ptem = 0;  	int i;  	int found = 0; @@ -218,7 +220,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,  	pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); -	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); +	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);  	if (kvm_is_error_hva(ptegp)) {  		printk(KERN_INFO "KVM: Invalid PTEG!\n");  		goto no_page_found; @@ -232,14 +234,16 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,  	}  	for (i=0; i<16; i+=2) { -		if (ptem == pteg[i]) { +		pte0 = be32_to_cpu(pteg[i]); +		pte1 = be32_to_cpu(pteg[i + 1]); +		if (ptem == pte0) {  			u8 pp; -			pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); -			pp = pteg[i+1] & 3; +			pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF); +			pp = pte1 & 3; -			if ((sr_kp(sre) &&  (vcpu->arch.shared->msr & MSR_PR)) || -			    (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) +			if ((sr_kp(sre) &&  (kvmppc_get_msr(vcpu) & MSR_PR)) || +			    (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))  				pp |= 4;  			pte->may_write = false; @@ -258,11 +262,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,  					break;  			} -			if ( !pte->may_read ) -				continue; -  			dprintk_pte("MMU: Found PTE -> %x %x - %x\n", -				    pteg[i], pteg[i+1], pp); +				    pte0, pte1, pp);  			found = 1;  			break;  		} @@ -271,19 +272,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,  	/* Update PTE C and A bits, so the guest's swapper knows we used the  	   page */  	if (found) { -		u32 oldpte = pteg[i+1]; - -		if (pte->may_read) -			pteg[i+1] |= PTEG_FLAG_ACCESSED; -		if (pte->may_write) -			pteg[i+1] |= PTEG_FLAG_DIRTY; -		else -			dprintk_pte("KVM: Mapping read-only page!\n"); - -		/* Write back into the PTEG */ -		if (pteg[i+1] != oldpte) -			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); - +		u32 pte_r = pte1; +		char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32)); + +		/* +		 * Use single-byte writes to update the HPTE, to +		 * conform to what real hardware does. 
+		 */ +		if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) { +			pte_r |= PTEG_FLAG_ACCESSED; +			put_user(pte_r >> 8, addr + 2); +		} +		if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) { +			pte_r |= PTEG_FLAG_DIRTY; +			put_user(pte_r, addr + 3); +		} +		if (!pte->may_read || (iswrite && !pte->may_write)) +			return -EPERM;  		return 0;  	} @@ -294,7 +299,8 @@ no_page_found:  			    to_book3s(vcpu)->sdr1, ptegp);  		for (i=0; i<16; i+=2) {  			dprintk_pte("   %02d: 0x%x - 0x%x (0x%x)\n", -				    i, pteg[i], pteg[i+1], ptem); +				    i, be32_to_cpu(pteg[i]), +				    be32_to_cpu(pteg[i+1]), ptem);  		}  	} @@ -302,17 +308,19 @@ no_page_found:  }  static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, -				      struct kvmppc_pte *pte, bool data) +				      struct kvmppc_pte *pte, bool data, +				      bool iswrite)  {  	int r;  	ulong mp_ea = vcpu->arch.magic_page_ea;  	pte->eaddr = eaddr; +	pte->page_size = MMU_PAGE_4K;  	/* Magic page override */  	if (unlikely(mp_ea) &&  	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && -	    !(vcpu->arch.shared->msr & MSR_PR)) { +	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {  		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);  		pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);  		pte->raddr &= KVM_PAM; @@ -323,11 +331,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  		return 0;  	} -	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); +	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);  	if (r < 0) -	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); +		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, +						   data, iswrite, true);  	if (r < 0) -	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); +		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, +						   data, iswrite, false);  	return r;  } @@ -335,19 +345,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)  { -	return vcpu->arch.shared->sr[srnum]; +	return kvmppc_get_sr(vcpu, srnum);  }  static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,  					ulong value)  { -	vcpu->arch.shared->sr[srnum] = value; +	kvmppc_set_sr(vcpu, srnum, value);  	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);  }  static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)  { -	kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); +	int i; +	struct kvm_vcpu *v; + +	/* flush this VA on all cpus */ +	kvm_for_each_vcpu(i, v, vcpu->kvm) +		kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);  }  static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, @@ -356,8 +371,9 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  	ulong ea = esid << SID_SHIFT;  	u32 sr;  	u64 gvsid = esid; +	u64 msr = kvmppc_get_msr(vcpu); -	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { +	if (msr & (MSR_DR|MSR_IR)) {  		sr = find_sr(vcpu, ea);  		if (sr_valid(sr))  			gvsid = sr_vsid(sr); @@ -366,7 +382,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  	/* In case we only have one of MSR_IR or MSR_DR set, let's put  	   that in the real-mode context (and hope RM doesn't access  	   high memory) */ -	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { +	switch (msr & (MSR_DR|MSR_IR)) {  	case 0:  		*vsid = VSID_REAL | esid;  		break; @@ -386,7 +402,7 @@ 
static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  		BUG();  	} -	if (vcpu->arch.shared->msr & MSR_PR) +	if (msr & MSR_PR)  		*vsid |= VSID_PR;  	return 0; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 00e619bf608..678e7537049 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -92,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)  	struct kvmppc_sid_map *map;  	u16 sid_map_mask; -	if (vcpu->arch.shared->msr & MSR_PR) +	if (kvmppc_get_msr(vcpu) & MSR_PR)  		gvsid |= VSID_PR;  	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,  extern char etext[]; -int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, +			bool iswrite)  {  	pfn_t hpaddr;  	u64 vpn; @@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)  	bool evict = false;  	struct hpte_cache *pte;  	int r = 0; +	bool writable;  	/* Get host physical address for gpa */ -	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); +	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, +				   iswrite, &writable);  	if (is_error_noslot_pfn(hpaddr)) {  		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",  				 orig_pte->eaddr); @@ -204,7 +207,7 @@ next_pteg:  		(primary ? 0 : PTE_SEC);  	pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; -	if (orig_pte->may_write) { +	if (orig_pte->may_write && writable) {  		pteg1 |= PP_RWRW;  		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);  	} else { @@ -240,6 +243,11 @@ next_pteg:  	/* Now tell our Shadow PTE code about the new page */  	pte = kvmppc_mmu_hpte_cache_next(vcpu); +	if (!pte) { +		kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); +		r = -EAGAIN; +		goto out; +	}  	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",  		    orig_pte->may_write ? 
'w' : '-', @@ -259,6 +267,11 @@ out:  	return r;  } +void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ +	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL); +} +  static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)  {  	struct kvmppc_sid_map *map; @@ -266,7 +279,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)  	u16 sid_map_mask;  	static int backwards_map = 0; -	if (vcpu->arch.shared->msr & MSR_PR) +	if (kvmppc_get_msr(vcpu) & MSR_PR)  		gvsid |= VSID_PR;  	/* We might get collisions that trap in preceding order, so let's @@ -341,7 +354,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)  	svcpu_put(svcpu);  } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)  {  	int i; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 7e345e00661..774a253ca4e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -38,7 +38,7 @@  static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)  { -	kvmppc_set_msr(vcpu, MSR_SF); +	kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);  }  static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( @@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,  	return kvmppc_slb_calc_vpn(slb, eaddr);  } +static int mmu_pagesize(int mmu_pg) +{ +	switch (mmu_pg) { +	case MMU_PAGE_64K: +		return 16; +	case MMU_PAGE_16M: +		return 24; +	} +	return 12; +} +  static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)  { -	return slbe->large ? 24 : 12; +	return mmu_pagesize(slbe->base_page_size);  }  static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) @@ -119,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)  	return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);  } -static hva_t kvmppc_mmu_book3s_64_get_pteg( -				struct kvmppc_vcpu_book3s *vcpu_book3s, +static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,  				struct kvmppc_slb *slbe, gva_t eaddr,  				bool second)  { +	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	u64 hash, pteg, htabsize;  	u32 ssize;  	hva_t r; @@ -148,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(  	/* When running a PAPR guest, SDR1 contains a HVA address instead             of a GPA */ -	if (vcpu_book3s->vcpu.arch.papr_enabled) +	if (vcpu->arch.papr_enabled)  		r = pteg;  	else -		r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); +		r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);  	if (kvm_is_error_hva(r))  		return r; @@ -166,18 +177,38 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)  	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);  	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); -	if (p < 24) -		avpn >>= ((80 - p) - 56) - 8; +	if (p < 16) +		avpn >>= ((80 - p) - 56) - 8;	/* 16 - p */  	else -		avpn <<= 8; +		avpn <<= p - 16;  	return avpn;  } +/* + * Return page size encoded in the second word of a HPTE, or + * -1 for an invalid encoding for the base page size indicated by + * the SLB entry.  This doesn't handle mixed pagesize segments yet. 
+ */ +static int decode_pagesize(struct kvmppc_slb *slbe, u64 r) +{ +	switch (slbe->base_page_size) { +	case MMU_PAGE_64K: +		if ((r & 0xf000) == 0x1000) +			return MMU_PAGE_64K; +		break; +	case MMU_PAGE_16M: +		if ((r & 0xff000) == 0) +			return MMU_PAGE_16M; +		break; +	} +	return -1; +} +  static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, -				struct kvmppc_pte *gpte, bool data) +				      struct kvmppc_pte *gpte, bool data, +				      bool iswrite)  { -	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	struct kvmppc_slb *slbe;  	hva_t ptegp;  	u64 pteg[16]; @@ -189,12 +220,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  	u8 pp, key = 0;  	bool found = false;  	bool second = false; +	int pgsize;  	ulong mp_ea = vcpu->arch.magic_page_ea;  	/* Magic page override */  	if (unlikely(mp_ea) &&  	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && -	    !(vcpu->arch.shared->msr & MSR_PR)) { +	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {  		gpte->eaddr = eaddr;  		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);  		gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); @@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  		gpte->may_execute = true;  		gpte->may_read = true;  		gpte->may_write = true; +		gpte->page_size = MMU_PAGE_4K;  		return 0;  	} @@ -222,8 +255,12 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |  		HPTE_V_SECONDARY; +	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K; + +	mutex_lock(&vcpu->kvm->arch.hpt_mutex); +  do_second: -	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); +	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);  	if (kvm_is_error_hva(ptegp))  		goto no_page_found; @@ -232,14 +269,24 @@ do_second:  		goto no_page_found;  	} -	if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) +	if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)  		key = 4; -	else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) +	else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)  		key = 4;  	for (i=0; i<16; i+=2) { +		u64 pte0 = be64_to_cpu(pteg[i]); +		u64 pte1 = be64_to_cpu(pteg[i + 1]); +  		/* Check all relevant fields of 1st dword */ -		if ((pteg[i] & v_mask) == v_val) { +		if ((pte0 & v_mask) == v_val) { +			/* If large page bit is set, check pgsize encoding */ +			if (slbe->large && +			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { +				pgsize = decode_pagesize(slbe, pte1); +				if (pgsize < 0) +					continue; +			}  			found = true;  			break;  		} @@ -253,17 +300,22 @@ do_second:  		goto do_second;  	} -	v = pteg[i]; -	r = pteg[i+1]; +	v = be64_to_cpu(pteg[i]); +	r = be64_to_cpu(pteg[i+1]);  	pp = (r & HPTE_R_PP) | key; -	eaddr_mask = 0xFFF; +	if (r & HPTE_R_PP0) +		pp |= 8;  	gpte->eaddr = eaddr;  	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); -	if (slbe->large) -		eaddr_mask = 0xFFFFFF; + +	eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;  	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); +	gpte->page_size = pgsize;  	gpte->may_execute = ((r & HPTE_R_N) ? 
false : true); +	if (unlikely(vcpu->arch.disable_kernel_nx) && +	    !(kvmppc_get_msr(vcpu) & MSR_PR)) +		gpte->may_execute = true;  	gpte->may_read = false;  	gpte->may_write = false; @@ -277,6 +329,7 @@ do_second:  	case 3:  	case 5:  	case 7: +	case 10:  		gpte->may_read = true;  		break;  	} @@ -287,30 +340,37 @@ do_second:  	/* Update PTE R and C bits, so the guest's swapper knows we used the  	 * page */ -	if (gpte->may_read) { -		/* Set the accessed flag */ +	if (gpte->may_read && !(r & HPTE_R_R)) { +		/* +		 * Set the accessed flag. +		 * We have to write this back with a single byte write +		 * because another vcpu may be accessing this on +		 * non-PAPR platforms such as mac99, and this is +		 * what real hardware does. +		 */ +                char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));  		r |= HPTE_R_R; +		put_user(r >> 8, addr + 6);  	} -	if (data && gpte->may_write) { -		/* Set the dirty flag -- XXX even if not writing */ +	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) { +		/* Set the dirty flag */ +		/* Use a single byte write */ +                char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));  		r |= HPTE_R_C; +		put_user(r, addr + 7);  	} -	/* Write back into the PTEG */ -	if (pteg[i+1] != r) { -		pteg[i+1] = r; -		copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); -	} +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex); -	if (!gpte->may_read) +	if (!gpte->may_read || (iswrite && !gpte->may_write))  		return -EPERM;  	return 0;  no_page_found: +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);  	return -ENOENT;  no_seg_found: -  	dprintk("KVM MMU: Trigger segment fault\n");  	return -EINVAL;  } @@ -345,6 +405,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)  	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;  	slbe->class = (rs & SLB_VSID_C) ? 1 : 0; +	slbe->base_page_size = MMU_PAGE_4K; +	if (slbe->large) { +		if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) { +			switch (rs & SLB_VSID_LP) { +			case SLB_VSID_LP_00: +				slbe->base_page_size = MMU_PAGE_16M; +				break; +			case SLB_VSID_LP_01: +				slbe->base_page_size = MMU_PAGE_64K; +				break; +			} +		} else +			slbe->base_page_size = MMU_PAGE_16M; +	} +  	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);  	slbe->origv = rs; @@ -410,7 +485,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)  		vcpu->arch.slb[i].origv = 0;  	} -	if (vcpu->arch.shared->msr & MSR_IR) { +	if (kvmppc_get_msr(vcpu) & MSR_IR) {  		kvmppc_mmu_flush_segments(vcpu);  		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));  	} @@ -460,13 +535,44 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,  				       bool large)  {  	u64 mask = 0xFFFFFFFFFULL; +	long i; +	struct kvm_vcpu *v;  	dprintk("KVM MMU: tlbie(0x%lx)\n", va); -	if (large) -		mask = 0xFFFFFF000ULL; -	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); +	/* +	 * The tlbie instruction changed behaviour starting with +	 * POWER6.  POWER6 and later don't have the large page flag +	 * in the instruction but in the RB value, along with bits +	 * indicating page and segment sizes. +	 */ +	if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) { +		/* POWER6 or later */ +		if (va & 1) {		/* L bit */ +			if ((va & 0xf000) == 0x1000) +				mask = 0xFFFFFFFF0ULL;	/* 64k page */ +			else +				mask = 0xFFFFFF000ULL;	/* 16M page */ +		} +	} else { +		/* older processors, e.g. 
PPC970 */ +		if (large) +			mask = 0xFFFFFF000ULL; +	} +	/* flush this VA on all vcpus */ +	kvm_for_each_vcpu(i, v, vcpu->kvm) +		kvmppc_mmu_pte_vflush(v, va >> 12, mask); +} + +#ifdef CONFIG_PPC_64K_PAGES +static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid) +{ +	ulong mp_ea = vcpu->arch.magic_page_ea; + +	return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) && +		(mp_ea >> SID_SHIFT) == esid;  } +#endif  static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  					     u64 *vsid) @@ -475,11 +581,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  	struct kvmppc_slb *slb;  	u64 gvsid = esid;  	ulong mp_ea = vcpu->arch.magic_page_ea; +	int pagesize = MMU_PAGE_64K; +	u64 msr = kvmppc_get_msr(vcpu); -	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { +	if (msr & (MSR_DR|MSR_IR)) {  		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);  		if (slb) {  			gvsid = slb->vsid; +			pagesize = slb->base_page_size;  			if (slb->tb) {  				gvsid <<= SID_SHIFT_1T - SID_SHIFT;  				gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); @@ -488,37 +597,50 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,  		}  	} -	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { +	switch (msr & (MSR_DR|MSR_IR)) {  	case 0: -		*vsid = VSID_REAL | esid; +		gvsid = VSID_REAL | esid;  		break;  	case MSR_IR: -		*vsid = VSID_REAL_IR | gvsid; +		gvsid |= VSID_REAL_IR;  		break;  	case MSR_DR: -		*vsid = VSID_REAL_DR | gvsid; +		gvsid |= VSID_REAL_DR;  		break;  	case MSR_DR|MSR_IR:  		if (!slb)  			goto no_slb; -		*vsid = gvsid;  		break;  	default:  		BUG();  		break;  	} -	if (vcpu->arch.shared->msr & MSR_PR) -		*vsid |= VSID_PR; +#ifdef CONFIG_PPC_64K_PAGES +	/* +	 * Mark this as a 64k segment if the host is using +	 * 64k pages, the host MMU supports 64k pages and +	 * the guest segment page size is >= 64k, +	 * but not if this segment contains the magic page. 
+	 */ +	if (pagesize >= MMU_PAGE_64K && +	    mmu_psize_defs[MMU_PAGE_64K].shift && +	    !segment_contains_magic_page(vcpu, esid)) +		gvsid |= VSID_64K; +#endif + +	if (kvmppc_get_msr(vcpu) & MSR_PR) +		gvsid |= VSID_PR; +	*vsid = gvsid;  	return 0;  no_slb:  	/* Catch magic page case */  	if (unlikely(mp_ea) &&  	    unlikely(esid == (mp_ea >> SID_SHIFT)) && -	    !(vcpu->arch.shared->msr & MSR_PR)) { +	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {  		*vsid = VSID_REAL | esid;  		return 0;  	} diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e5240524bf6..0ac98392f36 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -27,14 +27,14 @@  #include <asm/machdep.h>  #include <asm/mmu_context.h>  #include <asm/hw_irq.h> -#include "trace.h" +#include "trace_pr.h"  #define PTE_SIZE 12  void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)  {  	ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, -			       MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, +			       pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,  			       false);  } @@ -58,7 +58,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)  	struct kvmppc_sid_map *map;  	u16 sid_map_mask; -	if (vcpu->arch.shared->msr & MSR_PR) +	if (kvmppc_get_msr(vcpu) & MSR_PR)  		gvsid |= VSID_PR;  	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)  	return NULL;  } -int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, +			bool iswrite)  {  	unsigned long vpn;  	pfn_t hpaddr; @@ -90,16 +91,26 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)  	int attempt = 0;  	struct kvmppc_sid_map *map;  	int r = 0; +	int hpsize = MMU_PAGE_4K; +	bool writable; +	unsigned long mmu_seq; +	struct kvm *kvm = vcpu->kvm; +	struct hpte_cache *cpte; +	unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT; +	unsigned long pfn; + +	/* used to check for invalidations in progress */ +	mmu_seq = kvm->mmu_notifier_seq; +	smp_rmb();  	/* Get host physical address for gpa */ -	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); -	if (is_error_noslot_pfn(hpaddr)) { -		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); +	pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); +	if (is_error_noslot_pfn(pfn)) { +		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);  		r = -EINVAL;  		goto out;  	} -	hpaddr <<= PAGE_SHIFT; -	hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); +	hpaddr = pfn << PAGE_SHIFT;  	/* and write the mapping ea -> hpa into the pt */  	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); @@ -117,20 +128,39 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)  		goto out;  	} -	vsid = map->host_vsid; -	vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); +	vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M); -	if (!orig_pte->may_write) -		rflags |= HPTE_R_PP; -	else -		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); +	kvm_set_pfn_accessed(pfn); +	if (!orig_pte->may_write || !writable) +		rflags |= PP_RXRX; +	else { +		mark_page_dirty(vcpu->kvm, gfn); +		kvm_set_pfn_dirty(pfn); +	}  	if (!orig_pte->may_execute)  		rflags |= HPTE_R_N;  	else -		kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); +		
kvmppc_mmu_flush_icache(pfn); + +	/* +	 * Use 64K pages if possible; otherwise, on 64K page kernels, +	 * we need to transfer 4 more bits from guest real to host real addr. +	 */ +	if (vsid & VSID_64K) +		hpsize = MMU_PAGE_64K; +	else +		hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); + +	hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M); -	hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); +	cpte = kvmppc_mmu_hpte_cache_next(vcpu); + +	spin_lock(&kvm->mmu_lock); +	if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { +		r = -EAGAIN; +		goto out_unlock; +	}  map_again:  	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); @@ -139,11 +169,11 @@ map_again:  	if (attempt > 1)  		if (ppc_md.hpte_remove(hpteg) < 0) {  			r = -1; -			goto out; +			goto out_unlock;  		}  	ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, -				 MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M); +				 hpsize, hpsize, MMU_SEGSIZE_256M);  	if (ret < 0) {  		/* If we couldn't map a primary PTE, try a secondary */ @@ -152,8 +182,6 @@ map_again:  		attempt++;  		goto map_again;  	} else { -		struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); -  		trace_kvm_book3s_64_mmu_map(rflags, hpteg,  					    vpn, hpaddr, orig_pte); @@ -164,19 +192,37 @@ map_again:  			hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);  		} -		pte->slot = hpteg + (ret & 7); -		pte->host_vpn = vpn; -		pte->pte = *orig_pte; -		pte->pfn = hpaddr >> PAGE_SHIFT; +		cpte->slot = hpteg + (ret & 7); +		cpte->host_vpn = vpn; +		cpte->pte = *orig_pte; +		cpte->pfn = pfn; +		cpte->pagesize = hpsize; -		kvmppc_mmu_hpte_cache_map(vcpu, pte); +		kvmppc_mmu_hpte_cache_map(vcpu, cpte); +		cpte = NULL;  	} -	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); + +out_unlock: +	spin_unlock(&kvm->mmu_lock); +	kvm_release_pfn_clean(pfn); +	if (cpte) +		kvmppc_mmu_hpte_cache_free(cpte);  out:  	return r;  } +void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ +	u64 mask = 0xfffffffffULL; +	u64 vsid; + +	vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid); +	if (vsid & VSID_64K) +		mask = 0xffffffff0ULL; +	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask); +} +  static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)  {  	struct kvmppc_sid_map *map; @@ -184,7 +230,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)  	u16 sid_map_mask;  	static int backwards_map = 0; -	if (vcpu->arch.shared->msr & MSR_PR) +	if (kvmppc_get_msr(vcpu) & MSR_PR)  		gvsid |= VSID_PR;  	/* We might get collisions that trap in preceding order, so let's @@ -225,11 +271,8 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)  	int found_inval = -1;  	int r; -	if (!svcpu->slb_max) -		svcpu->slb_max = 1; -  	/* Are we overwriting? 
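A note on the kvmppc_mmu_map_page() rework above: it adopts the usual KVM mmu_notifier race check, sampling mmu_notifier_seq before the (possibly sleeping) gfn-to-pfn lookup and re-checking it under mmu_lock before the HPTE is installed. The following is only a minimal sketch of that pattern, assuming a KVM configuration with MMU notifiers enabled; install_hpte() is a hypothetical placeholder, not a function from this patch.

#include <linux/kvm_host.h>

static int map_one_page(struct kvm *kvm, unsigned long gfn)
{
	unsigned long mmu_seq;

	mmu_seq = kvm->mmu_notifier_seq;	/* snapshot before the pfn lookup */
	smp_rmb();

	/* ... gfn_to_pfn() here; it may sleep, so no locks are held ... */

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		/* an invalidation ran in between; drop the pfn and retry */
		spin_unlock(&kvm->mmu_lock);
		return -EAGAIN;
	}
	/* install_hpte(kvm, gfn); safe: no invalidation can have slipped in */
	spin_unlock(&kvm->mmu_lock);
	return 0;
}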
*/ -	for (i = 1; i < svcpu->slb_max; i++) { +	for (i = 0; i < svcpu->slb_max; i++) {  		if (!(svcpu->slb[i].esid & SLB_ESID_V))  			found_inval = i;  		else if ((svcpu->slb[i].esid & ESID_MASK) == esid) { @@ -239,7 +282,7 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)  	}  	/* Found a spare entry that was invalidated before */ -	if (found_inval > 0) { +	if (found_inval >= 0) {  		r = found_inval;  		goto out;  	} @@ -291,6 +334,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)  	slb_vsid &= ~SLB_VSID_KP;  	slb_esid |= slb_index; +#ifdef CONFIG_PPC_64K_PAGES +	/* Set host segment base page size to 64K if possible */ +	if (gvsid & VSID_64K) +		slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp; +#endif +  	svcpu->slb[slb_index].esid = slb_esid;  	svcpu->slb[slb_index].vsid = slb_vsid; @@ -307,7 +356,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)  	ulong seg_mask = -seg_size;  	int i; -	for (i = 1; i < svcpu->slb_max; i++) { +	for (i = 0; i < svcpu->slb_max; i++) {  		if ((svcpu->slb[i].esid & SLB_ESID_V) &&  		    (svcpu->slb[i].esid & seg_mask) == ea) {  			/* Invalidate this entry */ @@ -321,12 +370,12 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)  void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)  {  	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -	svcpu->slb_max = 1; +	svcpu->slb_max = 0;  	svcpu->slb[0].esid = 0;  	svcpu_put(svcpu);  } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)  {  	kvmppc_mmu_hpte_destroy(vcpu);  	__destroy_context(to_book3s(vcpu)->context_id[0]); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 043eec8461e..68468d695f1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -52,7 +52,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm);  long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)  { -	unsigned long hpt; +	unsigned long hpt = 0;  	struct revmap_entry *rev;  	struct page *page = NULL;  	long order = KVM_DEFAULT_HPT_ORDER; @@ -64,22 +64,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)  	}  	kvm->arch.hpt_cma_alloc = 0; -	/* -	 * try first to allocate it from the kernel page allocator. -	 * We keep the CMA reserved for failed allocation. 
-	 */ -	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | -			       __GFP_NOWARN, order - PAGE_SHIFT); - -	/* Next try to allocate from the preallocated pool */ -	if (!hpt) { -		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); -		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); -		if (page) { -			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); -			kvm->arch.hpt_cma_alloc = 1; -		} else -			--order; +	VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); +	page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); +	if (page) { +		hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); +		kvm->arch.hpt_cma_alloc = 1;  	}  	/* Lastly try successively smaller sizes from the page allocator */ @@ -260,13 +249,16 @@ int kvmppc_mmu_hv_init(void)  	return 0;  } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) -{ -} -  static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)  { -	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); +	unsigned long msr = vcpu->arch.intr_msr; + +	/* If transactional, change to suspend mode on IRQ delivery */ +	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) +		msr |= MSR_TS_S; +	else +		msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; +	kvmppc_set_msr(vcpu, msr);  }  /* @@ -451,7 +443,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,  }  static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, -			struct kvmppc_pte *gpte, bool data) +			struct kvmppc_pte *gpte, bool data, bool iswrite)  {  	struct kvm *kvm = vcpu->kvm;  	struct kvmppc_slb *slbe; @@ -473,11 +465,14 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  		slb_v = vcpu->kvm->arch.vrma_slb_v;  	} +	preempt_disable();  	/* Find the HPTE in the hash table */  	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,  					 HPTE_V_VALID | HPTE_V_ABSENT); -	if (index < 0) +	if (index < 0) { +		preempt_enable();  		return -ENOENT; +	}  	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));  	v = hptep[0] & ~HPTE_V_HVLOCK;  	gr = kvm->arch.revmap[index].guest_rpte; @@ -485,6 +480,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  	/* Unlock the HPTE */  	asm volatile("lwsync" : : : "memory");  	hptep[0] = v; +	preempt_enable();  	gpte->eaddr = eaddr;  	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); @@ -562,7 +558,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,  	 * we just return and retry the instruction.  	 
*/ -	if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) +	if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store)  		return RESUME_GUEST;  	/* @@ -589,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	struct kvm *kvm = vcpu->kvm;  	unsigned long *hptep, hpte[3], r;  	unsigned long mmu_seq, psize, pte_size; +	unsigned long gpa_base, gfn_base;  	unsigned long gpa, gfn, hva, pfn;  	struct kvm_memory_slot *memslot;  	unsigned long *rmap; @@ -627,7 +624,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	/* Translate the logical address and get the page */  	psize = hpte_page_size(hpte[0], r); -	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); +	gpa_base = r & HPTE_R_RPN & ~(psize - 1); +	gfn_base = gpa_base >> PAGE_SHIFT; +	gpa = gpa_base | (ea & (psize - 1));  	gfn = gpa >> PAGE_SHIFT;  	memslot = gfn_to_memslot(kvm, gfn); @@ -639,6 +638,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	if (!kvm->arch.using_mmu_notifiers)  		return -EFAULT;		/* should never get here */ +	/* +	 * This should never happen, because of the slot_is_aligned() +	 * check in kvmppc_do_h_enter(). +	 */ +	if (gfn_base < memslot->base_gfn) +		return -EFAULT; +  	/* used to check for invalidations in progress */  	mmu_seq = kvm->mmu_notifier_seq;  	smp_rmb(); @@ -669,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  			return -EFAULT;  	} else {  		page = pages[0]; +		pfn = page_to_pfn(page);  		if (PageHuge(page)) {  			page = compound_head(page);  			pte_size <<= compound_order(page); @@ -693,7 +700,6 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  			}  			rcu_read_unlock_sched();  		} -		pfn = page_to_pfn(page);  	}  	ret = -EFAULT; @@ -711,8 +717,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;  	} -	/* Set the HPTE to point to pfn */ -	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); +	/* +	 * Set the HPTE to point to pfn. +	 * Since the pfn is at PAGE_SIZE granularity, make sure we +	 * don't mask out lower-order bits if psize < PAGE_SIZE. 
+	 */ +	if (psize < PAGE_SIZE) +		psize = PAGE_SIZE; +	r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));  	if (hpte_is_writable(r) && !write_ok)  		r = hpte_make_readonly(r);  	ret = RESUME_GUEST; @@ -725,7 +737,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  		goto out_unlock;  	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; -	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; +	/* Always put the HPTE in the rmap chain for the page base address */ +	rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];  	lock_rmap(rmap);  	/* Check if we might have been invalidated; let the guest retry if so */ @@ -906,21 +919,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,  	return 0;  } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)  {  	if (kvm->arch.using_mmu_notifiers)  		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);  	return 0;  } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)  {  	if (kvm->arch.using_mmu_notifiers)  		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);  	return 0;  } -void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +void kvmppc_core_flush_memslot_hv(struct kvm *kvm, +				  struct kvm_memory_slot *memslot)  {  	unsigned long *rmapp;  	unsigned long gfn; @@ -994,7 +1008,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,  	return ret;  } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)  {  	if (!kvm->arch.using_mmu_notifiers)  		return 0; @@ -1032,36 +1046,47 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,  	return ret;  } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)  {  	if (!kvm->arch.using_mmu_notifiers)  		return 0;  	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);  } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)  {  	if (!kvm->arch.using_mmu_notifiers)  		return;  	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);  } -static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) +static int vcpus_running(struct kvm *kvm) +{ +	return atomic_read(&kvm->arch.vcpus_running) != 0; +} + +/* + * Returns the number of system pages that are dirty. + * This can be more than 1 if we find a huge-page HPTE. + */ +static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)  {  	struct revmap_entry *rev = kvm->arch.revmap;  	unsigned long head, i, j; +	unsigned long n; +	unsigned long v, r;  	unsigned long *hptep; -	int ret = 0; +	int npages_dirty = 0;   retry:  	lock_rmap(rmapp);  	if (*rmapp & KVMPPC_RMAP_CHANGED) {  		*rmapp &= ~KVMPPC_RMAP_CHANGED; -		ret = 1; +		npages_dirty = 1;  	}  	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {  		unlock_rmap(rmapp); -		return ret; +		return npages_dirty;  	}  	i = head = *rmapp & KVMPPC_RMAP_INDEX; @@ -1069,7 +1094,22 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)  		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));  		j = rev[i].forw; -		if (!(hptep[1] & HPTE_R_C)) +		/* +		 * Checking the C (changed) bit here is racy since there +		 * is no guarantee about when the hardware writes it back. 
+		 * If the HPTE is not writable then it is stable since the +		 * page can't be written to, and we would have done a tlbie +		 * (which forces the hardware to complete any writeback) +		 * when making the HPTE read-only. +		 * If vcpus are running then this call is racy anyway +		 * since the page could get dirtied subsequently, so we +		 * expect there to be a further call which would pick up +		 * any delayed C bit writeback. +		 * Otherwise we need to do the tlbie even if C==0 in +		 * order to pick up any delayed writeback of C. +		 */ +		if (!(hptep[1] & HPTE_R_C) && +		    (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))  			continue;  		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { @@ -1081,24 +1121,33 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)  		}  		/* Now check and modify the HPTE */ -		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { -			/* need to make it temporarily absent to clear C */ -			hptep[0] |= HPTE_V_ABSENT; -			kvmppc_invalidate_hpte(kvm, hptep, i); -			hptep[1] &= ~HPTE_R_C; -			eieio(); -			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; +		if (!(hptep[0] & HPTE_V_VALID)) +			continue; + +		/* need to make it temporarily absent so C is stable */ +		hptep[0] |= HPTE_V_ABSENT; +		kvmppc_invalidate_hpte(kvm, hptep, i); +		v = hptep[0]; +		r = hptep[1]; +		if (r & HPTE_R_C) { +			hptep[1] = r & ~HPTE_R_C;  			if (!(rev[i].guest_rpte & HPTE_R_C)) {  				rev[i].guest_rpte |= HPTE_R_C;  				note_hpte_modification(kvm, &rev[i]);  			} -			ret = 1; +			n = hpte_page_size(v, r); +			n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; +			if (n > npages_dirty) +				npages_dirty = n; +			eieio();  		} -		hptep[0] &= ~HPTE_V_HVLOCK; +		v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); +		v |= HPTE_V_VALID; +		hptep[0] = v;  	} while ((i = j) != head);  	unlock_rmap(rmapp); -	return ret; +	return npages_dirty;  }  static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, @@ -1122,15 +1171,22 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,  long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,  			     unsigned long *map)  { -	unsigned long i; +	unsigned long i, j;  	unsigned long *rmapp;  	struct kvm_vcpu *vcpu;  	preempt_disable();  	rmapp = memslot->arch.rmap;  	for (i = 0; i < memslot->npages; ++i) { -		if (kvm_test_clear_dirty(kvm, rmapp) && map) -			__set_bit_le(i, map); +		int npages = kvm_test_clear_dirty_npages(kvm, rmapp); +		/* +		 * Note that if npages > 0 then i must be a multiple of npages, +		 * since we always put huge-page HPTEs in the rmap chain +		 * corresponding to their page base address. 
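Worked example for the dirty-log change above: kvm_test_clear_dirty_npages() reports the dirty extent in system pages, so a single 16MB huge-page HPTE on a 4K-page host yields 4096, and the caller then sets that many consecutive bits starting at the huge page's base gfn. The rounding it applies amounts to no more than this (an illustrative restatement, not code from the patch):

static unsigned long dirty_npages(unsigned long hpte_bytes)
{
	/* e.g. 16MB -> 4096 system pages with PAGE_SIZE = 4K */
	return (hpte_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
}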
+		 */ +		if (npages && map) +			for (j = i; npages; ++j, --npages) +				__set_bit_le(j, map);  		++rmapp;  	} @@ -1506,15 +1562,14 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,  				goto out;  			}  			if (!rma_setup && is_vrma_hpte(v)) { -				unsigned long psize = hpte_page_size(v, r); +				unsigned long psize = hpte_base_page_size(v, r);  				unsigned long senc = slb_pgsize_encoding(psize);  				unsigned long lpcr;  				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |  					(VRMA_VSID << SLB_VSID_SHIFT_1T); -				lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; -				lpcr |= senc << (LPCR_VRMASD_SH - 4); -				kvm->arch.lpcr = lpcr; +				lpcr = senc << (LPCR_VRMASD_SH - 4); +				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);  				rma_setup = 1;  			}  			++i; diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 4f12e8f0c71..3589c4e3d49 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -17,30 +17,9 @@   * Authors: Alexander Graf <agraf@suse.de>   */ -#ifdef __LITTLE_ENDIAN__ -#error Need to fix SLB shadow accesses in little endian mode -#endif - -#define SHADOW_SLB_ESID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10)) -#define SHADOW_SLB_VSID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8) -#define UNBOLT_SLB_ENTRY(num) \ -	ld	r9, SHADOW_SLB_ESID(num)(r12); \ -	/* Invalid? Skip. */; \ -	rldicl. r0, r9, 37, 63; \ -	beq	slb_entry_skip_ ## num; \ -	xoris	r9, r9, SLB_ESID_V@h; \ -	std	r9, SHADOW_SLB_ESID(num)(r12); \ -  slb_entry_skip_ ## num: - -#define REBOLT_SLB_ENTRY(num) \ -	ld	r10, SHADOW_SLB_ESID(num)(r11); \ -	cmpdi	r10, 0; \ -	beq	slb_exit_skip_ ## num; \ -	oris	r10, r10, SLB_ESID_V@h; \ -	ld	r9, SHADOW_SLB_VSID(num)(r11); \ -	slbmte	r9, r10; \ -	std	r10, SHADOW_SLB_ESID(num)(r11); \ -slb_exit_skip_ ## num: +#define SHADOW_SLB_ENTRY_LEN	0x10 +#define OFFSET_ESID(x)		(SHADOW_SLB_ENTRY_LEN * x) +#define OFFSET_VSID(x)		((SHADOW_SLB_ENTRY_LEN * x) + 8)  /******************************************************************************   *                                                                            * @@ -64,20 +43,15 @@ slb_exit_skip_ ## num:  	 * SVCPU[LR]  = guest LR  	 */ -	/* Remove LPAR shadow entries */ +BEGIN_FW_FTR_SECTION -#if SLB_NUM_BOLTED == 3 +	/* Declare SLB shadow as 0 entries big */ -	ld	r12, PACA_SLBSHADOWPTR(r13) +	ld	r11, PACA_SLBSHADOWPTR(r13) +	li	r8, 0 +	stb	r8, 3(r11) -	/* Remove bolted entries */ -	UNBOLT_SLB_ENTRY(0) -	UNBOLT_SLB_ENTRY(1) -	UNBOLT_SLB_ENTRY(2) -	 -#else -#error unknown number of bolted entries -#endif +END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)  	/* Flush SLB */ @@ -100,7 +74,7 @@ slb_loop_enter:  	ld	r10, 0(r11) -	rldicl. r0, r10, 37, 63 +	andis.	r9, r10, SLB_ESID_V@h  	beq	slb_loop_enter_skip  	ld	r9, 8(r11) @@ -137,23 +111,42 @@ slb_do_enter:  	 *  	 */ -	/* Restore bolted entries from the shadow and fix it along the way */ +	/* Remove all SLB entries that are in use. 
*/ -	/* We don't store anything in entry 0, so we don't need to take care of it */ +	li	r0, r0 +	slbmte	r0, r0  	slbia -	isync -#if SLB_NUM_BOLTED == 3 +	/* Restore bolted entries from the shadow */  	ld	r11, PACA_SLBSHADOWPTR(r13) -	REBOLT_SLB_ENTRY(0) -	REBOLT_SLB_ENTRY(1) -	REBOLT_SLB_ENTRY(2) -	 -#else -#error unknown number of bolted entries -#endif +BEGIN_FW_FTR_SECTION + +	/* Declare SLB shadow as SLB_NUM_BOLTED entries big */ + +	li	r8, SLB_NUM_BOLTED +	stb	r8, 3(r11) + +END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR) + +	/* Manually load all entries from shadow SLB */ + +	li	r8, SLBSHADOW_SAVEAREA +	li	r7, SLBSHADOW_SAVEAREA + 8 + +	.rept	SLB_NUM_BOLTED +	LDX_BE	r10, r11, r8 +	cmpdi	r10, 0 +	beq	1f +	LDX_BE	r9, r11, r7 +	slbmte	r9, r10 +1:	addi	r7, r7, SHADOW_SLB_ENTRY_LEN +	addi	r8, r8, SHADOW_SLB_ENTRY_LEN +	.endr + +	isync +	sync  slb_do_exit: diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 30c2f3b134c..89e96b3e003 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,3 +74,32 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,  	/* Didn't find the liobn, punt it to userspace */  	return H_TOO_HARD;  } +EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); + +long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, +		      unsigned long ioba) +{ +	struct kvm *kvm = vcpu->kvm; +	struct kvmppc_spapr_tce_table *stt; + +	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { +		if (stt->liobn == liobn) { +			unsigned long idx = ioba >> SPAPR_TCE_SHIFT; +			struct page *page; +			u64 *tbl; + +			if (ioba >= stt->window_size) +				return H_PARAMETER; + +			page = stt->pages[idx / TCES_PER_PAGE]; +			tbl = (u64 *)page_address(page); + +			vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; +			return H_SUCCESS; +		} +	} + +	/* Didn't find the liobn, punt it to userspace */ +	return H_TOO_HARD; +} +EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 360ce68c980..3f295269af3 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -80,28 +80,45 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)  		return false;  	/* Limit user space to its own small SPR set */ -	if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM) +	if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)  		return false;  	return true;  } -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, -                           unsigned int inst, int *advance) +int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +			      unsigned int inst, int *advance)  {  	int emulated = EMULATE_DONE;  	int rt = get_rt(inst);  	int rs = get_rs(inst);  	int ra = get_ra(inst);  	int rb = get_rb(inst); +	u32 inst_sc = 0x44000002;  	switch (get_op(inst)) { +	case 0: +		emulated = EMULATE_FAIL; +		if ((kvmppc_get_msr(vcpu) & MSR_LE) && +		    (inst == swab32(inst_sc))) { +			/* +			 * This is the byte reversed syscall instruction of our +			 * hypercall handler. Early versions of LE Linux didn't +			 * swap the instructions correctly and ended up in +			 * illegal instructions. +			 * Just always fail hypercalls on these broken systems. 
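Returning briefly to the book3s_64_slb.S hunk above: the unrolled REBOLT_SLB_ENTRY macros are replaced by a .rept loop that reloads every bolted entry from the shadow SLB buffer. A rough C rendering of what that loop does, purely for illustration; it glosses over the big-endian LDX_BE loads and the SLBSHADOW_SAVEAREA offsets used by the real assembly.

#include <linux/types.h>

struct shadow_slb_entry {	/* 0x10 bytes per entry: ESID word, then VSID word */
	u64 esid;
	u64 vsid;
};

static void reload_bolted_slb(const struct shadow_slb_entry *shadow, int nbolted)
{
	int i;

	/* nbolted would be SLB_NUM_BOLTED in the real code */
	for (i = 0; i < nbolted; i++) {
		if (!shadow[i].esid)
			continue;	/* empty slot, as the cmpdi/beq skip does */
		asm volatile("slbmte %0,%1" : :
			     "r" (shadow[i].vsid), "r" (shadow[i].esid));
	}
}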
+			 */ +			kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED); +			kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); +			emulated = EMULATE_DONE; +		} +		break;  	case 19:  		switch (get_xop(inst)) {  		case OP_19_XOP_RFID:  		case OP_19_XOP_RFI: -			kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); -			kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); +			kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu)); +			kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));  			*advance = 0;  			break; @@ -113,16 +130,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  	case 31:  		switch (get_xop(inst)) {  		case OP_31_XOP_MFMSR: -			kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); +			kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));  			break;  		case OP_31_XOP_MTMSRD:  		{  			ulong rs_val = kvmppc_get_gpr(vcpu, rs);  			if (inst & 0x10000) { -				ulong new_msr = vcpu->arch.shared->msr; +				ulong new_msr = kvmppc_get_msr(vcpu);  				new_msr &= ~(MSR_RI | MSR_EE);  				new_msr |= rs_val & (MSR_RI | MSR_EE); -				vcpu->arch.shared->msr = new_msr; +				kvmppc_set_msr_fast(vcpu, new_msr);  			} else  				kvmppc_set_msr(vcpu, rs_val);  			break; @@ -172,14 +189,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  			vcpu->arch.mmu.tlbie(vcpu, addr, large);  			break;  		} -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64  		case OP_31_XOP_FAKE_SC1:  		{  			/* SC 1 papr hypercalls */  			ulong cmd = kvmppc_get_gpr(vcpu, 3);  			int i; -		        if ((vcpu->arch.shared->msr & MSR_PR) || +		        if ((kvmppc_get_msr(vcpu) & MSR_PR) ||  			    !vcpu->arch.papr_enabled) {  				emulated = EMULATE_FAIL;  				break; @@ -261,18 +278,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  				ra_val = kvmppc_get_gpr(vcpu, ra);  			addr = (ra_val + rb_val) & ~31ULL; -			if (!(vcpu->arch.shared->msr & MSR_SF)) +			if (!(kvmppc_get_msr(vcpu) & MSR_SF))  				addr &= 0xffffffff;  			vaddr = addr;  			r = kvmppc_st(vcpu, &addr, 32, zeros, true);  			if ((r == -ENOENT) || (r == -EPERM)) { -				struct kvmppc_book3s_shadow_vcpu *svcpu; - -				svcpu = svcpu_get(vcpu);  				*advance = 0; -				vcpu->arch.shared->dar = vaddr; -				svcpu->fault_dar = vaddr; +				kvmppc_set_dar(vcpu, vaddr); +				vcpu->arch.fault_dar = vaddr;  				dsisr = DSISR_ISSTORE;  				if (r == -ENOENT) @@ -280,9 +294,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  				else if (r == -EPERM)  					dsisr |= DSISR_PROTFAULT; -				vcpu->arch.shared->dsisr = dsisr; -				svcpu->fault_dsisr = dsisr; -				svcpu_put(svcpu); +				kvmppc_set_dsisr(vcpu, dsisr); +				vcpu->arch.fault_dsisr = dsisr;  				kvmppc_book3s_queue_irqprio(vcpu,  					BOOK3S_INTERRUPT_DATA_STORAGE); @@ -349,7 +362,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)  	return bat;  } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  {  	int emulated = EMULATE_DONE; @@ -360,10 +373,10 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  		to_book3s(vcpu)->sdr1 = spr_val;  		break;  	case SPRN_DSISR: -		vcpu->arch.shared->dsisr = spr_val; +		kvmppc_set_dsisr(vcpu, spr_val);  		break;  	case SPRN_DAR: -		vcpu->arch.shared->dar = spr_val; +		kvmppc_set_dar(vcpu, spr_val);  		break;  	case SPRN_HIOR:  		to_book3s(vcpu)->hior = spr_val; @@ -442,6 +455,31 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  	case SPRN_GQR7:  	
	to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;  		break; +	case SPRN_FSCR: +		vcpu->arch.fscr = spr_val; +		break; +#ifdef CONFIG_PPC_BOOK3S_64 +	case SPRN_BESCR: +		vcpu->arch.bescr = spr_val; +		break; +	case SPRN_EBBHR: +		vcpu->arch.ebbhr = spr_val; +		break; +	case SPRN_EBBRR: +		vcpu->arch.ebbrr = spr_val; +		break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	case SPRN_TFHAR: +		vcpu->arch.tfhar = spr_val; +		break; +	case SPRN_TEXASR: +		vcpu->arch.texasr = spr_val; +		break; +	case SPRN_TFIAR: +		vcpu->arch.tfiar = spr_val; +		break; +#endif +#endif  	case SPRN_ICTC:  	case SPRN_THRM1:  	case SPRN_THRM2: @@ -459,6 +497,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  	case SPRN_WPAR_GEKKO:  	case SPRN_MSSSR0:  	case SPRN_DABR: +#ifdef CONFIG_PPC_BOOK3S_64 +	case SPRN_MMCRS: +	case SPRN_MMCRA: +	case SPRN_MMCR0: +	case SPRN_MMCR1: +	case SPRN_MMCR2: +#endif  		break;  unprivileged:  	default: @@ -472,7 +517,7 @@ unprivileged:  	return emulated;  } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  {  	int emulated = EMULATE_DONE; @@ -497,10 +542,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  		*spr_val = to_book3s(vcpu)->sdr1;  		break;  	case SPRN_DSISR: -		*spr_val = vcpu->arch.shared->dsisr; +		*spr_val = kvmppc_get_dsisr(vcpu);  		break;  	case SPRN_DAR: -		*spr_val = vcpu->arch.shared->dar; +		*spr_val = kvmppc_get_dar(vcpu);  		break;  	case SPRN_HIOR:  		*spr_val = to_book3s(vcpu)->hior; @@ -542,6 +587,31 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  	case SPRN_GQR7:  		*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];  		break; +	case SPRN_FSCR: +		*spr_val = vcpu->arch.fscr; +		break; +#ifdef CONFIG_PPC_BOOK3S_64 +	case SPRN_BESCR: +		*spr_val = vcpu->arch.bescr; +		break; +	case SPRN_EBBHR: +		*spr_val = vcpu->arch.ebbhr; +		break; +	case SPRN_EBBRR: +		*spr_val = vcpu->arch.ebbrr; +		break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	case SPRN_TFHAR: +		*spr_val = vcpu->arch.tfhar; +		break; +	case SPRN_TEXASR: +		*spr_val = vcpu->arch.texasr; +		break; +	case SPRN_TFIAR: +		*spr_val = vcpu->arch.tfiar; +		break; +#endif +#endif  	case SPRN_THRM1:  	case SPRN_THRM2:  	case SPRN_THRM3: @@ -557,6 +627,14 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  	case SPRN_WPAR_GEKKO:  	case SPRN_MSSSR0:  	case SPRN_DABR: +#ifdef CONFIG_PPC_BOOK3S_64 +	case SPRN_MMCRS: +	case SPRN_MMCRA: +	case SPRN_MMCR0: +	case SPRN_MMCR1: +	case SPRN_MMCR2: +	case SPRN_TIR: +#endif  		*spr_val = 0;  		break;  	default: @@ -573,48 +651,17 @@ unprivileged:  u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)  { -	u32 dsisr = 0; - -	/* -	 * This is what the spec says about DSISR bits (not mentioned = 0): -	 * -	 * 12:13		[DS]	Set to bits 30:31 -	 * 15:16		[X]	Set to bits 29:30 -	 * 17			[X]	Set to bit 25 -	 *			[D/DS]	Set to bit 5 -	 * 18:21		[X]	Set to bits 21:24 -	 *			[D/DS]	Set to bits 1:4 -	 * 22:26			Set to bits 6:10 (RT/RS/FRT/FRS) -	 * 27:31			Set to bits 11:15 (RA) -	 */ - -	switch (get_op(inst)) { -	/* D-form */ -	case OP_LFS: -	case OP_LFD: -	case OP_STFD: -	case OP_STFS: -		dsisr |= (inst >> 12) & 0x4000;	/* bit 17 */ -		dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ -		break; -	/* X-form */ -	case 31: -		dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ -		dsisr |= (inst << 8)  & 0x04000; /* bit 17 */ -		dsisr |= (inst 
<< 3)  & 0x03c00; /* bits 18:21 */ -		break; -	default: -		printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); -		break; -	} - -	dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ - -	return dsisr; +	return make_dsisr(inst);  }  ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)  { +#ifdef CONFIG_PPC_BOOK3S_64 +	/* +	 * Linux's fix_alignment() assumes that DAR is valid, so can we +	 */ +	return vcpu->arch.fault_dar; +#else  	ulong dar = 0;  	ulong ra = get_ra(inst);  	ulong rb = get_rb(inst); @@ -639,4 +686,5 @@ ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)  	}  	return dar; +#endif  } diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 7057a02f090..0d013fbc2e1 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -18,15 +18,13 @@   */  #include <linux/export.h> +#include <asm/kvm_ppc.h>  #include <asm/kvm_book3s.h> -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); -#else -EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); -EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); -#ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);  #endif +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);  #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 62a2b5ab08e..7a12edbb61e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -31,6 +31,7 @@  #include <linux/spinlock.h>  #include <linux/page-flags.h>  #include <linux/srcu.h> +#include <linux/miscdevice.h>  #include <asm/reg.h>  #include <asm/cputable.h> @@ -52,6 +53,9 @@  #include <linux/vmalloc.h>  #include <linux/highmem.h>  #include <linux/hugetlb.h> +#include <linux/module.h> + +#include "book3s.h"  /* #define EXIT_DEBUG */  /* #define EXIT_DEBUG_SIMPLE */ @@ -66,7 +70,7 @@  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)  {  	int me;  	int cpu = vcpu->cpu; @@ -82,10 +86,13 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)  	/* CPU points to the first thread of the core */  	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { +#ifdef CONFIG_PPC_ICP_NATIVE  		int real_cpu = cpu + vcpu->arch.ptid;  		if (paca[real_cpu].kvm_hstate.xics_phys)  			xics_wake_cpu(real_cpu); -		else if (cpu_online(cpu)) +		else +#endif +		if (cpu_online(cpu))  			smp_send_reschedule(cpu);  	}  	put_cpu(); @@ -125,11 +132,12 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)   * purely defensive; they should never fail.)   
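A pattern that runs through the book3s_emulate.c changes above (and several other files in this diff) is the replacement of direct vcpu->arch.shared->msr, srr0/srr1, dar and dsisr accesses with kvmppc_get_msr(), kvmppc_set_msr_fast() and similar accessors. The reason is the shared_big_endian flag introduced later in this diff: for PR KVM the shared (magic) page is kept in the guest's byte order, so host-side accesses may need an endian conversion. A simplified sketch of the idea; shared_msr() is a hypothetical helper, and the real accessors are generated in the arch/powerpc KVM headers.

static inline u64 shared_msr(struct kvm_vcpu *vcpu)
{
	/* shared_big_endian is only tracked when PR KVM is possible; on HV
	 * the struct is private to the host and stays in host byte order.
	 */
	if (vcpu->arch.shared_big_endian)
		return be64_to_cpu(vcpu->arch.shared->msr);
	else
		return le64_to_cpu(vcpu->arch.shared->msr);
}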
*/ -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)  {  	struct kvmppc_vcore *vc = vcpu->arch.vcore; +	unsigned long flags; -	spin_lock(&vcpu->arch.tbacct_lock); +	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);  	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&  	    vc->preempt_tb != TB_NIL) {  		vc->stolen_tb += mftb() - vc->preempt_tb; @@ -140,32 +148,76 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;  		vcpu->arch.busy_preempt = TB_NIL;  	} -	spin_unlock(&vcpu->arch.tbacct_lock); +	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);  } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcore *vc = vcpu->arch.vcore; +	unsigned long flags; -	spin_lock(&vcpu->arch.tbacct_lock); +	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);  	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)  		vc->preempt_tb = mftb();  	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)  		vcpu->arch.busy_preempt = mftb(); -	spin_unlock(&vcpu->arch.tbacct_lock); +	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);  } -void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)  {  	vcpu->arch.shregs.msr = msr;  	kvmppc_end_cede(vcpu);  } -void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)  {  	vcpu->arch.pvr = pvr;  } +int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) +{ +	unsigned long pcr = 0; +	struct kvmppc_vcore *vc = vcpu->arch.vcore; + +	if (arch_compat) { +		if (!cpu_has_feature(CPU_FTR_ARCH_206)) +			return -EINVAL;	/* 970 has no compat mode support */ + +		switch (arch_compat) { +		case PVR_ARCH_205: +			/* +			 * If an arch bit is set in PCR, all the defined +			 * higher-order arch bits also have to be set. 
+			 */ +			pcr = PCR_ARCH_206 | PCR_ARCH_205; +			break; +		case PVR_ARCH_206: +		case PVR_ARCH_206p: +			pcr = PCR_ARCH_206; +			break; +		case PVR_ARCH_207: +			break; +		default: +			return -EINVAL; +		} + +		if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { +			/* POWER7 can't emulate POWER8 */ +			if (!(pcr & PCR_ARCH_206)) +				return -EINVAL; +			pcr &= ~PCR_ARCH_206; +		} +	} + +	spin_lock(&vc->lock); +	vc->arch_compat = arch_compat; +	vc->pcr = pcr; +	spin_unlock(&vc->lock); + +	return 0; +} +  void kvmppc_dump_regs(struct kvm_vcpu *vcpu)  {  	int r; @@ -195,7 +247,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)  		pr_err("  ESID = %.16llx VSID = %.16llx\n",  		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);  	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", -	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, +	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,  	       vcpu->arch.last_inst);  } @@ -454,11 +506,11 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)  	 */  	if (vc->vcore_state != VCORE_INACTIVE &&  	    vc->runner->arch.run_task != current) { -		spin_lock(&vc->runner->arch.tbacct_lock); +		spin_lock_irq(&vc->runner->arch.tbacct_lock);  		p = vc->stolen_tb;  		if (vc->preempt_tb != TB_NIL)  			p += now - vc->preempt_tb; -		spin_unlock(&vc->runner->arch.tbacct_lock); +		spin_unlock_irq(&vc->runner->arch.tbacct_lock);  	} else {  		p = vc->stolen_tb;  	} @@ -480,16 +532,16 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,  	core_stolen = vcore_stolen_time(vc, now);  	stolen = core_stolen - vcpu->arch.stolen_logged;  	vcpu->arch.stolen_logged = core_stolen; -	spin_lock(&vcpu->arch.tbacct_lock); +	spin_lock_irq(&vcpu->arch.tbacct_lock);  	stolen += vcpu->arch.busy_stolen;  	vcpu->arch.busy_stolen = 0; -	spin_unlock(&vcpu->arch.tbacct_lock); +	spin_unlock_irq(&vcpu->arch.tbacct_lock);  	if (!dt || !vpa)  		return;  	memset(dt, 0, sizeof(struct dtl_entry));  	dt->dispatch_reason = 7;  	dt->processor_id = vc->pcpu + vcpu->arch.ptid; -	dt->timebase = now; +	dt->timebase = now + vc->tb_offset;  	dt->enqueue_to_dispatch_time = stolen;  	dt->srr0 = kvmppc_get_pc(vcpu);  	dt->srr1 = vcpu->arch.shregs.msr; @@ -538,6 +590,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)  		}  		break;  	case H_CONFER: +		target = kvmppc_get_gpr(vcpu, 4); +		if (target == -1) +			break; +		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); +		if (!tvcpu) { +			ret = H_PARAMETER; +			break; +		} +		kvm_vcpu_yield_to(tvcpu);  		break;  	case H_REGISTER_VPA:  		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -548,7 +609,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)  		if (list_empty(&vcpu->kvm->arch.rtas_tokens))  			return RESUME_HOST; +		idx = srcu_read_lock(&vcpu->kvm->srcu);  		rc = kvmppc_rtas_hcall(vcpu); +		srcu_read_unlock(&vcpu->kvm->srcu, idx);  		if (rc == -ENOENT)  			return RESUME_HOST; @@ -576,8 +639,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)  	return RESUME_GUEST;  } -static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, -			      struct task_struct *tsk) +static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +				 struct task_struct *tsk)  {  	int r = RESUME_HOST; @@ -592,6 +655,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  		r = RESUME_GUEST;  		break;  	case BOOK3S_INTERRUPT_EXTERNAL: +	case BOOK3S_INTERRUPT_H_DOORBELL:  		vcpu->stat.ext_intr_exits++;  		r = RESUME_GUEST;  		break; @@ -628,12 +692,10 @@ static int 
kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  		/* hcall - punt to userspace */  		int i; -		if (vcpu->arch.shregs.msr & MSR_PR) { -			/* sc 1 from userspace - reflect to guest syscall */ -			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL); -			r = RESUME_GUEST; -			break; -		} +		/* hypercall with MSR_PR has already been handled in rmode, +		 * and never reaches here. +		 */ +  		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);  		for (i = 0; i < 9; ++i)  			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i); @@ -663,7 +725,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	 * we don't emulate any guest instructions at this stage.  	 */  	case BOOK3S_INTERRUPT_H_EMUL_ASSIST: -		kvmppc_core_queue_program(vcpu, 0x80000); +		kvmppc_core_queue_program(vcpu, SRR1_PROGILL); +		r = RESUME_GUEST; +		break; +	/* +	 * This occurs if the guest (kernel or userspace), does something that +	 * is prohibited by HFSCR.  We just generate a program interrupt to +	 * the guest. +	 */ +	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: +		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);  		r = RESUME_GUEST;  		break;  	default: @@ -671,16 +742,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",  			vcpu->arch.trap, kvmppc_get_pc(vcpu),  			vcpu->arch.shregs.msr); +		run->hw.hardware_exit_reason = vcpu->arch.trap;  		r = RESUME_HOST; -		BUG();  		break;  	}  	return r;  } -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, -				  struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, +					    struct kvm_sregs *sregs)  {  	int i; @@ -694,12 +765,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  	return 0;  } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, -				  struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, +					    struct kvm_sregs *sregs)  {  	int i, j; -	kvmppc_set_pvr(vcpu, sregs->pvr); +	kvmppc_set_pvr_hv(vcpu, sregs->pvr);  	j = 0;  	for (i = 0; i < vcpu->arch.slb_nr; i++) { @@ -714,7 +785,47 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,  	return 0;  } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +{ +	struct kvmppc_vcore *vc = vcpu->arch.vcore; +	u64 mask; + +	spin_lock(&vc->lock); +	/* +	 * If ILE (interrupt little-endian) has changed, update the +	 * MSR_LE bit in the intr_msr for each vcpu in this vcore. +	 */ +	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { +		struct kvm *kvm = vcpu->kvm; +		struct kvm_vcpu *vcpu; +		int i; + +		mutex_lock(&kvm->lock); +		kvm_for_each_vcpu(i, vcpu, kvm) { +			if (vcpu->arch.vcore != vc) +				continue; +			if (new_lpcr & LPCR_ILE) +				vcpu->arch.intr_msr |= MSR_LE; +			else +				vcpu->arch.intr_msr &= ~MSR_LE; +		} +		mutex_unlock(&kvm->lock); +	} + +	/* +	 * Userspace can only modify DPFD (default prefetch depth), +	 * ILE (interrupt little-endian) and TC (translation control). +	 * On POWER8 userspace can also modify AIL (alt. interrupt loc.) 
+	 */ +	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; +	if (cpu_has_feature(CPU_FTR_ARCH_207S)) +		mask |= LPCR_AIL; +	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); +	spin_unlock(&vc->lock); +} + +static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, +				 union kvmppc_one_reg *val)  {  	int r = 0;  	long int i; @@ -726,6 +837,9 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	case KVM_REG_PPC_DABR:  		*val = get_reg_val(id, vcpu->arch.dabr);  		break; +	case KVM_REG_PPC_DABRX: +		*val = get_reg_val(id, vcpu->arch.dabrx); +		break;  	case KVM_REG_PPC_DSCR:  		*val = get_reg_val(id, vcpu->arch.dscr);  		break; @@ -741,7 +855,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	case KVM_REG_PPC_UAMOR:  		*val = get_reg_val(id, vcpu->arch.uamor);  		break; -	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: +	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:  		i = id - KVM_REG_PPC_MMCR0;  		*val = get_reg_val(id, vcpu->arch.mmcr[i]);  		break; @@ -749,27 +863,61 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  		i = id - KVM_REG_PPC_PMC1;  		*val = get_reg_val(id, vcpu->arch.pmc[i]);  		break; -#ifdef CONFIG_VSX -	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: -		if (cpu_has_feature(CPU_FTR_VSX)) { -			/* VSX => FP reg i is stored in arch.vsr[2*i] */ -			long int i = id - KVM_REG_PPC_FPR0; -			*val = get_reg_val(id, vcpu->arch.vsr[2 * i]); -		} else { -			/* let generic code handle it */ -			r = -EINVAL; -		} +	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: +		i = id - KVM_REG_PPC_SPMC1; +		*val = get_reg_val(id, vcpu->arch.spmc[i]);  		break; -	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: -		if (cpu_has_feature(CPU_FTR_VSX)) { -			long int i = id - KVM_REG_PPC_VSR0; -			val->vsxval[0] = vcpu->arch.vsr[2 * i]; -			val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; -		} else { -			r = -ENXIO; -		} +	case KVM_REG_PPC_SIAR: +		*val = get_reg_val(id, vcpu->arch.siar); +		break; +	case KVM_REG_PPC_SDAR: +		*val = get_reg_val(id, vcpu->arch.sdar); +		break; +	case KVM_REG_PPC_SIER: +		*val = get_reg_val(id, vcpu->arch.sier); +		break; +	case KVM_REG_PPC_IAMR: +		*val = get_reg_val(id, vcpu->arch.iamr); +		break; +	case KVM_REG_PPC_PSPB: +		*val = get_reg_val(id, vcpu->arch.pspb); +		break; +	case KVM_REG_PPC_DPDES: +		*val = get_reg_val(id, vcpu->arch.vcore->dpdes); +		break; +	case KVM_REG_PPC_DAWR: +		*val = get_reg_val(id, vcpu->arch.dawr); +		break; +	case KVM_REG_PPC_DAWRX: +		*val = get_reg_val(id, vcpu->arch.dawrx); +		break; +	case KVM_REG_PPC_CIABR: +		*val = get_reg_val(id, vcpu->arch.ciabr); +		break; +	case KVM_REG_PPC_IC: +		*val = get_reg_val(id, vcpu->arch.ic); +		break; +	case KVM_REG_PPC_VTB: +		*val = get_reg_val(id, vcpu->arch.vtb); +		break; +	case KVM_REG_PPC_CSIGR: +		*val = get_reg_val(id, vcpu->arch.csigr); +		break; +	case KVM_REG_PPC_TACR: +		*val = get_reg_val(id, vcpu->arch.tacr); +		break; +	case KVM_REG_PPC_TCSCR: +		*val = get_reg_val(id, vcpu->arch.tcscr); +		break; +	case KVM_REG_PPC_PID: +		*val = get_reg_val(id, vcpu->arch.pid); +		break; +	case KVM_REG_PPC_ACOP: +		*val = get_reg_val(id, vcpu->arch.acop); +		break; +	case KVM_REG_PPC_WORT: +		*val = get_reg_val(id, vcpu->arch.wort);  		break; -#endif /* CONFIG_VSX */  	case KVM_REG_PPC_VPA_ADDR:  		spin_lock(&vcpu->arch.vpa_update_lock);  		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -787,6 +935,81 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  		val->vpaval.length = 
vcpu->arch.dtl.len;  		spin_unlock(&vcpu->arch.vpa_update_lock);  		break; +	case KVM_REG_PPC_TB_OFFSET: +		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset); +		break; +	case KVM_REG_PPC_LPCR: +		*val = get_reg_val(id, vcpu->arch.vcore->lpcr); +		break; +	case KVM_REG_PPC_PPR: +		*val = get_reg_val(id, vcpu->arch.ppr); +		break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	case KVM_REG_PPC_TFHAR: +		*val = get_reg_val(id, vcpu->arch.tfhar); +		break; +	case KVM_REG_PPC_TFIAR: +		*val = get_reg_val(id, vcpu->arch.tfiar); +		break; +	case KVM_REG_PPC_TEXASR: +		*val = get_reg_val(id, vcpu->arch.texasr); +		break; +	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: +		i = id - KVM_REG_PPC_TM_GPR0; +		*val = get_reg_val(id, vcpu->arch.gpr_tm[i]); +		break; +	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: +	{ +		int j; +		i = id - KVM_REG_PPC_TM_VSR0; +		if (i < 32) +			for (j = 0; j < TS_FPRWIDTH; j++) +				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j]; +		else { +			if (cpu_has_feature(CPU_FTR_ALTIVEC)) +				val->vval = vcpu->arch.vr_tm.vr[i-32]; +			else +				r = -ENXIO; +		} +		break; +	} +	case KVM_REG_PPC_TM_CR: +		*val = get_reg_val(id, vcpu->arch.cr_tm); +		break; +	case KVM_REG_PPC_TM_LR: +		*val = get_reg_val(id, vcpu->arch.lr_tm); +		break; +	case KVM_REG_PPC_TM_CTR: +		*val = get_reg_val(id, vcpu->arch.ctr_tm); +		break; +	case KVM_REG_PPC_TM_FPSCR: +		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr); +		break; +	case KVM_REG_PPC_TM_AMR: +		*val = get_reg_val(id, vcpu->arch.amr_tm); +		break; +	case KVM_REG_PPC_TM_PPR: +		*val = get_reg_val(id, vcpu->arch.ppr_tm); +		break; +	case KVM_REG_PPC_TM_VRSAVE: +		*val = get_reg_val(id, vcpu->arch.vrsave_tm); +		break; +	case KVM_REG_PPC_TM_VSCR: +		if (cpu_has_feature(CPU_FTR_ALTIVEC)) +			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]); +		else +			r = -ENXIO; +		break; +	case KVM_REG_PPC_TM_DSCR: +		*val = get_reg_val(id, vcpu->arch.dscr_tm); +		break; +	case KVM_REG_PPC_TM_TAR: +		*val = get_reg_val(id, vcpu->arch.tar_tm); +		break; +#endif +	case KVM_REG_PPC_ARCH_COMPAT: +		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat); +		break;  	default:  		r = -EINVAL;  		break; @@ -795,7 +1018,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	return r;  } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, +				 union kvmppc_one_reg *val)  {  	int r = 0;  	long int i; @@ -810,6 +1034,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	case KVM_REG_PPC_DABR:  		vcpu->arch.dabr = set_reg_val(id, *val);  		break; +	case KVM_REG_PPC_DABRX: +		vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP; +		break;  	case KVM_REG_PPC_DSCR:  		vcpu->arch.dscr = set_reg_val(id, *val);  		break; @@ -825,7 +1052,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	case KVM_REG_PPC_UAMOR:  		vcpu->arch.uamor = set_reg_val(id, *val);  		break; -	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: +	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:  		i = id - KVM_REG_PPC_MMCR0;  		vcpu->arch.mmcr[i] = set_reg_val(id, *val);  		break; @@ -833,27 +1060,64 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  		i = id - KVM_REG_PPC_PMC1;  		vcpu->arch.pmc[i] = set_reg_val(id, *val);  		break; -#ifdef CONFIG_VSX -	case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: -		if (cpu_has_feature(CPU_FTR_VSX)) { -			/* VSX => FP reg i is stored in arch.vsr[2*i] */ -			long int i = id - KVM_REG_PPC_FPR0; -			vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); -		} else { -			/* let generic code handle it */ -			r = -EINVAL; -		} +	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: +		i = id - KVM_REG_PPC_SPMC1; +		vcpu->arch.spmc[i] = set_reg_val(id, *val);  		break; -	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: -		if (cpu_has_feature(CPU_FTR_VSX)) { -			long int i = id - KVM_REG_PPC_VSR0; -			vcpu->arch.vsr[2 * i] = val->vsxval[0]; -			vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; -		} else { -			r = -ENXIO; -		} +	case KVM_REG_PPC_SIAR: +		vcpu->arch.siar = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_SDAR: +		vcpu->arch.sdar = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_SIER: +		vcpu->arch.sier = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_IAMR: +		vcpu->arch.iamr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_PSPB: +		vcpu->arch.pspb = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_DPDES: +		vcpu->arch.vcore->dpdes = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_DAWR: +		vcpu->arch.dawr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_DAWRX: +		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; +		break; +	case KVM_REG_PPC_CIABR: +		vcpu->arch.ciabr = set_reg_val(id, *val); +		/* Don't allow setting breakpoints in hypervisor code */ +		if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) +			vcpu->arch.ciabr &= ~CIABR_PRIV;	/* disable */ +		break; +	case KVM_REG_PPC_IC: +		vcpu->arch.ic = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_VTB: +		vcpu->arch.vtb = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_CSIGR: +		vcpu->arch.csigr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TACR: +		vcpu->arch.tacr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TCSCR: +		vcpu->arch.tcscr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_PID: +		vcpu->arch.pid = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_ACOP: +		vcpu->arch.acop = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_WORT: +		vcpu->arch.wort = set_reg_val(id, *val);  		break; -#endif /* CONFIG_VSX */  	case KVM_REG_PPC_VPA_ADDR:  		addr = set_reg_val(id, *val);  		r = -EINVAL; @@ -880,6 +1144,82 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  		len -= len % sizeof(struct dtl_entry);  		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);  		break; +	case KVM_REG_PPC_TB_OFFSET: +		/* round up to multiple of 2^24 */ +		vcpu->arch.vcore->tb_offset = +			ALIGN(set_reg_val(id, *val), 1UL << 24); +		break; +	case KVM_REG_PPC_LPCR: +		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); +		break; +	case KVM_REG_PPC_PPR: +		vcpu->arch.ppr = set_reg_val(id, *val); +		break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	case KVM_REG_PPC_TFHAR: +		vcpu->arch.tfhar = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TFIAR: +		vcpu->arch.tfiar = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TEXASR: +		vcpu->arch.texasr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: +		i = id - KVM_REG_PPC_TM_GPR0; +		vcpu->arch.gpr_tm[i] = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_VSR0 ... 
KVM_REG_PPC_TM_VSR63: +	{ +		int j; +		i = id - KVM_REG_PPC_TM_VSR0; +		if (i < 32) +			for (j = 0; j < TS_FPRWIDTH; j++) +				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j]; +		else +			if (cpu_has_feature(CPU_FTR_ALTIVEC)) +				vcpu->arch.vr_tm.vr[i-32] = val->vval; +			else +				r = -ENXIO; +		break; +	} +	case KVM_REG_PPC_TM_CR: +		vcpu->arch.cr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_LR: +		vcpu->arch.lr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_CTR: +		vcpu->arch.ctr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_FPSCR: +		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_AMR: +		vcpu->arch.amr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_PPR: +		vcpu->arch.ppr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_VRSAVE: +		vcpu->arch.vrsave_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_VSCR: +		if (cpu_has_feature(CPU_FTR_ALTIVEC)) +			vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); +		else +			r = - ENXIO; +		break; +	case KVM_REG_PPC_TM_DSCR: +		vcpu->arch.dscr_tm = set_reg_val(id, *val); +		break; +	case KVM_REG_PPC_TM_TAR: +		vcpu->arch.tar_tm = set_reg_val(id, *val); +		break; +#endif +	case KVM_REG_PPC_ARCH_COMPAT: +		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); +		break;  	default:  		r = -EINVAL;  		break; @@ -888,21 +1228,15 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	return r;  } -int kvmppc_core_check_processor_compat(void) -{ -	if (cpu_has_feature(CPU_FTR_HVMODE)) -		return 0; -	return -EIO; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, +						   unsigned int id)  {  	struct kvm_vcpu *vcpu;  	int err = -EINVAL;  	int core;  	struct kvmppc_vcore *vcore; -	core = id / threads_per_core; +	core = id / threads_per_subcore;  	if (core >= KVM_MAX_VCORES)  		goto out; @@ -916,14 +1250,25 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)  		goto free_vcpu;  	vcpu->arch.shared = &vcpu->arch.shregs; +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +	/* +	 * The shared struct is never shared on HV, +	 * so we can always use host endianness +	 */ +#ifdef __BIG_ENDIAN__ +	vcpu->arch.shared_big_endian = true; +#else +	vcpu->arch.shared_big_endian = false; +#endif +#endif  	vcpu->arch.mmcr[0] = MMCR0_FC;  	vcpu->arch.ctrl = CTRL_RUNLATCH;  	/* default to host PVR, since we can't spoof it */ -	vcpu->arch.pvr = mfspr(SPRN_PVR); -	kvmppc_set_pvr(vcpu, vcpu->arch.pvr); +	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));  	spin_lock_init(&vcpu->arch.vpa_update_lock);  	spin_lock_init(&vcpu->arch.tbacct_lock);  	vcpu->arch.busy_preempt = TB_NIL; +	vcpu->arch.intr_msr = MSR_SF | MSR_ME;  	kvmppc_mmu_book3s_hv_init(vcpu); @@ -940,6 +1285,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)  			spin_lock_init(&vcore->lock);  			init_waitqueue_head(&vcore->wq);  			vcore->preempt_tb = TB_NIL; +			vcore->lpcr = kvm->arch.lpcr; +			vcore->first_vcpuid = core * threads_per_subcore; +			vcore->kvm = kvm;  		}  		kvm->arch.vcores[core] = vcore;  		kvm->arch.online_vcores++; @@ -953,6 +1301,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)  	++vcore->num_threads;  	spin_unlock(&vcore->lock);  	vcpu->arch.vcore = vcore; +	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;  	vcpu->arch.cpu_type = KVM_CPU_3S_64;  	kvmppc_sanity_check(vcpu); @@ -972,7 +1321,7 @@ static 
void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)  					vpa->dirty);  } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)  {  	spin_lock(&vcpu->arch.vpa_update_lock);  	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); @@ -983,6 +1332,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)  	kmem_cache_free(kvm_vcpu_cache, vcpu);  } +static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) +{ +	/* Indicate we want to get back into the guest */ +	return 1; +} +  static void kvmppc_set_timer(struct kvm_vcpu *vcpu)  {  	unsigned long dec_nsec, now; @@ -1010,7 +1365,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)  	}  } -extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void __kvmppc_vcore_entry(void);  static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,  				   struct kvm_vcpu *vcpu) @@ -1019,13 +1374,13 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,  	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)  		return; -	spin_lock(&vcpu->arch.tbacct_lock); +	spin_lock_irq(&vcpu->arch.tbacct_lock);  	now = mftb();  	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -  		vcpu->arch.stolen_logged;  	vcpu->arch.busy_preempt = now;  	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; -	spin_unlock(&vcpu->arch.tbacct_lock); +	spin_unlock_irq(&vcpu->arch.tbacct_lock);  	--vc->n_runnable;  	list_del(&vcpu->arch.run_list);  } @@ -1084,13 +1439,14 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)  	tpaca = &paca[cpu];  	tpaca->kvm_hstate.kvm_vcpu = vcpu;  	tpaca->kvm_hstate.kvm_vcore = vc; -	tpaca->kvm_hstate.napping = 0; +	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;  	vcpu->cpu = vc->pcpu;  	smp_wmb();  #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) -	if (vcpu->arch.ptid) { +	if (cpu != smp_processor_id()) {  		xics_wake_cpu(cpu); -		++vc->n_woken; +		if (vcpu->arch.ptid) +			++vc->n_woken;  	}  #endif  } @@ -1120,16 +1476,19 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)  static int on_primary_thread(void)  {  	int cpu = smp_processor_id(); -	int thr = cpu_thread_in_core(cpu); +	int thr; -	if (thr) +	/* Are we on a primary subcore? */ +	if (cpu_thread_in_subcore(cpu))  		return 0; -	while (++thr < threads_per_core) + +	thr = 0; +	while (++thr < threads_per_subcore)  		if (cpu_online(cpu + thr))  			return 0;  	/* Grab all hw threads so they can't go into the kernel */ -	for (thr = 1; thr < threads_per_core; ++thr) { +	for (thr = 1; thr < threads_per_subcore; ++thr) {  		if (kvmppc_grab_hwthread(cpu + thr)) {  			/* Couldn't grab one; let the others go */  			do { @@ -1147,10 +1506,10 @@ static int on_primary_thread(void)   */  static void kvmppc_run_core(struct kvmppc_vcore *vc)  { -	struct kvm_vcpu *vcpu, *vcpu0, *vnext; +	struct kvm_vcpu *vcpu, *vnext;  	long ret;  	u64 now; -	int ptid, i, need_vpa_update; +	int i, need_vpa_update;  	int srcu_idx;  	struct kvm_vcpu *vcpus_to_update[threads_per_core]; @@ -1188,49 +1547,37 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)  	}  	/* -	 * Assign physical thread IDs, first to non-ceded vcpus -	 * and then to ceded ones. 
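The comment being deleted here described the old scheme of assigning physical thread IDs afresh each time a virtual core ran. With this series the ptid is fixed when the vcpu is created (see the first_vcpuid handling in kvmppc_core_vcpu_create_hv() above); the relationship amounts to the sketch below, shown only for illustration.

static inline int hv_ptid(struct kvm_vcpu *vcpu)
{
	/* first_vcpuid is core * threads_per_subcore, set at vcore creation */
	return vcpu->vcpu_id - vcpu->arch.vcore->first_vcpuid;
}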
-	 */ -	ptid = 0; -	vcpu0 = NULL; -	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { -		if (!vcpu->arch.ceded) { -			if (!ptid) -				vcpu0 = vcpu; -			vcpu->arch.ptid = ptid++; -		} -	} -	if (!vcpu0) -		goto out;	/* nothing to run; should never happen */ -	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) -		if (vcpu->arch.ceded) -			vcpu->arch.ptid = ptid++; - -	/* -	 * Make sure we are running on thread 0, and that -	 * secondary threads are offline. +	 * Make sure we are running on primary threads, and that secondary +	 * threads are offline.  Also check if the number of threads in this +	 * guest are greater than the current system threads per guest.  	 */ -	if (threads_per_core > 1 && !on_primary_thread()) { +	if ((threads_per_core > 1) && +	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {  		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)  			vcpu->arch.ret = -EBUSY;  		goto out;  	} +  	vc->pcpu = smp_processor_id();  	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {  		kvmppc_start_thread(vcpu);  		kvmppc_create_dtl_entry(vcpu, vc);  	} +	/* Set this explicitly in case thread 0 doesn't have a vcpu */ +	get_paca()->kvm_hstate.kvm_vcore = vc; +	get_paca()->kvm_hstate.ptid = 0; +  	vc->vcore_state = VCORE_RUNNING;  	preempt_disable();  	spin_unlock(&vc->lock);  	kvm_guest_enter(); -	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); +	srcu_idx = srcu_read_lock(&vc->kvm->srcu); -	__kvmppc_vcore_entry(NULL, vcpu0); +	__kvmppc_vcore_entry();  	spin_lock(&vc->lock);  	/* disable sending of IPIs on virtual external irqs */ @@ -1239,20 +1586,20 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)  	/* wait for secondary threads to finish writing their state to memory */  	if (vc->nap_count < vc->n_woken)  		kvmppc_wait_for_nap(vc); -	for (i = 0; i < threads_per_core; ++i) +	for (i = 0; i < threads_per_subcore; ++i)  		kvmppc_release_hwthread(vc->pcpu + i);  	/* prevent other vcpu threads from doing kvmppc_start_thread() now */  	vc->vcore_state = VCORE_EXITING;  	spin_unlock(&vc->lock); -	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); +	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);  	/* make sure updates to secondary vcpu structs are visible now */  	smp_mb();  	kvm_guest_exit();  	preempt_enable(); -	kvm_resched(vcpu); +	cond_resched();  	spin_lock(&vc->lock);  	now = get_tb(); @@ -1264,14 +1611,14 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)  		ret = RESUME_GUEST;  		if (vcpu->arch.trap) -			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, -						 vcpu->arch.run_task); +			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, +						    vcpu->arch.run_task);  		vcpu->arch.ret = ret;  		vcpu->arch.trap = 0;  		if (vcpu->arch.ceded) { -			if (ret != RESUME_GUEST) +			if (!is_kvmppc_resume_guest(ret))  				kvmppc_end_cede(vcpu);  			else  				kvmppc_set_timer(vcpu); @@ -1282,7 +1629,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)  	vc->vcore_state = VCORE_INACTIVE;  	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,  				 arch.run_list) { -		if (vcpu->arch.ret != RESUME_GUEST) { +		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {  			kvmppc_remove_runnable(vc, vcpu);  			wake_up(&vcpu->arch.cpu_run);  		} @@ -1353,7 +1700,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	if (!signal_pending(current)) {  		if (vc->vcore_state == VCORE_RUNNING &&  		    VCORE_EXIT_COUNT(vc) == 0) { -			vcpu->arch.ptid = vc->n_runnable - 1;  			
kvmppc_create_dtl_entry(vcpu, vc);  			kvmppc_start_thread(vcpu);  		} else if (vc->vcore_state == VCORE_SLEEPING) { @@ -1424,7 +1770,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	return vcpu->arch.ret;  } -int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)  {  	int r;  	int srcu_idx; @@ -1473,7 +1819,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)  				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);  			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);  		} -	} while (r == RESUME_GUEST); +	} while (is_kvmppc_resume_guest(r));   out:  	vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -1546,7 +1892,8 @@ static const struct file_operations kvm_rma_fops = {  	.release	= kvm_rma_release,  }; -long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) +static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, +				      struct kvm_allocate_rma *ret)  {  	long fd;  	struct kvm_rma_info *ri; @@ -1589,10 +1936,18 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,  	 * support pte_enc here  	 */  	(*sps)->enc[0].pte_enc = def->penc[linux_psize]; +	/* +	 * Add 16MB MPSS support if host supports it +	 */ +	if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) { +		(*sps)->enc[1].page_shift = 24; +		(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M]; +	}  	(*sps)++;  } -int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, +					 struct kvm_ppc_smmu_info *info)  {  	struct kvm_ppc_one_seg_page_size *sps; @@ -1613,7 +1968,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)  /*   * Get (and clear) the dirty memory log for a memory slot.   
*/ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, +					 struct kvm_dirty_log *log)  {  	struct kvm_memory_slot *memslot;  	int r; @@ -1667,8 +2023,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot)  	}  } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, -			      struct kvm_memory_slot *dont) +static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, +					struct kvm_memory_slot *dont)  {  	if (!dont || free->arch.rmap != dont->arch.rmap) {  		vfree(free->arch.rmap); @@ -1681,8 +2037,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free,  	}  } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, -			       unsigned long npages) +static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, +					 unsigned long npages)  {  	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));  	if (!slot->arch.rmap) @@ -1692,9 +2048,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,  	return 0;  } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, -				      struct kvm_memory_slot *memslot, -				      struct kvm_userspace_memory_region *mem) +static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, +					struct kvm_memory_slot *memslot, +					struct kvm_userspace_memory_region *mem)  {  	unsigned long *phys; @@ -1710,9 +2066,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,  	return 0;  } -void kvmppc_core_commit_memory_region(struct kvm *kvm, -				      struct kvm_userspace_memory_region *mem, -				      const struct kvm_memory_slot *old) +static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, +				struct kvm_userspace_memory_region *mem, +				const struct kvm_memory_slot *old)  {  	unsigned long npages = mem->memory_size >> PAGE_SHIFT;  	struct kvm_memory_slot *memslot; @@ -1729,6 +2085,37 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,  	}  } +/* + * Update LPCR values in kvm->arch and in vcores. + * Caller must hold kvm->lock. 
+ */ +void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) +{ +	long int i; +	u32 cores_done = 0; + +	if ((kvm->arch.lpcr & mask) == lpcr) +		return; + +	kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr; + +	for (i = 0; i < KVM_MAX_VCORES; ++i) { +		struct kvmppc_vcore *vc = kvm->arch.vcores[i]; +		if (!vc) +			continue; +		spin_lock(&vc->lock); +		vc->lpcr = (vc->lpcr & ~mask) | lpcr; +		spin_unlock(&vc->lock); +		if (++cores_done >= kvm->arch.online_vcores) +			break; +	} +} + +static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) +{ +	return; +} +  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  {  	int err = 0; @@ -1737,7 +2124,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  	unsigned long hva;  	struct kvm_memory_slot *memslot;  	struct vm_area_struct *vma; -	unsigned long lpcr, senc; +	unsigned long lpcr = 0, senc; +	unsigned long lpcr_mask = 0;  	unsigned long psize, porder;  	unsigned long rma_size;  	unsigned long rmls; @@ -1802,9 +2190,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  		senc = slb_pgsize_encoding(psize);  		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |  			(VRMA_VSID << SLB_VSID_SHIFT_1T); -		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; -		lpcr |= senc << (LPCR_VRMASD_SH - 4); -		kvm->arch.lpcr = lpcr; +		lpcr_mask = LPCR_VRMASD; +		/* the -4 is to account for senc values starting at 0x10 */ +		lpcr = senc << (LPCR_VRMASD_SH - 4);  		/* Create HPTEs in the hash page table for the VRMA */  		kvmppc_map_vrma(vcpu, memslot, porder); @@ -1825,23 +2213,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  		kvm->arch.rma = ri;  		/* Update LPCR and RMOR */ -		lpcr = kvm->arch.lpcr;  		if (cpu_has_feature(CPU_FTR_ARCH_201)) {  			/* PPC970; insert RMLS value (split field) in HID4 */ -			lpcr &= ~((1ul << HID4_RMLS0_SH) | -				  (3ul << HID4_RMLS2_SH)); -			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | +			lpcr_mask = (1ul << HID4_RMLS0_SH) | +				(3ul << HID4_RMLS2_SH) | HID4_RMOR; +			lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |  				((rmls & 3) << HID4_RMLS2_SH);  			/* RMOR is also in HID4 */  			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)  				<< HID4_RMOR_SH;  		} else {  			/* POWER7 */ -			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); -			lpcr |= rmls << LPCR_RMLS_SH; +			lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; +			lpcr = rmls << LPCR_RMLS_SH;  			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;  		} -		kvm->arch.lpcr = lpcr;  		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",  			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); @@ -1860,6 +2246,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  		}  	} +	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); +  	/* Order updates to kvm->arch.lpcr etc. vs. 
rma_setup_done */  	smp_wmb();  	kvm->arch.rma_setup_done = 1; @@ -1875,7 +2263,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  	goto out_srcu;  } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_hv(struct kvm *kvm)  {  	unsigned long lpcr, lpid; @@ -1893,9 +2281,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)  	 */  	cpumask_setall(&kvm->arch.need_tlb_flush); -	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); -	INIT_LIST_HEAD(&kvm->arch.rtas_tokens); -  	kvm->arch.rma = NULL;  	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -1916,6 +2301,9 @@ int kvmppc_core_init_vm(struct kvm *kvm)  			LPCR_VPM0 | LPCR_VPM1;  		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |  			(VRMA_VSID << SLB_VSID_SHIFT_1T); +		/* On POWER8 turn on online bit to enable PURR/SPURR */ +		if (cpu_has_feature(CPU_FTR_ARCH_207S)) +			lpcr |= LPCR_ONL;  	}  	kvm->arch.lpcr = lpcr; @@ -1923,69 +2311,172 @@ int kvmppc_core_init_vm(struct kvm *kvm)  	spin_lock_init(&kvm->arch.slot_phys_lock);  	/* -	 * Don't allow secondary CPU threads to come online -	 * while any KVM VMs exist. +	 * Track that we now have a HV mode VM active. This blocks secondary +	 * CPU threads from coming online.  	 */ -	inhibit_secondary_onlining(); +	kvm_hv_vm_activated();  	return 0;  } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_free_vcores(struct kvm *kvm) +{ +	long int i; + +	for (i = 0; i < KVM_MAX_VCORES; ++i) +		kfree(kvm->arch.vcores[i]); +	kvm->arch.online_vcores = 0; +} + +static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)  { -	uninhibit_secondary_onlining(); +	kvm_hv_vm_deactivated(); +	kvmppc_free_vcores(kvm);  	if (kvm->arch.rma) {  		kvm_release_rma(kvm->arch.rma);  		kvm->arch.rma = NULL;  	} -	kvmppc_rtas_tokens_free(kvm); -  	kvmppc_free_hpt(kvm); -	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));  } -/* These are stubs for now */ -void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +/* We don't need to emulate any privileged instructions or dcbz */ +static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +				     unsigned int inst, int *advance)  { +	return EMULATE_FAIL;  } -/* We don't need to emulate any privileged instructions or dcbz */ -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, -                           unsigned int inst, int *advance) +static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn, +					ulong spr_val)  {  	return EMULATE_FAIL;  } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, +					ulong *spr_val)  {  	return EMULATE_FAIL;  } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +static int kvmppc_core_check_processor_compat_hv(void)  { -	return EMULATE_FAIL; +	if (!cpu_has_feature(CPU_FTR_HVMODE)) +		return -EIO; +	return 0;  } -static int kvmppc_book3s_hv_init(void) +static long kvm_arch_vm_ioctl_hv(struct file *filp, +				 unsigned int ioctl, unsigned long arg)  { -	int r; +	struct kvm *kvm __maybe_unused = filp->private_data; +	void __user *argp = (void __user *)arg; +	long r; -	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); +	switch (ioctl) { -	if (r) -		return r; +	case KVM_ALLOCATE_RMA: { +		struct kvm_allocate_rma rma; +		struct kvm *kvm = filp->private_data; -	r = kvmppc_mmu_hv_init(); +		r = kvm_vm_ioctl_allocate_rma(kvm, &rma); +		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) +			r = -EFAULT; +		break; +	} 
+ +	case KVM_PPC_ALLOCATE_HTAB: { +		u32 htab_order; + +		r = -EFAULT; +		if (get_user(htab_order, (u32 __user *)argp)) +			break; +		r = kvmppc_alloc_reset_hpt(kvm, &htab_order); +		if (r) +			break; +		r = -EFAULT; +		if (put_user(htab_order, (u32 __user *)argp)) +			break; +		r = 0; +		break; +	} + +	case KVM_PPC_GET_HTAB_FD: { +		struct kvm_get_htab_fd ghf; + +		r = -EFAULT; +		if (copy_from_user(&ghf, argp, sizeof(ghf))) +			break; +		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); +		break; +	} +	default: +		r = -ENOTTY; +	} + +	return r; +} + +static struct kvmppc_ops kvm_ops_hv = { +	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, +	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, +	.get_one_reg = kvmppc_get_one_reg_hv, +	.set_one_reg = kvmppc_set_one_reg_hv, +	.vcpu_load   = kvmppc_core_vcpu_load_hv, +	.vcpu_put    = kvmppc_core_vcpu_put_hv, +	.set_msr     = kvmppc_set_msr_hv, +	.vcpu_run    = kvmppc_vcpu_run_hv, +	.vcpu_create = kvmppc_core_vcpu_create_hv, +	.vcpu_free   = kvmppc_core_vcpu_free_hv, +	.check_requests = kvmppc_core_check_requests_hv, +	.get_dirty_log  = kvm_vm_ioctl_get_dirty_log_hv, +	.flush_memslot  = kvmppc_core_flush_memslot_hv, +	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv, +	.commit_memory_region  = kvmppc_core_commit_memory_region_hv, +	.unmap_hva = kvm_unmap_hva_hv, +	.unmap_hva_range = kvm_unmap_hva_range_hv, +	.age_hva  = kvm_age_hva_hv, +	.test_age_hva = kvm_test_age_hva_hv, +	.set_spte_hva = kvm_set_spte_hva_hv, +	.mmu_destroy  = kvmppc_mmu_destroy_hv, +	.free_memslot = kvmppc_core_free_memslot_hv, +	.create_memslot = kvmppc_core_create_memslot_hv, +	.init_vm =  kvmppc_core_init_vm_hv, +	.destroy_vm = kvmppc_core_destroy_vm_hv, +	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, +	.emulate_op = kvmppc_core_emulate_op_hv, +	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv, +	.emulate_mfspr = kvmppc_core_emulate_mfspr_hv, +	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, +	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv, +}; + +static int kvmppc_book3s_init_hv(void) +{ +	int r; +	/* +	 * FIXME!! Do we need to check on all cpus ? +	 */ +	r = kvmppc_core_check_processor_compat_hv(); +	if (r < 0) +		return -ENODEV; + +	kvm_ops_hv.owner = THIS_MODULE; +	kvmppc_hv_ops = &kvm_ops_hv; + +	r = kvmppc_mmu_hv_init();  	return r;  } -static void kvmppc_book3s_hv_exit(void) +static void kvmppc_book3s_exit_hv(void)  { -	kvm_exit(); +	kvmppc_hv_ops = NULL;  } -module_init(kvmppc_book3s_hv_init); -module_exit(kvmppc_book3s_hv_exit); +module_init(kvmppc_book3s_init_hv); +module_exit(kvmppc_book3s_exit_hv); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8cd0daebb82..7cde8a66520 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -6,6 +6,7 @@   * published by the Free Software Foundation.   */ +#include <linux/cpu.h>  #include <linux/kvm_host.h>  #include <linux/preempt.h>  #include <linux/export.h> @@ -181,3 +182,33 @@ void __init kvm_cma_reserve(void)  		kvm_cma_declare_contiguous(selected_size, align_size);  	}  } + +/* + * When running HV mode KVM we need to block certain operations while KVM VMs + * exist in the system. We use a counter of VMs to track this. + * + * One of the operations we need to block is onlining of secondaries, so we + * protect hv_vm_count with get/put_online_cpus(). 
+ */ +static atomic_t hv_vm_count; + +void kvm_hv_vm_activated(void) +{ +	get_online_cpus(); +	atomic_inc(&hv_vm_count); +	put_online_cpus(); +} +EXPORT_SYMBOL_GPL(kvm_hv_vm_activated); + +void kvm_hv_vm_deactivated(void) +{ +	get_online_cpus(); +	atomic_dec(&hv_vm_count); +	put_online_cpus(); +} +EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated); + +bool kvm_hv_mode_active(void) +{ +	return atomic_read(&hv_vm_count) != 0; +} diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 37f1cc417ca..731be7478b2 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -35,7 +35,7 @@   ****************************************************************************/  /* Registers: - *  r4: vcpu pointer + *  none   */  _GLOBAL(__kvmppc_vcore_entry) @@ -57,9 +57,11 @@ BEGIN_FTR_SECTION  	std	r3, HSTATE_DSCR(r13)  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +BEGIN_FTR_SECTION  	/* Save host DABR */  	mfspr	r3, SPRN_DABR  	std	r3, HSTATE_DABR(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)  	/* Hard-disable interrupts */  	mfmsr   r10 @@ -69,7 +71,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)  	mtmsrd  r10,1  	/* Save host PMU registers */ -	/* R4 is live here (vcpu pointer) but not r3 or r5 */ +BEGIN_FTR_SECTION +	/* Work around P8 PMAE bug */ +	li	r3, -1 +	clrrdi	r3, r3, 10 +	mfspr	r8, SPRN_MMCR2 +	mtspr	SPRN_MMCR2, r3		/* freeze all counters using MMCR2 */ +	isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	li	r3, 1  	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */  	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */ @@ -86,9 +95,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)  	cmpwi	r5, 0  	beq	31f			/* skip if not */  	mfspr	r5, SPRN_MMCR1 +	mfspr	r9, SPRN_SIAR +	mfspr	r10, SPRN_SDAR  	std	r7, HSTATE_MMCR(r13)  	std	r5, HSTATE_MMCR + 8(r13)  	std	r6, HSTATE_MMCR + 16(r13) +	std	r9, HSTATE_MMCR + 24(r13) +	std	r10, HSTATE_MMCR + 32(r13) +BEGIN_FTR_SECTION +	mfspr	r9, SPRN_SIER +	std	r8, HSTATE_MMCR + 40(r13) +	std	r9, HSTATE_MMCR + 48(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	mfspr	r3, SPRN_PMC1  	mfspr	r5, SPRN_PMC2  	mfspr	r6, SPRN_PMC3 @@ -134,22 +152,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 * enters the guest with interrupts enabled.  	 */  BEGIN_FTR_SECTION +	ld	r4, HSTATE_KVM_VCPU(r13)  	ld	r0, VCPU_PENDING_EXC(r4)  	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)  	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h  	and.	r0, r0, r7  	beq	32f -	mr	r31, r4  	lhz	r3, PACAPACAINDEX(r13)  	bl	smp_send_reschedule  	nop -	mr	r4, r31  32:  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  #endif /* CONFIG_SMP */  	/* Jump to partition switch code */ -	bl	.kvmppc_hv_entry_trampoline +	bl	kvmppc_hv_entry_trampoline  	nop  /* @@ -158,9 +175,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)   * Interrupts are enabled again at this point.   
*/ -.global kvmppc_handler_highmem -kvmppc_handler_highmem: -  	/*  	 * Register usage at this point:  	 * diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index a353c485808..3a5c568b1e8 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -12,6 +12,7 @@  #include <linux/kvm_host.h>  #include <linux/kernel.h>  #include <asm/opal.h> +#include <asm/mce.h>  /* SRR1 bits for machine check on POWER7 */  #define SRR1_MC_LDSTERR		(1ul << (63-42)) @@ -58,18 +59,6 @@ static void reload_slb(struct kvm_vcpu *vcpu)  	}  } -/* POWER7 TLB flush */ -static void flush_tlb_power7(struct kvm_vcpu *vcpu) -{ -	unsigned long i, rb; - -	rb = TLBIEL_INVAL_SET_LPID; -	for (i = 0; i < POWER7_TLB_SETS; ++i) { -		asm volatile("tlbiel %0" : : "r" (rb)); -		rb += 1 << TLBIEL_INVAL_SET_SHIFT; -	} -} -  /*   * On POWER7, see if we can handle a machine check that occurred inside   * the guest in real mode, without switching to the host partition. @@ -79,9 +68,7 @@ static void flush_tlb_power7(struct kvm_vcpu *vcpu)  static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)  {  	unsigned long srr1 = vcpu->arch.shregs.msr; -#ifdef CONFIG_PPC_POWERNV -	struct opal_machine_check_event *opal_evt; -#endif +	struct machine_check_event mce_evt;  	long handled = 1;  	if (srr1 & SRR1_MC_LDSTERR) { @@ -96,7 +83,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)  				   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);  		}  		if (dsisr & DSISR_MC_TLB_MULTI) { -			flush_tlb_power7(vcpu); +			if (cur_cpu_spec && cur_cpu_spec->flush_tlb) +				cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);  			dsisr &= ~DSISR_MC_TLB_MULTI;  		}  		/* Any other errors we don't understand? */ @@ -113,28 +101,37 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)  		reload_slb(vcpu);  		break;  	case SRR1_MC_IFETCH_TLBMULTI: -		flush_tlb_power7(vcpu); +		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) +			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);  		break;  	default:  		handled = 0;  	} -#ifdef CONFIG_PPC_POWERNV  	/* -	 * See if OPAL has already handled the condition. -	 * We assume that if the condition is recovered then OPAL +	 * See if we have already handled the condition in the linux host. +	 * We assume that if the condition is recovered then linux host  	 * will have generated an error log event that we will pick  	 * up and log later. +	 * Don't release mce event now. We will queue up the event so that +	 * we can log the MCE event info on host console.  	 */ -	opal_evt = local_paca->opal_mc_evt; -	if (opal_evt->version == OpalMCE_V1 && -	    (opal_evt->severity == OpalMCE_SEV_NO_ERROR || -	     opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED)) +	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) +		goto out; + +	if (mce_evt.version == MCE_V1 && +	    (mce_evt.severity == MCE_SEV_NO_ERROR || +	     mce_evt.disposition == MCE_DISPOSITION_RECOVERED))  		handled = 1; -	if (handled) -		opal_evt->in_use = 0; -#endif +out: +	/* +	 * We are now going enter guest either through machine check +	 * interrupt (for unhandled errors) or will continue from +	 * current HSRR0 (for handled errors) in guest. Hence +	 * queue up the event so that we can log it from host console later. 
+	 */ +	machine_check_queue_event();  	return handled;  } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9c515440ad1..5a24d3c2b6b 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -42,13 +42,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)  	/*  	 * If there is only one vcore, and it's currently running, +	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,  	 * we can use tlbiel as long as we mark all other physical  	 * cores as potentially having stale TLB entries for this lpid.  	 * If we're not using MMU notifiers, we never take pages away  	 * from the guest, so we can use tlbiel if requested.  	 * Otherwise, don't use tlbiel.  	 */ -	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore) +	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)  		global = 0;  	else if (kvm->arch.using_mmu_notifiers)  		global = 1; @@ -111,7 +112,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,  	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);  	ptel = rev->guest_rpte |= rcbits;  	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); -	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); +	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);  	if (!memslot)  		return; @@ -134,7 +135,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,  	unlock_rmap(rmap);  } -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, +static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,  			      int writing, unsigned long *pte_sizep)  {  	pte_t *ptep; @@ -192,7 +193,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,  	/* Find the memslot (if any) for this address */  	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);  	gfn = gpa >> PAGE_SHIFT; -	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); +	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);  	pa = 0;  	is_io = ~0ul;  	rmap = NULL; @@ -225,26 +226,28 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,  		is_io = pa & (HPTE_R_I | HPTE_R_W);  		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);  		pa &= PAGE_MASK; +		pa |= gpa & ~PAGE_MASK;  	} else {  		/* Translate to host virtual address */  		hva = __gfn_to_hva_memslot(memslot, gfn);  		/* Look up the Linux PTE for the backing page */  		pte_size = psize; -		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size); -		if (pte_present(pte)) { +		pte = lookup_linux_pte_and_update(pgdir, hva, writing, +						  &pte_size); +		if (pte_present(pte) && !pte_numa(pte)) {  			if (writing && !pte_write(pte))  				/* make the actual HPTE be read-only */  				ptel = hpte_make_readonly(ptel);  			is_io = hpte_cache_bits(pte_val(pte));  			pa = pte_pfn(pte) << PAGE_SHIFT; +			pa |= hva & (pte_size - 1); +			pa |= gpa & ~PAGE_MASK;  		}  	}  	if (pte_size < psize)  		return H_PARAMETER; -	if (pa && pte_size > psize) -		pa |= gpa & (pte_size - 1);  	ptel &= ~(HPTE_R_PP0 - psize);  	ptel |= pa; @@ -668,10 +671,11 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,  			psize = hpte_page_size(v, r);  			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; -			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); +			memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);  			if (memslot) {  				hva = __gfn_to_hva_memslot(memslot, gfn); -				pte = lookup_linux_pte(pgdir, hva, 1, &psize); +				pte = lookup_linux_pte_and_update(pgdir, hva, +								  1, &psize);  				if 
(pte_present(pte) && !pte_write(pte))  					r = hpte_make_readonly(r);  			} @@ -749,6 +753,10 @@ static int slb_base_page_shift[4] = {  	20,	/* 1M, unsupported */  }; +/* When called from virtmode, this func should be protected by + * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK + * can trigger deadlock issue. + */  long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,  			      unsigned long valid)  { @@ -806,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,  			r = hpte[i+1];  			/* -			 * Check the HPTE again, including large page size -			 * Since we don't currently allow any MPSS (mixed -			 * page-size segment) page sizes, it is sufficient -			 * to check against the actual page size. +			 * Check the HPTE again, including base page size  			 */  			if ((v & valid) && (v & mask) == val && -			    hpte_page_size(v, r) == (1ul << pshift)) +			    hpte_base_page_size(v, r) == (1ul << pshift))  				/* Return with the HPTE still locked */  				return (hash << 3) + (i >> 1); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 294b7af28cd..558a67df812 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -28,34 +28,17 @@  #include <asm/exception-64s.h>  #include <asm/kvm_book3s_asm.h>  #include <asm/mmu-hash64.h> +#include <asm/tm.h> + +#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)  #ifdef __LITTLE_ENDIAN__  #error Need to fix lppaca and SLB shadow accesses in little endian mode  #endif -/***************************************************************************** - *                                                                           * - *        Real Mode handlers that need to be in the linear mapping           * - *                                                                           * - ****************************************************************************/ - -	.globl	kvmppc_skip_interrupt -kvmppc_skip_interrupt: -	mfspr	r13,SPRN_SRR0 -	addi	r13,r13,4 -	mtspr	SPRN_SRR0,r13 -	GET_SCRATCH0(r13) -	rfid -	b	. - -	.globl	kvmppc_skip_Hinterrupt -kvmppc_skip_Hinterrupt: -	mfspr	r13,SPRN_HSRR0 -	addi	r13,r13,4 -	mtspr	SPRN_HSRR0,r13 -	GET_SCRATCH0(r13) -	hrfid -	b	. +/* Values in HSTATE_NAPPING(r13) */ +#define NAPPING_CEDE	1 +#define NAPPING_NOVCPU	2  /*   * Call kvmppc_hv_entry in real mode. 
@@ -65,9 +48,12 @@ kvmppc_skip_Hinterrupt:   *   * LR = return address to continue at after eventually re-enabling MMU   */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) +	mflr	r0 +	std	r0, PPC_LR_STKOFF(r1) +	stdu	r1, -112(r1)  	mfmsr	r10 -	LOAD_REG_ADDR(r5, kvmppc_hv_entry) +	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)  	li	r0,MSR_RI  	andc	r0,r10,r0  	li	r6,MSR_IR | MSR_DR @@ -77,21 +63,197 @@ _GLOBAL(kvmppc_hv_entry_trampoline)  	mtsrr1	r6  	RFI -/****************************************************************************** - *                                                                            * - *                               Entry code                                   * - *                                                                            * - *****************************************************************************/ +kvmppc_call_hv_entry: +	ld	r4, HSTATE_KVM_VCPU(r13) +	bl	kvmppc_hv_entry + +	/* Back from guest - restore host state and return to caller */ + +BEGIN_FTR_SECTION +	/* Restore host DABR and DABRX */ +	ld	r5,HSTATE_DABR(r13) +	li	r6,7 +	mtspr	SPRN_DABR,r5 +	mtspr	SPRN_DABRX,r6 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + +	/* Restore SPRG3 */ +	ld	r3,PACA_SPRG_VDSO(r13) +	mtspr	SPRN_SPRG_VDSO_WRITE,r3 + +	/* Reload the host's PMU registers */ +	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */ +	lbz	r4, LPPACA_PMCINUSE(r3) +	cmpwi	r4, 0 +	beq	23f			/* skip if not */ +BEGIN_FTR_SECTION +	ld	r3, HSTATE_MMCR(r13) +	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO +	cmpwi	r4, MMCR0_PMAO +	beql	kvmppc_fix_pmao +END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) +	lwz	r3, HSTATE_PMC(r13) +	lwz	r4, HSTATE_PMC + 4(r13) +	lwz	r5, HSTATE_PMC + 8(r13) +	lwz	r6, HSTATE_PMC + 12(r13) +	lwz	r8, HSTATE_PMC + 16(r13) +	lwz	r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION +	lwz	r10, HSTATE_PMC + 24(r13) +	lwz	r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +	mtspr	SPRN_PMC1, r3 +	mtspr	SPRN_PMC2, r4 +	mtspr	SPRN_PMC3, r5 +	mtspr	SPRN_PMC4, r6 +	mtspr	SPRN_PMC5, r8 +	mtspr	SPRN_PMC6, r9 +BEGIN_FTR_SECTION +	mtspr	SPRN_PMC7, r10 +	mtspr	SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +	ld	r3, HSTATE_MMCR(r13) +	ld	r4, HSTATE_MMCR + 8(r13) +	ld	r5, HSTATE_MMCR + 16(r13) +	ld	r6, HSTATE_MMCR + 24(r13) +	ld	r7, HSTATE_MMCR + 32(r13) +	mtspr	SPRN_MMCR1, r4 +	mtspr	SPRN_MMCRA, r5 +	mtspr	SPRN_SIAR, r6 +	mtspr	SPRN_SDAR, r7 +BEGIN_FTR_SECTION +	ld	r8, HSTATE_MMCR + 40(r13) +	ld	r9, HSTATE_MMCR + 48(r13) +	mtspr	SPRN_MMCR2, r8 +	mtspr	SPRN_SIER, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +	mtspr	SPRN_MMCR0, r3 +	isync +23: + +	/* +	 * Reload DEC.  HDEC interrupts were disabled when +	 * we reloaded the host's LPCR value. +	 */ +	ld	r3, HSTATE_DECEXP(r13) +	mftb	r4 +	subf	r4, r4, r3 +	mtspr	SPRN_DEC, r4 + +	/* +	 * For external and machine check interrupts, we need +	 * to call the Linux handler to process the interrupt. +	 * We do that by jumping to absolute address 0x500 for +	 * external interrupts, or the machine_check_fwnmi label +	 * for machine checks (since firmware might have patched +	 * the vector area at 0x200).  The [h]rfid at the end of the +	 * handler will return to the book3s_hv_interrupts.S code. +	 * For other interrupts we do the rfid to get back +	 * to the book3s_hv_interrupts.S code here. 
+	 */ +	ld	r8, 112+PPC_LR_STKOFF(r1) +	addi	r1, r1, 112 +	ld	r7, HSTATE_HOST_MSR(r13) + +	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK +	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL +BEGIN_FTR_SECTION +	beq	11f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +	/* RFI into the highmem handler, or branch to interrupt handler */ +	mfmsr	r6 +	li	r0, MSR_RI +	andc	r6, r6, r0 +	mtmsrd	r6, 1			/* Clear RI in MSR */ +	mtsrr0	r8 +	mtsrr1	r7 +	beqa	0x500			/* external interrupt (PPC970) */ +	beq	cr1, 13f		/* machine check */ +	RFI + +	/* On POWER7, we have external interrupts set to use HSRR0/1 */ +11:	mtspr	SPRN_HSRR0, r8 +	mtspr	SPRN_HSRR1, r7 +	ba	0x500 + +13:	b	machine_check_fwnmi + +kvmppc_primary_no_guest: +	/* We handle this much like a ceded vcpu */ +	/* set our bit in napping_threads */ +	ld	r5, HSTATE_KVM_VCORE(r13) +	lbz	r7, HSTATE_PTID(r13) +	li	r0, 1 +	sld	r0, r0, r7 +	addi	r6, r5, VCORE_NAPPING_THREADS +1:	lwarx	r3, 0, r6 +	or	r3, r3, r0 +	stwcx.	r3, 0, r6 +	bne	1b +	/* order napping_threads update vs testing entry_exit_count */ +	isync +	li	r12, 0 +	lwz	r7, VCORE_ENTRY_EXIT(r5) +	cmpwi	r7, 0x100 +	bge	kvm_novcpu_exit	/* another thread already exiting */ +	li	r3, NAPPING_NOVCPU +	stb	r3, HSTATE_NAPPING(r13) +	li	r3, 1 +	stb	r3, HSTATE_HWTHREAD_REQ(r13) + +	b	kvm_do_nap + +kvm_novcpu_wakeup: +	ld	r1, HSTATE_HOST_R1(r13) +	ld	r5, HSTATE_KVM_VCORE(r13) +	li	r0, 0 +	stb	r0, HSTATE_NAPPING(r13) +	stb	r0, HSTATE_HWTHREAD_REQ(r13) + +	/* check the wake reason */ +	bl	kvmppc_check_wake_reason +	 +	/* see if any other thread is already exiting */ +	lwz	r0, VCORE_ENTRY_EXIT(r5) +	cmpwi	r0, 0x100 +	bge	kvm_novcpu_exit + +	/* clear our bit in napping_threads */ +	lbz	r7, HSTATE_PTID(r13) +	li	r0, 1 +	sld	r0, r0, r7 +	addi	r6, r5, VCORE_NAPPING_THREADS +4:	lwarx	r7, 0, r6 +	andc	r7, r7, r0 +	stwcx.	r7, 0, r6 +	bne	4b + +	/* See if the wake reason means we need to exit */ +	cmpdi	r3, 0 +	bge	kvm_novcpu_exit + +	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ +	ld	r4, HSTATE_KVM_VCPU(r13) +	cmpdi	r4, 0 +	bne	kvmppc_got_guest + +kvm_novcpu_exit: +	b	hdec_soon  /* - * We come in here when wakened from nap mode on a secondary hw thread. + * We come in here when wakened from nap mode.   * Relocation is off and most register values are lost.   * r13 points to the PACA.   */  	.globl	kvm_start_guest  kvm_start_guest: -	ld	r1,PACAEMERGSP(r13) -	subi	r1,r1,STACK_FRAME_OVERHEAD + +	/* Set runlatch bit the minute you wake up from nap */ +	mfspr	r1, SPRN_CTRLF +	ori 	r1, r1, 1 +	mtspr	SPRN_CTRLT, r1 +  	ld	r2,PACATOC(r13)  	li	r0,KVM_HWTHREAD_IN_KVM @@ -103,8 +265,13 @@ kvm_start_guest:  	/* were we napping due to cede? */  	lbz	r0,HSTATE_NAPPING(r13) -	cmpwi	r0,0 -	bne	kvm_end_cede +	cmpwi	r0,NAPPING_CEDE +	beq	kvm_end_cede +	cmpwi	r0,NAPPING_NOVCPU +	beq	kvm_novcpu_wakeup + +	ld	r1,PACAEMERGSP(r13) +	subi	r1,r1,STACK_FRAME_OVERHEAD  	/*  	 * We weren't napping due to cede, so this must be a secondary @@ -114,171 +281,89 @@ kvm_start_guest:  	 */  	/* Check the wake reason in SRR1 to see why we got here */ -	mfspr	r3,SPRN_SRR1 -	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */ -	cmpwi	r3,4			/* was it an external interrupt? */ -	bne	27f			/* if not */ -	ld	r5,HSTATE_XICS_PHYS(r13) -	li	r7,XICS_XIRR		/* if it was an external interrupt, */ -	lwzcix	r8,r5,r7		/* get and ack the interrupt */ -	sync -	clrldi.	r9,r8,40		/* get interrupt source ID. */ -	beq	28f			/* none there? */ -	cmpwi	r9,XICS_IPI		/* was it an IPI? 
*/ -	bne	29f -	li	r0,0xff -	li	r6,XICS_MFRR -	stbcix	r0,r5,r6		/* clear IPI */ -	stwcix	r8,r5,r7		/* EOI the interrupt */ -	sync				/* order loading of vcpu after that */ +	bl	kvmppc_check_wake_reason +	cmpdi	r3, 0 +	bge	kvm_no_guest  	/* get vcpu pointer, NULL if we have no vcpu to run */  	ld	r4,HSTATE_KVM_VCPU(r13)  	cmpdi	r4,0  	/* if we have no vcpu to run, go back to sleep */  	beq	kvm_no_guest -	b	kvmppc_hv_entry -27:	/* XXX should handle hypervisor maintenance interrupts etc. here */ -	b	kvm_no_guest -28:	/* SRR1 said external but ICP said nope?? */ -	b	kvm_no_guest -29:	/* External non-IPI interrupt to offline secondary thread? help?? */ -	stw	r8,HSTATE_SAVED_XIRR(r13) -	b	kvm_no_guest +	/* Set HSTATE_DSCR(r13) to something sensible */ +	ld	r6, PACA_DSCR(r13) +	std	r6, HSTATE_DSCR(r13) + +	bl	kvmppc_hv_entry + +	/* Back from the guest, go back to nap */ +	/* Clear our vcpu pointer so we don't come back in early */ +	li	r0, 0 +	std	r0, HSTATE_KVM_VCPU(r13) +	/* +	 * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing +	 * the nap_count, because once the increment to nap_count is +	 * visible we could be given another vcpu. +	 */ +	lwsync + +	/* increment the nap count and then go to nap mode */ +	ld	r4, HSTATE_KVM_VCORE(r13) +	addi	r4, r4, VCORE_NAP_COUNT +51:	lwarx	r3, 0, r4 +	addi	r3, r3, 1 +	stwcx.	r3, 0, r4 +	bne	51b + +kvm_no_guest: +	li	r0, KVM_HWTHREAD_IN_NAP +	stb	r0, HSTATE_HWTHREAD_STATE(r13) +kvm_do_nap: +	/* Clear the runlatch bit before napping */ +	mfspr	r2, SPRN_CTRLF +	clrrdi	r2, r2, 1 +	mtspr	SPRN_CTRLT, r2 + +	li	r3, LPCR_PECE0 +	mfspr	r4, SPRN_LPCR +	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 +	mtspr	SPRN_LPCR, r4 +	isync +	std	r0, HSTATE_SCRATCH0(r13) +	ptesync +	ld	r0, HSTATE_SCRATCH0(r13) +1:	cmpd	r0, r0 +	bne	1b +	nap +	b	. 
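
The nap path above leans on an ordering rule its comments spell out: a secondary thread must clear HSTATE_KVM_VCPU before its increment of VCORE_NAP_COUNT becomes visible, or the primary could hand it a new vcpu while it is still tearing down. A minimal C sketch of that publish pattern, with illustrative field and function names rather than the real paca/vcore layout, looks like this:

	#include <stdatomic.h>
	#include <stddef.h>

	/* Illustrative stand-ins for HSTATE_KVM_VCPU and VCORE_NAP_COUNT. */
	struct hw_thread_state { void *vcpu; };
	struct vcore_state { atomic_int nap_count; };

	static void secondary_done_with_guest(struct hw_thread_state *hs,
					      struct vcore_state *vc)
	{
		/* Clear our vcpu pointer first so we cannot be handed
		 * another vcpu before we have really gone back to nap. */
		hs->vcpu = NULL;

		/* Release ordering stands in for the lwsync above: the
		 * cleared pointer must be visible before the bumped count. */
		atomic_fetch_add_explicit(&vc->nap_count, 1, memory_order_release);
	}

The assembly gets the same effect with lwsync followed by the lwarx/stwcx. loop on VCORE_NAP_COUNT.
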
+ +/****************************************************************************** + *                                                                            * + *                               Entry code                                   * + *                                                                            * + *****************************************************************************/  .global kvmppc_hv_entry  kvmppc_hv_entry:  	/* Required state:  	 * -	 * R4 = vcpu pointer +	 * R4 = vcpu pointer (or NULL)  	 * MSR = ~IR|DR  	 * R13 = PACA  	 * R1 = host R1  	 * all other volatile GPRS = free  	 */  	mflr	r0 -	std	r0, HSTATE_VMHANDLER(r13) - -	/* Set partition DABR */ -	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ -	li	r5,3 -	ld	r6,VCPU_DABR(r4) -	mtspr	SPRN_DABRX,r5 -	mtspr	SPRN_DABR,r6 -BEGIN_FTR_SECTION -	isync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* Load guest PMU registers */ -	/* R4 is live here (vcpu pointer) */ -	li	r3, 1 -	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */ -	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */ -	isync -	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */ -	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */ -	lwz	r6, VCPU_PMC + 8(r4) -	lwz	r7, VCPU_PMC + 12(r4) -	lwz	r8, VCPU_PMC + 16(r4) -	lwz	r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION -	lwz	r10, VCPU_PMC + 24(r4) -	lwz	r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -	mtspr	SPRN_PMC1, r3 -	mtspr	SPRN_PMC2, r5 -	mtspr	SPRN_PMC3, r6 -	mtspr	SPRN_PMC4, r7 -	mtspr	SPRN_PMC5, r8 -	mtspr	SPRN_PMC6, r9 -BEGIN_FTR_SECTION -	mtspr	SPRN_PMC7, r10 -	mtspr	SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -	ld	r3, VCPU_MMCR(r4) -	ld	r5, VCPU_MMCR + 8(r4) -	ld	r6, VCPU_MMCR + 16(r4) -	mtspr	SPRN_MMCR1, r5 -	mtspr	SPRN_MMCRA, r6 -	mtspr	SPRN_MMCR0, r3 -	isync - -	/* Load up FP, VMX and VSX registers */ -	bl	kvmppc_load_fp - -	ld	r14, VCPU_GPR(R14)(r4) -	ld	r15, VCPU_GPR(R15)(r4) -	ld	r16, VCPU_GPR(R16)(r4) -	ld	r17, VCPU_GPR(R17)(r4) -	ld	r18, VCPU_GPR(R18)(r4) -	ld	r19, VCPU_GPR(R19)(r4) -	ld	r20, VCPU_GPR(R20)(r4) -	ld	r21, VCPU_GPR(R21)(r4) -	ld	r22, VCPU_GPR(R22)(r4) -	ld	r23, VCPU_GPR(R23)(r4) -	ld	r24, VCPU_GPR(R24)(r4) -	ld	r25, VCPU_GPR(R25)(r4) -	ld	r26, VCPU_GPR(R26)(r4) -	ld	r27, VCPU_GPR(R27)(r4) -	ld	r28, VCPU_GPR(R28)(r4) -	ld	r29, VCPU_GPR(R29)(r4) -	ld	r30, VCPU_GPR(R30)(r4) -	ld	r31, VCPU_GPR(R31)(r4) - -BEGIN_FTR_SECTION -	/* Switch DSCR to guest value */ -	ld	r5, VCPU_DSCR(r4) -	mtspr	SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* -	 * Set the decrementer to the guest decrementer. 
-	 */ -	ld	r8,VCPU_DEC_EXPIRES(r4) -	mftb	r7 -	subf	r3,r7,r8 -	mtspr	SPRN_DEC,r3 -	stw	r3,VCPU_DEC(r4) - -	ld	r5, VCPU_SPRG0(r4) -	ld	r6, VCPU_SPRG1(r4) -	ld	r7, VCPU_SPRG2(r4) -	ld	r8, VCPU_SPRG3(r4) -	mtspr	SPRN_SPRG0, r5 -	mtspr	SPRN_SPRG1, r6 -	mtspr	SPRN_SPRG2, r7 -	mtspr	SPRN_SPRG3, r8 +	std	r0, PPC_LR_STKOFF(r1) +	stdu	r1, -112(r1)  	/* Save R1 in the PACA */  	std	r1, HSTATE_HOST_R1(r13) -	/* Increment yield count if they have a VPA */ -	ld	r3, VCPU_VPA(r4) -	cmpdi	r3, 0 -	beq	25f -	lwz	r5, LPPACA_YIELDCOUNT(r3) -	addi	r5, r5, 1 -	stw	r5, LPPACA_YIELDCOUNT(r3) -	li	r6, 1 -	stb	r6, VCPU_VPA_DIRTY(r4) -25: -	/* Load up DAR and DSISR */ -	ld	r5, VCPU_DAR(r4) -	lwz	r6, VCPU_DSISR(r4) -	mtspr	SPRN_DAR, r5 -	mtspr	SPRN_DSISR, r6 - -BEGIN_FTR_SECTION -	/* Restore AMR and UAMOR, set AMOR to all 1s */ -	ld	r5,VCPU_AMR(r4) -	ld	r6,VCPU_UAMOR(r4) -	li	r7,-1 -	mtspr	SPRN_AMR,r5 -	mtspr	SPRN_UAMOR,r6 -	mtspr	SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +	li	r6, KVM_GUEST_MODE_HOST_HV +	stb	r6, HSTATE_IN_GUEST(r13)  	/* Clear out SLB */  	li	r6,0 @@ -305,8 +390,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	bne	21b  	/* Primary thread switches to guest partition. */ -	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */ -	lwz	r6,VCPU_PTID(r4) +	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */ +	lbz	r6,HSTATE_PTID(r13)  	cmpwi	r6,0  	bne	20f  	ld	r6,KVM_SDR1(r9) @@ -334,7 +419,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	andc	r7,r7,r0  	stdcx.	r7,0,r6  	bne	23b -	li	r6,128			/* and flush the TLB */ +	/* Flush the TLB of any entries for this LPID */ +	/* use arch 2.07S as a proxy for POWER8 */ +BEGIN_FTR_SECTION +	li	r6,512			/* POWER8 has 512 sets */ +FTR_SECTION_ELSE +	li	r6,128			/* POWER7 has 128 sets */ +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)  	mtctr	r6  	li	r7,0x800		/* IS field = 0b10 */  	ptesync @@ -343,7 +434,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	bdnz	28b  	ptesync -22:	li	r0,1 +	/* Add timebase offset onto timebase */ +22:	ld	r8,VCORE_TB_OFFSET(r5) +	cmpdi	r8,0 +	beq	37f +	mftb	r6		/* current host timebase */ +	add	r8,r8,r6 +	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */ +	mftb	r7		/* check if lower 24 bits overflowed */ +	clrldi	r6,r6,40 +	clrldi	r7,r7,40 +	cmpld	r7,r6 +	bge	37f +	addis	r8,r8,0x100	/* if so, increment upper 40 bits */ +	mtspr	SPRN_TBU40,r8 + +	/* Load guest PCR value to select appropriate compat mode */ +37:	ld	r7, VCORE_PCR(r5) +	cmpdi	r7, 0 +	beq	38f +	mtspr	SPRN_PCR, r7 +38: + +BEGIN_FTR_SECTION +	/* DPDES is shared between threads */ +	ld	r8, VCORE_DPDES(r5) +	mtspr	SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +	li	r0,1  	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */  	b	10f @@ -353,7 +472,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	beq	20b  	/* Set LPCR and RMOR. */ -10:	ld	r8,KVM_LPCR(r9) +10:	ld	r8,VCORE_LPCR(r5)  	mtspr	SPRN_LPCR,r8  	ld	r8,KVM_RMOR(r9)  	mtspr	SPRN_RMOR,r8 @@ -361,20 +480,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	/* Check if HDEC expires soon */  	mfspr	r3,SPRN_HDEC -	cmpwi	r3,10 +	cmpwi	r3,512		/* 1 microsecond */  	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER -	mr	r9,r4  	blt	hdec_soon - -	/* Save purr/spurr */ -	mfspr	r5,SPRN_PURR -	mfspr	r6,SPRN_SPURR -	std	r5,HSTATE_PURR(r13) -	std	r6,HSTATE_SPURR(r13) -	ld	r7,VCPU_PURR(r4) -	ld	r8,VCPU_SPURR(r4) -	mtspr	SPRN_PURR,r7 -	mtspr	SPRN_SPURR,r8  	b	31f  	/* @@ -385,7 +493,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 * We also have to invalidate the TLB since its  	 * entries aren't tagged with the LPID.  	 
*/ -30:	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */ +30:	ld	r5,HSTATE_KVM_VCORE(r13) +	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */  	/* first take native_tlbie_lock */  	.section ".toc","aw" @@ -405,7 +514,8 @@ toc_tlbie_lock:  	bne	24b  	isync -	ld	r7,KVM_LPCR(r9)		/* use kvm->arch.lpcr to store HID4 */ +	ld	r5,HSTATE_KVM_VCORE(r13) +	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */  	li	r0,0x18f  	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */  	or	r0,r7,r0 @@ -449,7 +559,6 @@ toc_tlbie_lock:  	mfspr	r3,SPRN_HDEC  	cmpwi	r3,10  	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER -	mr	r9,r4  	blt	hdec_soon  	/* Enable HDEC interrupts */ @@ -464,9 +573,14 @@ toc_tlbie_lock:  	mfspr	r0,SPRN_HID0  	mfspr	r0,SPRN_HID0  	mfspr	r0,SPRN_HID0 +31: +	/* Do we have a guest vcpu to run? */ +	cmpdi	r4, 0 +	beq	kvmppc_primary_no_guest +kvmppc_got_guest:  	/* Load up guest SLB entries */ -31:	lwz	r5,VCPU_SLB_MAX(r4) +	lwz	r5,VCPU_SLB_MAX(r4)  	cmpwi	r5,0  	beq	9f  	mtctr	r5 @@ -477,6 +591,321 @@ toc_tlbie_lock:  	addi	r6,r6,VCPU_SLB_SIZE  	bdnz	1b  9: +	/* Increment yield count if they have a VPA */ +	ld	r3, VCPU_VPA(r4) +	cmpdi	r3, 0 +	beq	25f +	lwz	r5, LPPACA_YIELDCOUNT(r3) +	addi	r5, r5, 1 +	stw	r5, LPPACA_YIELDCOUNT(r3) +	li	r6, 1 +	stb	r6, VCPU_VPA_DIRTY(r4) +25: + +BEGIN_FTR_SECTION +	/* Save purr/spurr */ +	mfspr	r5,SPRN_PURR +	mfspr	r6,SPRN_SPURR +	std	r5,HSTATE_PURR(r13) +	std	r6,HSTATE_SPURR(r13) +	ld	r7,VCPU_PURR(r4) +	ld	r8,VCPU_SPURR(r4) +	mtspr	SPRN_PURR,r7 +	mtspr	SPRN_SPURR,r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION +	/* Set partition DABR */ +	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ +	lwz	r5,VCPU_DABRX(r4) +	ld	r6,VCPU_DABR(r4) +	mtspr	SPRN_DABRX,r5 +	mtspr	SPRN_DABR,r6 + BEGIN_FTR_SECTION_NESTED(89) +	isync + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION +	b	skip_tm +END_FTR_SECTION_IFCLR(CPU_FTR_TM) + +	/* Turn on TM/FP/VSX/VMX so we can restore them. */ +	mfmsr	r5 +	li	r6, MSR_TM >> 32 +	sldi	r6, r6, 32 +	or	r5, r5, r6 +	ori	r5, r5, MSR_FP +	oris	r5, r5, (MSR_VEC | MSR_VSX)@h +	mtmsrd	r5 + +	/* +	 * The user may change these outside of a transaction, so they must +	 * always be context switched. +	 */ +	ld	r5, VCPU_TFHAR(r4) +	ld	r6, VCPU_TFIAR(r4) +	ld	r7, VCPU_TEXASR(r4) +	mtspr	SPRN_TFHAR, r5 +	mtspr	SPRN_TFIAR, r6 +	mtspr	SPRN_TEXASR, r7 + +	ld	r5, VCPU_MSR(r4) +	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 +	beq	skip_tm	/* TM not active in guest */ + +	/* Make sure the failure summary is set, otherwise we'll program check +	 * when we trechkpt.  It's possible that this might have been not set +	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the +	 * host. +	 */ +	oris	r7, r7, (TEXASR_FS)@h +	mtspr	SPRN_TEXASR, r7 + +	/* +	 * We need to load up the checkpointed state for the guest. +	 * We need to do this early as it will blow away any GPRs, VSRs and +	 * some SPRs. 
+	 */ + +	mr	r31, r4 +	addi	r3, r31, VCPU_FPRS_TM +	bl	.load_fp_state +	addi	r3, r31, VCPU_VRS_TM +	bl	.load_vr_state +	mr	r4, r31 +	lwz	r7, VCPU_VRSAVE_TM(r4) +	mtspr	SPRN_VRSAVE, r7 + +	ld	r5, VCPU_LR_TM(r4) +	lwz	r6, VCPU_CR_TM(r4) +	ld	r7, VCPU_CTR_TM(r4) +	ld	r8, VCPU_AMR_TM(r4) +	ld	r9, VCPU_TAR_TM(r4) +	mtlr	r5 +	mtcr	r6 +	mtctr	r7 +	mtspr	SPRN_AMR, r8 +	mtspr	SPRN_TAR, r9 + +	/* +	 * Load up PPR and DSCR values but don't put them in the actual SPRs +	 * till the last moment to avoid running with userspace PPR and DSCR for +	 * too long. +	 */ +	ld	r29, VCPU_DSCR_TM(r4) +	ld	r30, VCPU_PPR_TM(r4) + +	std	r2, PACATMSCRATCH(r13) /* Save TOC */ + +	/* Clear the MSR RI since r1, r13 are all going to be foobar. */ +	li	r5, 0 +	mtmsrd	r5, 1 + +	/* Load GPRs r0-r28 */ +	reg = 0 +	.rept	29 +	ld	reg, VCPU_GPRS_TM(reg)(r31) +	reg = reg + 1 +	.endr + +	mtspr	SPRN_DSCR, r29 +	mtspr	SPRN_PPR, r30 + +	/* Load final GPRs */ +	ld	29, VCPU_GPRS_TM(29)(r31) +	ld	30, VCPU_GPRS_TM(30)(r31) +	ld	31, VCPU_GPRS_TM(31)(r31) + +	/* TM checkpointed state is now setup.  All GPRs are now volatile. */ +	TRECHKPT + +	/* Now let's get back the state we need. */ +	HMT_MEDIUM +	GET_PACA(r13) +	ld	r29, HSTATE_DSCR(r13) +	mtspr	SPRN_DSCR, r29 +	ld	r4, HSTATE_KVM_VCPU(r13) +	ld	r1, HSTATE_HOST_R1(r13) +	ld	r2, PACATMSCRATCH(r13) + +	/* Set the MSR RI since we have our registers back. */ +	li	r5, MSR_RI +	mtmsrd	r5, 1 +skip_tm: +#endif + +	/* Load guest PMU registers */ +	/* R4 is live here (vcpu pointer) */ +	li	r3, 1 +	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */ +	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */ +	isync +BEGIN_FTR_SECTION +	ld	r3, VCPU_MMCR(r4) +	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO +	cmpwi	r5, MMCR0_PMAO +	beql	kvmppc_fix_pmao +END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) +	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */ +	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */ +	lwz	r6, VCPU_PMC + 8(r4) +	lwz	r7, VCPU_PMC + 12(r4) +	lwz	r8, VCPU_PMC + 16(r4) +	lwz	r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION +	lwz	r10, VCPU_PMC + 24(r4) +	lwz	r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +	mtspr	SPRN_PMC1, r3 +	mtspr	SPRN_PMC2, r5 +	mtspr	SPRN_PMC3, r6 +	mtspr	SPRN_PMC4, r7 +	mtspr	SPRN_PMC5, r8 +	mtspr	SPRN_PMC6, r9 +BEGIN_FTR_SECTION +	mtspr	SPRN_PMC7, r10 +	mtspr	SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +	ld	r3, VCPU_MMCR(r4) +	ld	r5, VCPU_MMCR + 8(r4) +	ld	r6, VCPU_MMCR + 16(r4) +	ld	r7, VCPU_SIAR(r4) +	ld	r8, VCPU_SDAR(r4) +	mtspr	SPRN_MMCR1, r5 +	mtspr	SPRN_MMCRA, r6 +	mtspr	SPRN_SIAR, r7 +	mtspr	SPRN_SDAR, r8 +BEGIN_FTR_SECTION +	ld	r5, VCPU_MMCR + 24(r4) +	ld	r6, VCPU_SIER(r4) +	lwz	r7, VCPU_PMC + 24(r4) +	lwz	r8, VCPU_PMC + 28(r4) +	ld	r9, VCPU_MMCR + 32(r4) +	mtspr	SPRN_MMCR2, r5 +	mtspr	SPRN_SIER, r6 +	mtspr	SPRN_SPMC1, r7 +	mtspr	SPRN_SPMC2, r8 +	mtspr	SPRN_MMCRS, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +	mtspr	SPRN_MMCR0, r3 +	isync + +	/* Load up FP, VMX and VSX registers */ +	bl	kvmppc_load_fp + +	ld	r14, VCPU_GPR(R14)(r4) +	ld	r15, VCPU_GPR(R15)(r4) +	ld	r16, VCPU_GPR(R16)(r4) +	ld	r17, VCPU_GPR(R17)(r4) +	ld	r18, VCPU_GPR(R18)(r4) +	ld	r19, VCPU_GPR(R19)(r4) +	ld	r20, VCPU_GPR(R20)(r4) +	ld	r21, VCPU_GPR(R21)(r4) +	ld	r22, VCPU_GPR(R22)(r4) +	ld	r23, VCPU_GPR(R23)(r4) +	ld	r24, VCPU_GPR(R24)(r4) +	ld	r25, VCPU_GPR(R25)(r4) +	ld	r26, VCPU_GPR(R26)(r4) +	ld	r27, VCPU_GPR(R27)(r4) +	ld	r28, VCPU_GPR(R28)(r4) +	ld	r29, VCPU_GPR(R29)(r4) +	ld	r30, VCPU_GPR(R30)(r4) +	ld	r31, VCPU_GPR(R31)(r4) + +BEGIN_FTR_SECTION 
+	/* Switch DSCR to guest value */ +	ld	r5, VCPU_DSCR(r4) +	mtspr	SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION +	/* Skip next section on POWER7 or PPC970 */ +	b	8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ +	mfmsr	r8 +	li	r0, 1 +	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG +	mtmsrd	r8 + +	/* Load up POWER8-specific registers */ +	ld	r5, VCPU_IAMR(r4) +	lwz	r6, VCPU_PSPB(r4) +	ld	r7, VCPU_FSCR(r4) +	mtspr	SPRN_IAMR, r5 +	mtspr	SPRN_PSPB, r6 +	mtspr	SPRN_FSCR, r7 +	ld	r5, VCPU_DAWR(r4) +	ld	r6, VCPU_DAWRX(r4) +	ld	r7, VCPU_CIABR(r4) +	ld	r8, VCPU_TAR(r4) +	mtspr	SPRN_DAWR, r5 +	mtspr	SPRN_DAWRX, r6 +	mtspr	SPRN_CIABR, r7 +	mtspr	SPRN_TAR, r8 +	ld	r5, VCPU_IC(r4) +	ld	r6, VCPU_VTB(r4) +	mtspr	SPRN_IC, r5 +	mtspr	SPRN_VTB, r6 +	ld	r8, VCPU_EBBHR(r4) +	mtspr	SPRN_EBBHR, r8 +	ld	r5, VCPU_EBBRR(r4) +	ld	r6, VCPU_BESCR(r4) +	ld	r7, VCPU_CSIGR(r4) +	ld	r8, VCPU_TACR(r4) +	mtspr	SPRN_EBBRR, r5 +	mtspr	SPRN_BESCR, r6 +	mtspr	SPRN_CSIGR, r7 +	mtspr	SPRN_TACR, r8 +	ld	r5, VCPU_TCSCR(r4) +	ld	r6, VCPU_ACOP(r4) +	lwz	r7, VCPU_GUEST_PID(r4) +	ld	r8, VCPU_WORT(r4) +	mtspr	SPRN_TCSCR, r5 +	mtspr	SPRN_ACOP, r6 +	mtspr	SPRN_PID, r7 +	mtspr	SPRN_WORT, r8 +8: + +	/* +	 * Set the decrementer to the guest decrementer. +	 */ +	ld	r8,VCPU_DEC_EXPIRES(r4) +	/* r8 is a host timebase value here, convert to guest TB */ +	ld	r5,HSTATE_KVM_VCORE(r13) +	ld	r6,VCORE_TB_OFFSET(r5) +	add	r8,r8,r6 +	mftb	r7 +	subf	r3,r7,r8 +	mtspr	SPRN_DEC,r3 +	stw	r3,VCPU_DEC(r4) + +	ld	r5, VCPU_SPRG0(r4) +	ld	r6, VCPU_SPRG1(r4) +	ld	r7, VCPU_SPRG2(r4) +	ld	r8, VCPU_SPRG3(r4) +	mtspr	SPRN_SPRG0, r5 +	mtspr	SPRN_SPRG1, r6 +	mtspr	SPRN_SPRG2, r7 +	mtspr	SPRN_SPRG3, r8 + +	/* Load up DAR and DSISR */ +	ld	r5, VCPU_DAR(r4) +	lwz	r6, VCPU_DSISR(r4) +	mtspr	SPRN_DAR, r5 +	mtspr	SPRN_DSISR, r6 + +BEGIN_FTR_SECTION +	/* Restore AMR and UAMOR, set AMOR to all 1s */ +	ld	r5,VCPU_AMR(r4) +	ld	r6,VCPU_UAMOR(r4) +	li	r7,-1 +	mtspr	SPRN_AMR,r5 +	mtspr	SPRN_UAMOR,r6 +	mtspr	SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)  	/* Restore state of CTRL run bit; assume 1 on entry */  	lwz	r5,VCPU_CTRL(r4) @@ -492,48 +921,54 @@ toc_tlbie_lock:  	mtctr	r6  	mtxer	r7 +kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */  	ld	r10, VCPU_PC(r4)  	ld	r11, VCPU_MSR(r4) -kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */  	ld	r6, VCPU_SRR0(r4)  	ld	r7, VCPU_SRR1(r4) +	mtspr	SPRN_SRR0, r6 +	mtspr	SPRN_SRR1, r7 +deliver_guest_interrupt:  	/* r11 = vcpu->arch.msr & ~MSR_HV */  	rldicl	r11, r11, 63 - MSR_HV_LG, 1  	rotldi	r11, r11, 1 + MSR_HV_LG  	ori	r11, r11, MSR_ME  	/* Check if we can deliver an external or decrementer interrupt now */ -	ld	r0,VCPU_PENDING_EXC(r4) -	lis	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h -	and	r0,r0,r8 -	cmpdi	cr1,r0,0 -	andi.	r0,r11,MSR_EE -	beq	cr1,11f +	ld	r0, VCPU_PENDING_EXC(r4) +	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 +	cmpdi	cr1, r0, 0 +	andi.	
r8, r11, MSR_EE  BEGIN_FTR_SECTION -	mfspr	r8,SPRN_LPCR -	ori	r8,r8,LPCR_MER -	mtspr	SPRN_LPCR,r8 +	mfspr	r8, SPRN_LPCR +	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ +	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH +	mtspr	SPRN_LPCR, r8  	isync  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)  	beq	5f -	li	r0,BOOK3S_INTERRUPT_EXTERNAL -12:	mr	r6,r10 -	mr	r10,r0 -	mr	r7,r11 -	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */ -	rotldi	r11,r11,63 -	b	5f -11:	beq	5f -	mfspr	r0,SPRN_DEC -	cmpwi	r0,0 -	li	r0,BOOK3S_INTERRUPT_DECREMENTER -	blt	12b +	li	r0, BOOK3S_INTERRUPT_EXTERNAL +	bne	cr1, 12f +	mfspr	r0, SPRN_DEC +	cmpwi	r0, 0 +	li	r0, BOOK3S_INTERRUPT_DECREMENTER +	bge	5f -	/* Move SRR0 and SRR1 into the respective regs */ -5:	mtspr	SPRN_SRR0, r6 -	mtspr	SPRN_SRR1, r7 +12:	mtspr	SPRN_SRR0, r10 +	mr	r10,r0 +	mtspr	SPRN_SRR1, r11 +	mr	r9, r4 +	bl	kvmppc_msr_interrupt +5: +/* + * Required state: + * R4 = vcpu + * R10: value for HSRR0 + * R11: value for HSRR1 + * R13 = PACA + */  fast_guest_return:  	li	r0,0  	stb	r0,VCPU_CEDED(r4)	/* cancel cede */ @@ -541,7 +976,7 @@ fast_guest_return:  	mtspr	SPRN_HSRR1,r11  	/* Activate guest mode, so faults get handled by KVM */ -	li	r9, KVM_GUEST_MODE_GUEST +	li	r9, KVM_GUEST_MODE_GUEST_HV  	stb	r9, HSTATE_IN_GUEST(r13)  	/* Enter guest */ @@ -550,13 +985,15 @@ BEGIN_FTR_SECTION  	ld	r5, VCPU_CFAR(r4)  	mtspr	SPRN_CFAR, r5  END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +BEGIN_FTR_SECTION +	ld	r0, VCPU_PPR(r4) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	ld	r5, VCPU_LR(r4)  	lwz	r6, VCPU_CR(r4)  	mtlr	r5  	mtcr	r6 -	ld	r0, VCPU_GPR(R0)(r4)  	ld	r1, VCPU_GPR(R1)(r4)  	ld	r2, VCPU_GPR(R2)(r4)  	ld	r3, VCPU_GPR(R3)(r4) @@ -570,6 +1007,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	ld	r12, VCPU_GPR(R12)(r4)  	ld	r13, VCPU_GPR(R13)(r4) +BEGIN_FTR_SECTION +	mtspr	SPRN_PPR, r0 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) +	ld	r0, VCPU_GPR(R0)(r4)  	ld	r4, VCPU_GPR(R4)(r4)  	hrfid @@ -584,8 +1025,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  /*   * We come here from the first-level interrupt handlers.   
*/ -	.globl	kvmppc_interrupt -kvmppc_interrupt: +	.globl	kvmppc_interrupt_hv +kvmppc_interrupt_hv:  	/*  	 * Register contents:  	 * R12		= interrupt vector @@ -593,8 +1034,20 @@ kvmppc_interrupt:  	 * guest CR, R12 saved in shadow VCPU SCRATCH1/0  	 * guest R13 saved in SPRN_SCRATCH0  	 */ -	/* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ -	std	r9, HSTATE_HOST_R2(r13) +	std	r9, HSTATE_SCRATCH2(r13) + +	lbz	r9, HSTATE_IN_GUEST(r13) +	cmpwi	r9, KVM_GUEST_MODE_HOST_HV +	beq	kvmppc_bad_host_intr +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +	cmpwi	r9, KVM_GUEST_MODE_GUEST +	ld	r9, HSTATE_SCRATCH2(r13) +	beq	kvmppc_interrupt_pr +#endif +	/* We're now back in the host but in guest MMU context */ +	li	r9, KVM_GUEST_MODE_HOST_HV +	stb	r9, HSTATE_IN_GUEST(r13) +  	ld	r9, HSTATE_KVM_VCPU(r13)  	/* Save registers */ @@ -608,7 +1061,7 @@ kvmppc_interrupt:  	std	r6, VCPU_GPR(R6)(r9)  	std	r7, VCPU_GPR(R7)(r9)  	std	r8, VCPU_GPR(R8)(r9) -	ld	r0, HSTATE_HOST_R2(r13) +	ld	r0, HSTATE_SCRATCH2(r13)  	std	r0, VCPU_GPR(R9)(r9)  	std	r10, VCPU_GPR(R10)(r9)  	std	r11, VCPU_GPR(R11)(r9) @@ -620,6 +1073,10 @@ BEGIN_FTR_SECTION  	ld	r3, HSTATE_CFAR(r13)  	std	r3, VCPU_CFAR(r9)  END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +BEGIN_FTR_SECTION +	ld	r4, HSTATE_PPR(r13) +	std	r4, VCPU_PPR(r9) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	/* Restore R1/R2 so we can handle faults */  	ld	r1, HSTATE_HOST_R1(r13) @@ -642,10 +1099,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	std	r3, VCPU_GPR(R13)(r9)  	std	r4, VCPU_LR(r9) -	/* Unset guest mode */ -	li	r0, KVM_GUEST_MODE_NONE -	stb	r0, HSTATE_IN_GUEST(r13) -  	stw	r12,VCPU_TRAP(r9)  	/* Save HEIR (HV emulation assist reg) in last_inst @@ -695,96 +1148,23 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)  	/* External interrupt, first check for host_ipi. If this is  	 * set, we know the host wants us out so let's do it now  	 */ -do_ext_interrupt: -	lbz	r0, HSTATE_HOST_IPI(r13) -	cmpwi	r0, 0 -	bne	ext_interrupt_to_host - -	/* Now read the interrupt from the ICP */ -	ld	r5, HSTATE_XICS_PHYS(r13) -	li	r7, XICS_XIRR -	cmpdi	r5, 0 -	beq-	ext_interrupt_to_host -	lwzcix	r3, r5, r7 -	rlwinm.	r0, r3, 0, 0xffffff -	sync -	beq	3f		/* if nothing pending in the ICP */ - -	/* We found something in the ICP... -	 * -	 * If it's not an IPI, stash it in the PACA and return to -	 * the host, we don't (yet) handle directing real external -	 * interrupts directly to the guest -	 */ -	cmpwi	r0, XICS_IPI -	bne	ext_stash_for_host - -	/* It's an IPI, clear the MFRR and EOI it */ -	li	r0, 0xff -	li	r6, XICS_MFRR -	stbcix	r0, r5, r6		/* clear the IPI */ -	stwcix	r3, r5, r7		/* EOI it */ -	sync - -	/* We need to re-check host IPI now in case it got set in the -	 * meantime. If it's clear, we bounce the interrupt to the -	 * guest -	 */ -	lbz	r0, HSTATE_HOST_IPI(r13) -	cmpwi	r0, 0 -	bne-	1f +	bl	kvmppc_read_intr +	cmpdi	r3, 0 +	bgt	ext_interrupt_to_host -	/* Allright, looks like an IPI for the guest, we need to set MER */ -3:  	/* Check if any CPU is heading out to the host, if so head out too */  	ld	r5, HSTATE_KVM_VCORE(r13)  	lwz	r0, VCORE_ENTRY_EXIT(r5)  	cmpwi	r0, 0x100  	bge	ext_interrupt_to_host -	/* See if there is a pending interrupt for the guest */ -	mfspr	r8, SPRN_LPCR -	ld	r0, VCPU_PENDING_EXC(r9) -	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ -	rldicl.	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 -	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH -	beq	2f - -	/* And if the guest EE is set, we can deliver immediately, else -	 * we return to the guest with MER set -	 */ -	andi.	
r0, r11, MSR_EE -	beq	2f -	mtspr	SPRN_SRR0, r10 -	mtspr	SPRN_SRR1, r11 -	li	r10, BOOK3S_INTERRUPT_EXTERNAL -	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */ -	rotldi	r11, r11, 63 -2:	mr	r4, r9 -	mtspr	SPRN_LPCR, r8 -	b	fast_guest_return - -	/* We raced with the host, we need to resend that IPI, bummer */ -1:	li	r0, IPI_PRIORITY -	stbcix	r0, r5, r6		/* set the IPI */ -	sync -	b	ext_interrupt_to_host +	/* Return to guest after delivering any pending interrupt */ +	mr	r4, r9 +	b	deliver_guest_interrupt -ext_stash_for_host: -	/* It's not an IPI and it's for the host, stash it in the PACA -	 * before exit, it will be picked up by the host ICP driver -	 */ -	stw	r3, HSTATE_SAVED_XIRR(r13)  ext_interrupt_to_host:  guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */ -	/* Save DEC */ -	mfspr	r5,SPRN_DEC -	mftb	r6 -	extsw	r5,r5 -	add	r5,r5,r6 -	std	r5,VCPU_DEC_EXPIRES(r9) -  	/* Save more register state  */  	mfdar	r6  	mfdsisr	r7 @@ -855,13 +1235,313 @@ BEGIN_FTR_SECTION  	mtspr	SPRN_SPURR,r4  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) +	/* Save DEC */ +	mfspr	r5,SPRN_DEC +	mftb	r6 +	extsw	r5,r5 +	add	r5,r5,r6 +	/* r5 is a guest timebase value here, convert to host TB */ +	ld	r3,HSTATE_KVM_VCORE(r13) +	ld	r4,VCORE_TB_OFFSET(r3) +	subf	r5,r4,r5 +	std	r5,VCPU_DEC_EXPIRES(r9) + +BEGIN_FTR_SECTION +	b	8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +	/* Save POWER8-specific registers */ +	mfspr	r5, SPRN_IAMR +	mfspr	r6, SPRN_PSPB +	mfspr	r7, SPRN_FSCR +	std	r5, VCPU_IAMR(r9) +	stw	r6, VCPU_PSPB(r9) +	std	r7, VCPU_FSCR(r9) +	mfspr	r5, SPRN_IC +	mfspr	r6, SPRN_VTB +	mfspr	r7, SPRN_TAR +	std	r5, VCPU_IC(r9) +	std	r6, VCPU_VTB(r9) +	std	r7, VCPU_TAR(r9) +	mfspr	r8, SPRN_EBBHR +	std	r8, VCPU_EBBHR(r9) +	mfspr	r5, SPRN_EBBRR +	mfspr	r6, SPRN_BESCR +	mfspr	r7, SPRN_CSIGR +	mfspr	r8, SPRN_TACR +	std	r5, VCPU_EBBRR(r9) +	std	r6, VCPU_BESCR(r9) +	std	r7, VCPU_CSIGR(r9) +	std	r8, VCPU_TACR(r9) +	mfspr	r5, SPRN_TCSCR +	mfspr	r6, SPRN_ACOP +	mfspr	r7, SPRN_PID +	mfspr	r8, SPRN_WORT +	std	r5, VCPU_TCSCR(r9) +	std	r6, VCPU_ACOP(r9) +	stw	r7, VCPU_GUEST_PID(r9) +	std	r8, VCPU_WORT(r9) +8: + +	/* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION +	mfspr	r5,SPRN_AMR +	mfspr	r6,SPRN_UAMOR +	std	r5,VCPU_AMR(r9) +	std	r6,VCPU_UAMOR(r9) +	li	r6,0 +	mtspr	SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +	/* Switch DSCR back to host value */ +BEGIN_FTR_SECTION +	mfspr	r8, SPRN_DSCR +	ld	r7, HSTATE_DSCR(r13) +	std	r8, VCPU_DSCR(r9) +	mtspr	SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +	/* Save non-volatile GPRs */ +	std	r14, VCPU_GPR(R14)(r9) +	std	r15, VCPU_GPR(R15)(r9) +	std	r16, VCPU_GPR(R16)(r9) +	std	r17, VCPU_GPR(R17)(r9) +	std	r18, VCPU_GPR(R18)(r9) +	std	r19, VCPU_GPR(R19)(r9) +	std	r20, VCPU_GPR(R20)(r9) +	std	r21, VCPU_GPR(R21)(r9) +	std	r22, VCPU_GPR(R22)(r9) +	std	r23, VCPU_GPR(R23)(r9) +	std	r24, VCPU_GPR(R24)(r9) +	std	r25, VCPU_GPR(R25)(r9) +	std	r26, VCPU_GPR(R26)(r9) +	std	r27, VCPU_GPR(R27)(r9) +	std	r28, VCPU_GPR(R28)(r9) +	std	r29, VCPU_GPR(R29)(r9) +	std	r30, VCPU_GPR(R30)(r9) +	std	r31, VCPU_GPR(R31)(r9) + +	/* Save SPRGs */ +	mfspr	r3, SPRN_SPRG0 +	mfspr	r4, SPRN_SPRG1 +	mfspr	r5, SPRN_SPRG2 +	mfspr	r6, SPRN_SPRG3 +	std	r3, VCPU_SPRG0(r9) +	std	r4, VCPU_SPRG1(r9) +	std	r5, VCPU_SPRG2(r9) +	std	r6, VCPU_SPRG3(r9) + +	/* save FP state */ +	mr	r3, r9 +	bl	kvmppc_save_fp + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION +	b	2f +END_FTR_SECTION_IFCLR(CPU_FTR_TM) +	/* Turn on TM. 
*/ +	mfmsr	r8 +	li	r0, 1 +	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG +	mtmsrd	r8 + +	ld	r5, VCPU_MSR(r9) +	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 +	beq	1f	/* TM not active in guest. */ + +	li	r3, TM_CAUSE_KVM_RESCHED + +	/* Clear the MSR RI since r1, r13 are all going to be foobar. */ +	li	r5, 0 +	mtmsrd	r5, 1 + +	/* All GPRs are volatile at this point. */ +	TRECLAIM(R3) + +	/* Temporarily store r13 and r9 so we have some regs to play with */ +	SET_SCRATCH0(r13) +	GET_PACA(r13) +	std	r9, PACATMSCRATCH(r13) +	ld	r9, HSTATE_KVM_VCPU(r13) + +	/* Get a few more GPRs free. */ +	std	r29, VCPU_GPRS_TM(29)(r9) +	std	r30, VCPU_GPRS_TM(30)(r9) +	std	r31, VCPU_GPRS_TM(31)(r9) + +	/* Save away PPR and DSCR soon so don't run with user values. */ +	mfspr	r31, SPRN_PPR +	HMT_MEDIUM +	mfspr	r30, SPRN_DSCR +	ld	r29, HSTATE_DSCR(r13) +	mtspr	SPRN_DSCR, r29 + +	/* Save all but r9, r13 & r29-r31 */ +	reg = 0 +	.rept	29 +	.if (reg != 9) && (reg != 13) +	std	reg, VCPU_GPRS_TM(reg)(r9) +	.endif +	reg = reg + 1 +	.endr +	/* ... now save r13 */ +	GET_SCRATCH0(r4) +	std	r4, VCPU_GPRS_TM(13)(r9) +	/* ... and save r9 */ +	ld	r4, PACATMSCRATCH(r13) +	std	r4, VCPU_GPRS_TM(9)(r9) + +	/* Reload stack pointer and TOC. */ +	ld	r1, HSTATE_HOST_R1(r13) +	ld	r2, PACATOC(r13) + +	/* Set MSR RI now we have r1 and r13 back. */ +	li	r5, MSR_RI +	mtmsrd	r5, 1 + +	/* Save away checkpinted SPRs. */ +	std	r31, VCPU_PPR_TM(r9) +	std	r30, VCPU_DSCR_TM(r9) +	mflr	r5 +	mfcr	r6 +	mfctr	r7 +	mfspr	r8, SPRN_AMR +	mfspr	r10, SPRN_TAR +	std	r5, VCPU_LR_TM(r9) +	stw	r6, VCPU_CR_TM(r9) +	std	r7, VCPU_CTR_TM(r9) +	std	r8, VCPU_AMR_TM(r9) +	std	r10, VCPU_TAR_TM(r9) + +	/* Restore r12 as trap number. */ +	lwz	r12, VCPU_TRAP(r9) + +	/* Save FP/VSX. */ +	addi	r3, r9, VCPU_FPRS_TM +	bl	.store_fp_state +	addi	r3, r9, VCPU_VRS_TM +	bl	.store_vr_state +	mfspr	r6, SPRN_VRSAVE +	stw	r6, VCPU_VRSAVE_TM(r9) +1: +	/* +	 * We need to save these SPRs after the treclaim so that the software +	 * error code is recorded correctly in the TEXASR.  Also the user may +	 * change these outside of a transaction, so they must always be +	 * context switched. +	 */ +	mfspr	r5, SPRN_TFHAR +	mfspr	r6, SPRN_TFIAR +	mfspr	r7, SPRN_TEXASR +	std	r5, VCPU_TFHAR(r9) +	std	r6, VCPU_TFIAR(r9) +	std	r7, VCPU_TEXASR(r9) +2: +#endif + +	/* Increment yield count if they have a VPA */ +	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */ +	cmpdi	r8, 0 +	beq	25f +	lwz	r3, LPPACA_YIELDCOUNT(r8) +	addi	r3, r3, 1 +	stw	r3, LPPACA_YIELDCOUNT(r8) +	li	r3, 1 +	stb	r3, VCPU_VPA_DIRTY(r9) +25: +	/* Save PMU registers if requested */ +	/* r8 and cr0.eq are live here */ +BEGIN_FTR_SECTION +	/* +	 * POWER8 seems to have a hardware bug where setting +	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE] +	 * when some counters are already negative doesn't seem +	 * to cause a performance monitor alert (and hence interrupt). +	 * The effect of this is that when saving the PMU state, +	 * if there is no PMU alert pending when we read MMCR0 +	 * before freezing the counters, but one becomes pending +	 * before we read the counters, we lose it. +	 * To work around this, we need a way to freeze the counters +	 * before reading MMCR0.  Normally, freezing the counters +	 * is done by writing MMCR0 (to set MMCR0[FC]) which +	 * unavoidably writes MMCR0[PMA0] as well.  On POWER8, +	 * we can also freeze the counters using MMCR2, by writing +	 * 1s to all the counter freeze condition bits (there are +	 * 9 bits each for 6 counters). 
+	 */ +	li	r3, -1			/* set all freeze bits */ +	clrrdi	r3, r3, 10 +	mfspr	r10, SPRN_MMCR2 +	mtspr	SPRN_MMCR2, r3 +	isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +	li	r3, 1 +	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */ +	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */ +	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */ +	mfspr	r6, SPRN_MMCRA +BEGIN_FTR_SECTION +	/* On P7, clear MMCRA in order to disable SDAR updates */ +	li	r7, 0 +	mtspr	SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +	isync +	beq	21f			/* if no VPA, save PMU stuff anyway */ +	lbz	r7, LPPACA_PMCINUSE(r8) +	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */ +	bne	21f +	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */ +	b	22f +21:	mfspr	r5, SPRN_MMCR1 +	mfspr	r7, SPRN_SIAR +	mfspr	r8, SPRN_SDAR +	std	r4, VCPU_MMCR(r9) +	std	r5, VCPU_MMCR + 8(r9) +	std	r6, VCPU_MMCR + 16(r9) +BEGIN_FTR_SECTION +	std	r10, VCPU_MMCR + 24(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +	std	r7, VCPU_SIAR(r9) +	std	r8, VCPU_SDAR(r9) +	mfspr	r3, SPRN_PMC1 +	mfspr	r4, SPRN_PMC2 +	mfspr	r5, SPRN_PMC3 +	mfspr	r6, SPRN_PMC4 +	mfspr	r7, SPRN_PMC5 +	mfspr	r8, SPRN_PMC6 +BEGIN_FTR_SECTION +	mfspr	r10, SPRN_PMC7 +	mfspr	r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +	stw	r3, VCPU_PMC(r9) +	stw	r4, VCPU_PMC + 4(r9) +	stw	r5, VCPU_PMC + 8(r9) +	stw	r6, VCPU_PMC + 12(r9) +	stw	r7, VCPU_PMC + 16(r9) +	stw	r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION +	stw	r10, VCPU_PMC + 24(r9) +	stw	r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +BEGIN_FTR_SECTION +	mfspr	r5, SPRN_SIER +	mfspr	r6, SPRN_SPMC1 +	mfspr	r7, SPRN_SPMC2 +	mfspr	r8, SPRN_MMCRS +	std	r5, VCPU_SIER(r9) +	stw	r6, VCPU_PMC + 24(r9) +	stw	r7, VCPU_PMC + 28(r9) +	std	r8, VCPU_MMCR + 32(r9) +	lis	r4, 0x8000 +	mtspr	SPRN_MMCRS, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +22:  	/* Clear out SLB */  	li	r5,0  	slbmte	r5,r5  	slbia  	ptesync -hdec_soon:			/* r9 = vcpu, r12 = trap, r13 = paca */ +hdec_soon:			/* r12 = trap, r13 = paca */  BEGIN_FTR_SECTION  	b	32f  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) @@ -872,14 +1552,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 */  	/* Increment the threads-exiting-guest count in the 0xff00  	   bits of vcore->entry_exit_count */ -	lwsync  	ld	r5,HSTATE_KVM_VCORE(r13)  	addi	r6,r5,VCORE_ENTRY_EXIT  41:	lwarx	r3,0,r6  	addi	r0,r3,0x100  	stwcx.	r0,0,r6  	bne	41b -	lwsync +	isync		/* order stwcx. vs. reading napping_threads */  	/*  	 * At this point we have an interrupt that we have to pass @@ -895,8 +1574,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 */  	cmpwi	r3,0x100	/* Are we the first here? */  	bge	43f -	cmpwi	r3,1		/* Are any other threads in the guest? */ -	ble	43f  	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER  	beq	40f  	li	r0,0 @@ -907,11 +1584,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 * doesn't wake CPUs up from nap.  	 */  	lwz	r3,VCORE_NAPPING_THREADS(r5) -	lwz	r4,VCPU_PTID(r9) +	lbz	r4,HSTATE_PTID(r13)  	li	r0,1  	sld	r0,r0,r4  	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */  	beq	43f +	/* Order entry/exit update vs. IPIs */ +	sync  	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */  	subf	r6,r4,r13  42:	andi.	
r0,r3,1 @@ -924,10 +1603,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	addi	r6,r6,PACA_SIZE  	bne	42b +secondary_too_late:  	/* Secondary threads wait for primary to do partition switch */ -43:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */ -	ld	r5,HSTATE_KVM_VCORE(r13) -	lwz	r3,VCPU_PTID(r9) +43:	ld	r5,HSTATE_KVM_VCORE(r13) +	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */ +	lbz	r3,HSTATE_PTID(r13)  	cmpwi	r3,0  	beq	15f  	HMT_LOW @@ -954,7 +1634,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	mtspr	SPRN_SDR1,r6		/* switch to partition page table */  	mtspr	SPRN_LPID,r7  	isync -	li	r0,0 + +BEGIN_FTR_SECTION +	/* DPDES is shared between threads */ +	mfspr	r7, SPRN_DPDES +	std	r7, VCORE_DPDES(r5) +	/* clear DPDES so we don't get guest doorbells in the host */ +	li	r8, 0 +	mtspr	SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +	/* Subtract timebase offset from timebase */ +	ld	r8,VCORE_TB_OFFSET(r5) +	cmpdi	r8,0 +	beq	17f +	mftb	r6			/* current guest timebase */ +	subf	r8,r8,r6 +	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */ +	mftb	r7			/* check if lower 24 bits overflowed */ +	clrldi	r6,r6,40 +	clrldi	r7,r7,40 +	cmpld	r7,r6 +	bge	17f +	addis	r8,r8,0x100		/* if so, increment upper 40 bits */ +	mtspr	SPRN_TBU40,r8 + +	/* Reset PCR */ +17:	ld	r0, VCORE_PCR(r5) +	cmpdi	r0, 0 +	beq	18f +	li	r0, 0 +	mtspr	SPRN_PCR, r0 +18: +	/* Signal secondary CPUs to continue */  	stb	r0,VCORE_IN_GUEST(r5)  	lis	r8,0x7fff		/* MAX_INT@h */  	mtspr	SPRN_HDEC,r8 @@ -969,7 +1681,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  	 * We have to lock against concurrent tlbies, and  	 * we have to flush the whole TLB.  	 */ -32:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */ +32:	ld	r5,HSTATE_KVM_VCORE(r13) +	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */  	/* Take the guest's tlbie_lock */  #ifdef __BIG_ENDIAN__ @@ -1052,209 +1765,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)  1:	addi	r8,r8,16  	.endr -	/* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION -	mfspr	r5,SPRN_AMR -	mfspr	r6,SPRN_UAMOR -	std	r5,VCPU_AMR(r9) -	std	r6,VCPU_UAMOR(r9) -	li	r6,0 -	mtspr	SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* Switch DSCR back to host value */ -BEGIN_FTR_SECTION -	mfspr	r8, SPRN_DSCR -	ld	r7, HSTATE_DSCR(r13) -	std	r8, VCPU_DSCR(r7) -	mtspr	SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* Save non-volatile GPRs */ -	std	r14, VCPU_GPR(R14)(r9) -	std	r15, VCPU_GPR(R15)(r9) -	std	r16, VCPU_GPR(R16)(r9) -	std	r17, VCPU_GPR(R17)(r9) -	std	r18, VCPU_GPR(R18)(r9) -	std	r19, VCPU_GPR(R19)(r9) -	std	r20, VCPU_GPR(R20)(r9) -	std	r21, VCPU_GPR(R21)(r9) -	std	r22, VCPU_GPR(R22)(r9) -	std	r23, VCPU_GPR(R23)(r9) -	std	r24, VCPU_GPR(R24)(r9) -	std	r25, VCPU_GPR(R25)(r9) -	std	r26, VCPU_GPR(R26)(r9) -	std	r27, VCPU_GPR(R27)(r9) -	std	r28, VCPU_GPR(R28)(r9) -	std	r29, VCPU_GPR(R29)(r9) -	std	r30, VCPU_GPR(R30)(r9) -	std	r31, VCPU_GPR(R31)(r9) - -	/* Save SPRGs */ -	mfspr	r3, SPRN_SPRG0 -	mfspr	r4, SPRN_SPRG1 -	mfspr	r5, SPRN_SPRG2 -	mfspr	r6, SPRN_SPRG3 -	std	r3, VCPU_SPRG0(r9) -	std	r4, VCPU_SPRG1(r9) -	std	r5, VCPU_SPRG2(r9) -	std	r6, VCPU_SPRG3(r9) - -	/* save FP state */ -	mr	r3, r9 -	bl	.kvmppc_save_fp - -	/* Increment yield count if they have a VPA */ -	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? 
*/ -	cmpdi	r8, 0 -	beq	25f -	lwz	r3, LPPACA_YIELDCOUNT(r8) -	addi	r3, r3, 1 -	stw	r3, LPPACA_YIELDCOUNT(r8) -	li	r3, 1 -	stb	r3, VCPU_VPA_DIRTY(r9) -25: -	/* Save PMU registers if requested */ -	/* r8 and cr0.eq are live here */ -	li	r3, 1 -	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */ -	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */ -	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */ -	mfspr	r6, SPRN_MMCRA -BEGIN_FTR_SECTION -	/* On P7, clear MMCRA in order to disable SDAR updates */ -	li	r7, 0 -	mtspr	SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -	isync -	beq	21f			/* if no VPA, save PMU stuff anyway */ -	lbz	r7, LPPACA_PMCINUSE(r8) -	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */ -	bne	21f -	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */ -	b	22f -21:	mfspr	r5, SPRN_MMCR1 -	std	r4, VCPU_MMCR(r9) -	std	r5, VCPU_MMCR + 8(r9) -	std	r6, VCPU_MMCR + 16(r9) -	mfspr	r3, SPRN_PMC1 -	mfspr	r4, SPRN_PMC2 -	mfspr	r5, SPRN_PMC3 -	mfspr	r6, SPRN_PMC4 -	mfspr	r7, SPRN_PMC5 -	mfspr	r8, SPRN_PMC6 -BEGIN_FTR_SECTION -	mfspr	r10, SPRN_PMC7 -	mfspr	r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -	stw	r3, VCPU_PMC(r9) -	stw	r4, VCPU_PMC + 4(r9) -	stw	r5, VCPU_PMC + 8(r9) -	stw	r6, VCPU_PMC + 12(r9) -	stw	r7, VCPU_PMC + 16(r9) -	stw	r8, VCPU_PMC + 20(r9) -BEGIN_FTR_SECTION -	stw	r10, VCPU_PMC + 24(r9) -	stw	r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -22: - -	/* Secondary threads go off to take a nap on POWER7 */ -BEGIN_FTR_SECTION -	lwz	r0,VCPU_PTID(r9) -	cmpwi	r0,0 -	bne	secondary_nap -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* Restore host DABR and DABRX */ -	ld	r5,HSTATE_DABR(r13) -	li	r6,7 -	mtspr	SPRN_DABR,r5 -	mtspr	SPRN_DABRX,r6 - -	/* Restore SPRG3 */ -	ld	r3,PACA_SPRG3(r13) -	mtspr	SPRN_SPRG3,r3 - -	/* -	 * Reload DEC.  HDEC interrupts were disabled when -	 * we reloaded the host's LPCR value. -	 */ -	ld	r3, HSTATE_DECEXP(r13) -	mftb	r4 -	subf	r4, r4, r3 -	mtspr	SPRN_DEC, r4 - -	/* Reload the host's PMU registers */ -	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */ -	lbz	r4, LPPACA_PMCINUSE(r3) -	cmpwi	r4, 0 -	beq	23f			/* skip if not */ -	lwz	r3, HSTATE_PMC(r13) -	lwz	r4, HSTATE_PMC + 4(r13) -	lwz	r5, HSTATE_PMC + 8(r13) -	lwz	r6, HSTATE_PMC + 12(r13) -	lwz	r8, HSTATE_PMC + 16(r13) -	lwz	r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION -	lwz	r10, HSTATE_PMC + 24(r13) -	lwz	r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -	mtspr	SPRN_PMC1, r3 -	mtspr	SPRN_PMC2, r4 -	mtspr	SPRN_PMC3, r5 -	mtspr	SPRN_PMC4, r6 -	mtspr	SPRN_PMC5, r8 -	mtspr	SPRN_PMC6, r9 -BEGIN_FTR_SECTION -	mtspr	SPRN_PMC7, r10 -	mtspr	SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -	ld	r3, HSTATE_MMCR(r13) -	ld	r4, HSTATE_MMCR + 8(r13) -	ld	r5, HSTATE_MMCR + 16(r13) -	mtspr	SPRN_MMCR1, r4 -	mtspr	SPRN_MMCRA, r5 -	mtspr	SPRN_MMCR0, r3 -	isync -23: -	/* -	 * For external and machine check interrupts, we need -	 * to call the Linux handler to process the interrupt. -	 * We do that by jumping to absolute address 0x500 for -	 * external interrupts, or the machine_check_fwnmi label -	 * for machine checks (since firmware might have patched -	 * the vector area at 0x200).  The [h]rfid at the end of the -	 * handler will return to the book3s_hv_interrupts.S code. -	 * For other interrupts we do the rfid to get back -	 * to the book3s_hv_interrupts.S code here. 
-	 */ -	ld	r8, HSTATE_VMHANDLER(r13) -	ld	r7, HSTATE_HOST_MSR(r13) - -	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK -	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION -	beq	11f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - -	/* RFI into the highmem handler, or branch to interrupt handler */ -	mfmsr	r6 -	li	r0, MSR_RI -	andc	r6, r6, r0 -	mtmsrd	r6, 1			/* Clear RI in MSR */ -	mtsrr0	r8 -	mtsrr1	r7 -	beqa	0x500			/* external interrupt (PPC970) */ -	beq	cr1, 13f		/* machine check */ -	RFI - -	/* On POWER7, we have external interrupts set to use HSRR0/1 */ -11:	mtspr	SPRN_HSRR0, r8 -	mtspr	SPRN_HSRR1, r7 -	ba	0x500 +	/* Unset guest mode */ +	li	r0, KVM_GUEST_MODE_NONE +	stb	r0, HSTATE_IN_GUEST(r13) -13:	b	machine_check_fwnmi +	ld	r0, 112+PPC_LR_STKOFF(r1) +	addi	r1, r1, 112 +	mtlr	r0 +	blr  /*   * Check whether an HDSI is an HPTE not found fault or something else. @@ -1280,7 +1798,7 @@ kvmppc_hdsi:  	/* Search the hash table. */  	mr	r3, r9			/* vcpu pointer */  	li	r7, 1			/* data fault */ -	bl	.kvmppc_hpte_hv_fault +	bl	kvmppc_hpte_hv_fault  	ld	r9, HSTATE_KVM_VCPU(r13)  	ld	r10, VCPU_PC(r9)  	ld	r11, VCPU_MSR(r9) @@ -1300,8 +1818,7 @@ kvmppc_hdsi:  	mtspr	SPRN_SRR0, r10  	mtspr	SPRN_SRR1, r11  	li	r10, BOOK3S_INTERRUPT_DATA_STORAGE -	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */ -	rotldi	r11, r11, 63 +	bl	kvmppc_msr_interrupt  fast_interrupt_c_return:  6:	ld	r7, VCPU_CTR(r9)  	lwz	r8, VCPU_XER(r9) @@ -1333,7 +1850,7 @@ fast_interrupt_c_return:  	stw	r8, VCPU_LAST_INST(r9)  	/* Unset guest mode. */ -	li	r0, KVM_GUEST_MODE_NONE +	li	r0, KVM_GUEST_MODE_HOST_HV  	stb	r0, HSTATE_IN_GUEST(r13)  	b	guest_exit_cont @@ -1355,7 +1872,7 @@ kvmppc_hisi:  	mr	r4, r10  	mr	r6, r11  	li	r7, 0			/* instruction fault */ -	bl	.kvmppc_hpte_hv_fault +	bl	kvmppc_hpte_hv_fault  	ld	r9, HSTATE_KVM_VCPU(r13)  	ld	r10, VCPU_PC(r9)  	ld	r11, VCPU_MSR(r9) @@ -1370,8 +1887,7 @@ kvmppc_hisi:  1:	mtspr	SPRN_SRR0, r10  	mtspr	SPRN_SRR1, r11  	li	r10, BOOK3S_INTERRUPT_INST_STORAGE -	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */ -	rotldi	r11, r11, 63 +	bl	kvmppc_msr_interrupt  	b	fast_interrupt_c_return  3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */ @@ -1388,7 +1904,8 @@ kvmppc_hisi:  hcall_try_real_mode:  	ld	r3,VCPU_GPR(R3)(r9)  	andi.	r0,r11,MSR_PR -	bne	guest_exit_cont +	/* sc 1 from userspace - reflect to guest syscall */ +	bne	sc_1_fast_return  	clrrdi	r3,r3,2  	cmpldi	r3,hcall_real_table_end - hcall_real_table  	bge	guest_exit_cont @@ -1409,6 +1926,14 @@ hcall_try_real_mode:  	ld	r11,VCPU_MSR(r4)  	b	fast_guest_return +sc_1_fast_return: +	mtspr	SPRN_SRR0,r10 +	mtspr	SPRN_SRR1,r11 +	li	r10, BOOK3S_INTERRUPT_SYSCALL +	bl	kvmppc_msr_interrupt +	mr	r4,r9 +	b	fast_guest_return +  	/* We've attempted a real mode hcall, but it's punted it back  	 * to userspace.  
We need to restore some clobbered volatiles  	 * before resuming the pass-it-to-qemu path */ @@ -1421,16 +1946,16 @@ hcall_real_fallback:  	.globl	hcall_real_table  hcall_real_table:  	.long	0		/* 0 - unused */ -	.long	.kvmppc_h_remove - hcall_real_table -	.long	.kvmppc_h_enter - hcall_real_table -	.long	.kvmppc_h_read - hcall_real_table +	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table +	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table +	.long	DOTSYM(kvmppc_h_read) - hcall_real_table  	.long	0		/* 0x10 - H_CLEAR_MOD */  	.long	0		/* 0x14 - H_CLEAR_REF */ -	.long	.kvmppc_h_protect - hcall_real_table -	.long	0		/* 0x1c - H_GET_TCE */ -	.long	.kvmppc_h_put_tce - hcall_real_table +	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table +	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table +	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table  	.long	0		/* 0x24 - H_SET_SPRG0 */ -	.long	.kvmppc_h_set_dabr - hcall_real_table +	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table  	.long	0		/* 0x2c */  	.long	0		/* 0x30 */  	.long	0		/* 0x34 */ @@ -1446,11 +1971,11 @@ hcall_real_table:  	.long	0		/* 0x5c */  	.long	0		/* 0x60 */  #ifdef CONFIG_KVM_XICS -	.long	.kvmppc_rm_h_eoi - hcall_real_table -	.long	.kvmppc_rm_h_cppr - hcall_real_table -	.long	.kvmppc_rm_h_ipi - hcall_real_table +	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table +	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table +	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table  	.long	0		/* 0x70 - H_IPOLL */ -	.long	.kvmppc_rm_h_xirr - hcall_real_table +	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table  #else  	.long	0		/* 0x64 - H_EOI */  	.long	0		/* 0x68 - H_CPPR */ @@ -1484,7 +2009,7 @@ hcall_real_table:  	.long	0		/* 0xd4 */  	.long	0		/* 0xd8 */  	.long	0		/* 0xdc */ -	.long	.kvmppc_h_cede - hcall_real_table +	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table  	.long	0		/* 0xe4 */  	.long	0		/* 0xe8 */  	.long	0		/* 0xec */ @@ -1501,15 +2026,35 @@ hcall_real_table:  	.long	0		/* 0x118 */  	.long	0		/* 0x11c */  	.long	0		/* 0x120 */ -	.long	.kvmppc_h_bulk_remove - hcall_real_table +	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table +	.long	0		/* 0x128 */ +	.long	0		/* 0x12c */ +	.long	0		/* 0x130 */ +	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table  hcall_real_table_end:  ignore_hdec:  	mr	r4,r9  	b	fast_guest_return +_GLOBAL(kvmppc_h_set_xdabr) +	andi.	r0, r5, DABRX_USER | DABRX_KERNEL +	beq	6f +	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI +	andc.	r0, r5, r0 +	beq	3f +6:	li	r3, H_PARAMETER +	blr +  _GLOBAL(kvmppc_h_set_dabr) +	li	r5, DABRX_USER | DABRX_KERNEL +3: +BEGIN_FTR_SECTION +	b	2f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	std	r4,VCPU_DABR(r3) +	stw	r5, VCPU_DABRX(r3) +	mtspr	SPRN_DABRX, r5  	/* Work around P7 bug where DABR can get corrupted on mtspr */  1:	mtspr	SPRN_DABR,r4  	mfspr	r5, SPRN_DABR @@ -1519,6 +2064,17 @@ _GLOBAL(kvmppc_h_set_dabr)  	li	r3,0  	blr +	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ +2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW +	rlwimi	r5, r4, 1, DAWRX_WT +	clrrdi	r4, r4, 3 +	std	r4, VCPU_DAWR(r3) +	std	r5, VCPU_DAWRX(r3) +	mtspr	SPRN_DAWR, r4 +	mtspr	SPRN_DAWRX, r5 +	li	r3, 0 +	blr +  _GLOBAL(kvmppc_h_cede)  	ori	r11,r11,MSR_EE  	std	r11,VCPU_MSR(r3) @@ -1542,7 +2098,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)  	 * up to the host.  	 */  	ld	r5,HSTATE_KVM_VCORE(r13) -	lwz	r6,VCPU_PTID(r3) +	lbz	r6,HSTATE_PTID(r13)  	lwz	r8,VCORE_ENTRY_EXIT(r5)  	clrldi	r8,r8,56  	li	r0,1 @@ -1555,11 +2111,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)  	bge	kvm_cede_exit  	stwcx.	
r4,0,r6  	bne	31b -	li	r0,1 -	stb	r0,HSTATE_NAPPING(r13)  	/* order napping_threads update vs testing entry_exit_count */ -	lwsync -	mr	r4,r3 +	isync +	li	r0,NAPPING_CEDE +	stb	r0,HSTATE_NAPPING(r13)  	lwz	r7,VCORE_ENTRY_EXIT(r5)  	cmpwi	r7,0x100  	bge	33f		/* another thread already exiting */ @@ -1591,16 +2146,24 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)  	std	r31, VCPU_GPR(R31)(r3)  	/* save FP state */ -	bl	.kvmppc_save_fp +	bl	kvmppc_save_fp  	/* -	 * Take a nap until a decrementer or external interrupt occurs, -	 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR +	 * Take a nap until a decrementer or external or doobell interrupt +	 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the +	 * runlatch bit before napping.  	 */ +	mfspr	r2, SPRN_CTRLF +	clrrdi	r2, r2, 1 +	mtspr	SPRN_CTRLT, r2 +  	li	r0,1  	stb	r0,HSTATE_HWTHREAD_REQ(r13)  	mfspr	r5,SPRN_LPCR  	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1 +BEGIN_FTR_SECTION +	oris	r5,r5,LPCR_PECEDP@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	mtspr	SPRN_LPCR,r5  	isync  	li	r0, 0 @@ -1612,6 +2175,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)  	nap  	b	. +33:	mr	r4, r3 +	li	r3, 0 +	li	r12, 0 +	b	34f +  kvm_end_cede:  	/* get vcpu pointer */  	ld	r4, HSTATE_KVM_VCPU(r13) @@ -1641,12 +2209,15 @@ kvm_end_cede:  	ld	r29, VCPU_GPR(R29)(r4)  	ld	r30, VCPU_GPR(R30)(r4)  	ld	r31, VCPU_GPR(R31)(r4) +  +	/* Check the wake reason in SRR1 to see why we got here */ +	bl	kvmppc_check_wake_reason  	/* clear our bit in vcore->napping_threads */ -33:	ld	r5,HSTATE_KVM_VCORE(r13) -	lwz	r3,VCPU_PTID(r4) +34:	ld	r5,HSTATE_KVM_VCORE(r13) +	lbz	r7,HSTATE_PTID(r13)  	li	r0,1 -	sld	r0,r0,r3 +	sld	r0,r0,r7  	addi	r6,r5,VCORE_NAPPING_THREADS  32:	lwarx	r7,0,r6  	andc	r7,r7,r0 @@ -1655,23 +2226,18 @@ kvm_end_cede:  	li	r0,0  	stb	r0,HSTATE_NAPPING(r13) -	/* Check the wake reason in SRR1 to see why we got here */ -	mfspr	r3, SPRN_SRR1 -	rlwinm	r3, r3, 44-31, 0x7	/* extract wake reason field */ -	cmpwi	r3, 4			/* was it an external interrupt? */ -	li	r12, BOOK3S_INTERRUPT_EXTERNAL +	/* See if the wake reason means we need to exit */ +	stw	r12, VCPU_TRAP(r4)  	mr	r9, r4 -	ld	r10, VCPU_PC(r9) -	ld	r11, VCPU_MSR(r9) -	beq	do_ext_interrupt	/* if so */ +	cmpdi	r3, 0 +	bgt	guest_exit_cont  	/* see if any other thread is already exiting */  	lwz	r0,VCORE_ENTRY_EXIT(r5)  	cmpwi	r0,0x100 -	blt	kvmppc_cede_reentry	/* if not go back to guest */ +	bge	guest_exit_cont -	/* some threads are exiting, so go to the guest exit path */ -	b	hcall_real_fallback +	b	kvmppc_cede_reentry	/* if not go back to guest */  	/* cede when already previously prodded case */  kvm_cede_prodded: @@ -1689,85 +2255,144 @@ kvm_cede_exit:  	/* Try to handle a machine check in real mode */  machine_check_realmode:  	mr	r3, r9		/* get vcpu pointer */ -	bl	.kvmppc_realmode_machine_check +	bl	kvmppc_realmode_machine_check  	nop -	cmpdi	r3, 0		/* continue exiting from guest? */ +	cmpdi	r3, 0		/* Did we handle MCE ? */  	ld	r9, HSTATE_KVM_VCPU(r13)  	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK -	beq	mc_cont +	/* +	 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through +	 * machine check interrupt (set HSRR0 to 0x200). And for handled +	 * errors (no-fatal), just go back to guest execution with current +	 * HSRR0 instead of exiting guest. This new approach will inject +	 * machine check to guest for fatal error causing guest to crash. 
+	 * +	 * The old code used to return to host for unhandled errors which +	 * was causing guest to hang with soft lockups inside guest and +	 * makes it difficult to recover guest instance. +	 */ +	ld	r10, VCPU_PC(r9) +	ld	r11, VCPU_MSR(r9) +	bne	2f	/* Continue guest execution. */  	/* If not, deliver a machine check.  SRR0/1 are already set */  	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK -	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */ -	rotldi	r11, r11, 63 -	b	fast_interrupt_c_return +	ld	r11, VCPU_MSR(r9) +	bl	kvmppc_msr_interrupt +2:	b	fast_interrupt_c_return -secondary_too_late: -	ld	r5,HSTATE_KVM_VCORE(r13) -	HMT_LOW -13:	lbz	r3,VCORE_IN_GUEST(r5) -	cmpwi	r3,0 -	bne	13b -	HMT_MEDIUM -	ld	r11,PACA_SLBSHADOWPTR(r13) +/* + * Check the reason we woke from nap, and take appropriate action. + * Returns: + *	0 if nothing needs to be done + *	1 if something happened that needs to be handled by the host + *	-1 if there was a guest wakeup (IPI) + * + * Also sets r12 to the interrupt vector for any interrupt that needs + * to be handled now by the host (0x500 for external interrupt), or zero. + */ +kvmppc_check_wake_reason: +	mfspr	r6, SPRN_SRR1 +BEGIN_FTR_SECTION +	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */ +FTR_SECTION_ELSE +	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */ +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) +	cmpwi	r6, 8			/* was it an external interrupt? */ +	li	r12, BOOK3S_INTERRUPT_EXTERNAL +	beq	kvmppc_read_intr	/* if so, see what it was */ +	li	r3, 0 +	li	r12, 0 +	cmpwi	r6, 6			/* was it the decrementer? */ +	beq	0f +BEGIN_FTR_SECTION +	cmpwi	r6, 5			/* privileged doorbell? */ +	beq	0f +	cmpwi	r6, 3			/* hypervisor doorbell? */ +	beq	3f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +	li	r3, 1			/* anything else, return 1 */ +0:	blr -	.rept	SLB_NUM_BOLTED -	ld	r5,SLBSHADOW_SAVEAREA(r11) -	ld	r6,SLBSHADOW_SAVEAREA+8(r11) -	andis.	r7,r5,SLB_ESID_V@h -	beq	1f -	slbmte	r6,r5 -1:	addi	r11,r11,16 -	.endr +	/* hypervisor doorbell */ +3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL +	li	r3, 1 +	blr -secondary_nap: -	/* Clear our vcpu pointer so we don't come back in early */ -	li	r0, 0 -	std	r0, HSTATE_KVM_VCPU(r13) -	lwsync -	/* Clear any pending IPI - assume we're a secondary thread */ -	ld	r5, HSTATE_XICS_PHYS(r13) +/* + * Determine what sort of external interrupt is pending (if any). + * Returns: + *	0 if no interrupt is pending + *	1 if an interrupt is pending that needs to be handled by the host + *	-1 if there was a guest wakeup IPI (which has now been cleared) + */ +kvmppc_read_intr: +	/* see if a host IPI is pending */ +	li	r3, 1 +	lbz	r0, HSTATE_HOST_IPI(r13) +	cmpwi	r0, 0 +	bne	1f + +	/* Now read the interrupt from the ICP */ +	ld	r6, HSTATE_XICS_PHYS(r13)  	li	r7, XICS_XIRR -	lwzcix	r3, r5, r7		/* ack any pending interrupt */ -	rlwinm.	r0, r3, 0, 0xffffff	/* any pending? */ -	beq	37f +	cmpdi	r6, 0 +	beq-	1f +	lwzcix	r0, r6, r7 +	rlwinm.	r3, r0, 0, 0xffffff  	sync -	li	r0, 0xff -	li	r6, XICS_MFRR -	stbcix	r0, r5, r6		/* clear the IPI */ -	stwcix	r3, r5, r7		/* EOI it */ -37:	sync +	beq	1f			/* if nothing pending in the ICP */ -	/* increment the nap count and then go to nap mode */ -	ld	r4, HSTATE_KVM_VCORE(r13) -	addi	r4, r4, VCORE_NAP_COUNT -	lwsync				/* make previous updates visible */ -51:	lwarx	r3, 0, r4 -	addi	r3, r3, 1 -	stwcx.	r3, 0, r4 -	bne	51b +	/* We found something in the ICP... 
+	 * +	 * If it's not an IPI, stash it in the PACA and return to +	 * the host, we don't (yet) handle directing real external +	 * interrupts directly to the guest +	 */ +	cmpwi	r3, XICS_IPI		/* if there is, is it an IPI? */ +	bne	42f -kvm_no_guest: -	li	r0, KVM_HWTHREAD_IN_NAP -	stb	r0, HSTATE_HWTHREAD_STATE(r13) +	/* It's an IPI, clear the MFRR and EOI it */ +	li	r3, 0xff +	li	r8, XICS_MFRR +	stbcix	r3, r6, r8		/* clear the IPI */ +	stwcix	r0, r6, r7		/* EOI it */ +	sync -	li	r3, LPCR_PECE0 -	mfspr	r4, SPRN_LPCR -	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 -	mtspr	SPRN_LPCR, r4 -	isync -	std	r0, HSTATE_SCRATCH0(r13) -	ptesync -	ld	r0, HSTATE_SCRATCH0(r13) -1:	cmpd	r0, r0 -	bne	1b -	nap -	b	. +	/* We need to re-check host IPI now in case it got set in the +	 * meantime. If it's clear, we bounce the interrupt to the +	 * guest +	 */ +	lbz	r0, HSTATE_HOST_IPI(r13) +	cmpwi	r0, 0 +	bne-	43f + +	/* OK, it's an IPI for us */ +	li	r3, -1 +1:	blr + +42:	/* It's not an IPI and it's for the host, stash it in the PACA +	 * before exit, it will be picked up by the host ICP driver +	 */ +	stw	r0, HSTATE_SAVED_XIRR(r13) +	li	r3, 1 +	b	1b + +43:	/* We raced with the host, we need to resend that IPI, bummer */ +	li	r0, IPI_PRIORITY +	stbcix	r0, r6, r8		/* set the IPI */ +	sync +	li	r3, 1 +	b	1b  /*   * Save away FP, VMX and VSX registers.   * r3 = vcpu pointer + * N.B. r30 and r31 are volatile across this function, + * thus it is not callable from C.   */ -_GLOBAL(kvmppc_save_fp) +kvmppc_save_fp: +	mflr	r30 +	mr	r31,r3  	mfmsr	r5  	ori	r8,r5,MSR_FP  #ifdef CONFIG_ALTIVEC @@ -1782,52 +2407,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif  	mtmsrd	r8  	isync -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION -	reg = 0 -	.rept	32 -	li	r6,reg*16+VCPU_VSRS -	STXVD2X(reg,R6,R3) -	reg = reg + 1 -	.endr -FTR_SECTION_ELSE -#endif -	reg = 0 -	.rept	32 -	stfd	reg,reg*8+VCPU_FPRS(r3) -	reg = reg + 1 -	.endr -#ifdef CONFIG_VSX -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#endif -	mffs	fr0 -	stfd	fr0,VCPU_FPSCR(r3) - +	addi	r3,r3,VCPU_FPRS +	bl	.store_fp_state  #ifdef CONFIG_ALTIVEC  BEGIN_FTR_SECTION -	reg = 0 -	.rept	32 -	li	r6,reg*16+VCPU_VRS -	stvx	reg,r6,r3 -	reg = reg + 1 -	.endr -	mfvscr	vr0 -	li	r6,VCPU_VSCR -	stvx	vr0,r6,r3 +	addi	r3,r31,VCPU_VRS +	bl	.store_vr_state  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif  	mfspr	r6,SPRN_VRSAVE -	stw	r6,VCPU_VRSAVE(r3) -	mtmsrd	r5 -	isync +	stw	r6,VCPU_VRSAVE(r31) +	mtlr	r30  	blr  /*   * Load up FP, VMX and VSX registers   * r4 = vcpu pointer + * N.B. r30 and r31 are volatile across this function, + * thus it is not callable from C.   
*/ -	.globl	kvmppc_load_fp  kvmppc_load_fp: +	mflr	r30 +	mr	r31,r4  	mfmsr	r9  	ori	r8,r9,MSR_FP  #ifdef CONFIG_ALTIVEC @@ -1842,40 +2443,59 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif  	mtmsrd	r8  	isync -	lfd	fr0,VCPU_FPSCR(r4) -	MTFSF_L(fr0) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION -	reg = 0 -	.rept	32 -	li	r7,reg*16+VCPU_VSRS -	LXVD2X(reg,R7,R4) -	reg = reg + 1 -	.endr -FTR_SECTION_ELSE -#endif -	reg = 0 -	.rept	32 -	lfd	reg,reg*8+VCPU_FPRS(r4) -	reg = reg + 1 -	.endr -#ifdef CONFIG_VSX -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#endif - +	addi	r3,r4,VCPU_FPRS +	bl	.load_fp_state  #ifdef CONFIG_ALTIVEC  BEGIN_FTR_SECTION -	li	r7,VCPU_VSCR -	lvx	vr0,r7,r4 -	mtvscr	vr0 -	reg = 0 -	.rept	32 -	li	r7,reg*16+VCPU_VRS -	lvx	reg,r7,r4 -	reg = reg + 1 -	.endr +	addi	r3,r31,VCPU_VRS +	bl	.load_vr_state  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif -	lwz	r7,VCPU_VRSAVE(r4) +	lwz	r7,VCPU_VRSAVE(r31)  	mtspr	SPRN_VRSAVE,r7 +	mtlr	r30 +	mr	r4,r31 +	blr + +/* + * We come here if we get any exception or interrupt while we are + * executing host real mode code while in guest MMU context. + * For now just spin, but we should do something better. + */ +kvmppc_bad_host_intr: +	b	. + +/* + * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken + * from VCPU_INTR_MSR and is modified based on the required TM state changes. + *   r11 has the guest MSR value (in/out) + *   r9 has a vcpu pointer (in) + *   r0 is used as a scratch register + */ +kvmppc_msr_interrupt: +	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62 +	cmpwi	r0, 2 /* Check if we are in transactional state..  */ +	ld	r11, VCPU_INTR_MSR(r9) +	bne	1f +	/* ... if transactional, change to suspended */ +	li	r0, 1 +1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG +	blr + +/* + * This works around a hardware bug on POWER8E processors, where + * writing a 1 to the MMCR0[PMAO] bit doesn't generate a + * performance monitor interrupt.  Instead, when we need to have + * an interrupt pending, we have to arrange for a counter to overflow. 
+ */ +kvmppc_fix_pmao: +	li	r3, 0 +	mtspr	SPRN_MMCR2, r3 +	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h +	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN +	mtspr	SPRN_MMCR0, r3 +	lis	r3, 0x7fff +	ori	r3, r3, 0xffff +	mtspr	SPRN_PMC6, r3 +	isync  	blr diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 17cfae5497a..d044b8b7c69 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,9 +25,17 @@  #include <asm/exception-64s.h>  #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) 		name +#else  #define FUNC(name) 		GLUE(.,name) +#endif +#define GET_SHADOW_VCPU(reg)    addi	reg, r13, PACA_SVCPU +  #elif defined(CONFIG_PPC_BOOK3S_32)  #define FUNC(name)		name +#define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2) +  #endif /* CONFIG_PPC_BOOK3S_XX */  #define VCPU_LOAD_NVGPRS(vcpu) \ @@ -87,15 +95,40 @@ kvm_start_entry:  	VCPU_LOAD_NVGPRS(r4)  kvm_start_lightweight: +	/* Copy registers into shadow vcpu so we can access them in real mode */ +	GET_SHADOW_VCPU(r3) +	bl	FUNC(kvmppc_copy_to_svcpu) +	nop +	REST_GPR(4, r1)  #ifdef CONFIG_PPC_BOOK3S_64 +	/* Get the dcbz32 flag */  	PPC_LL	r3, VCPU_HFLAGS(r4)  	rldicl	r3, r3, 0, 63		/* r3 &= 1 */  	stb	r3, HSTATE_RESTORE_HID5(r13)  	/* Load up guest SPRG3 value, since it's user readable */ -	ld	r3, VCPU_SHARED(r4) -	ld	r3, VCPU_SHARED_SPRG3(r3) +	lwz	r3, VCPU_SHAREDBE(r4) +	cmpwi	r3, 0 +	ld	r5, VCPU_SHARED(r4) +	beq	sprg3_little_endian +sprg3_big_endian: +#ifdef __BIG_ENDIAN__ +	ld	r3, VCPU_SHARED_SPRG3(r5) +#else +	addi	r5, r5, VCPU_SHARED_SPRG3 +	ldbrx	r3, 0, r5 +#endif +	b	after_sprg3_load +sprg3_little_endian: +#ifdef __LITTLE_ENDIAN__ +	ld	r3, VCPU_SHARED_SPRG3(r5) +#else +	addi	r5, r5, VCPU_SHARED_SPRG3 +	ldbrx	r3, 0, r5 +#endif + +after_sprg3_load:  	mtspr	SPRN_SPRG3, r3  #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -111,9 +144,6 @@ kvm_start_lightweight:   *   */ -.global kvmppc_handler_highmem -kvmppc_handler_highmem: -  	/*  	 * Register usage at this point:  	 * @@ -122,21 +152,37 @@ kvmppc_handler_highmem:  	 * R12      = exit handler id  	 * R13      = PACA  	 * SVCPU.*  = guest * +	 * MSR.EE   = 1  	 *  	 */ -	/* R7 = vcpu */ -	PPC_LL	r7, GPR4(r1) +	PPC_LL	r3, GPR4(r1)		/* vcpu pointer */ + +	/* +	 * kvmppc_copy_from_svcpu can clobber volatile registers, save +	 * the exit handler id to the vcpu and restore it from there later. +	 */ +	stw	r12, VCPU_TRAP(r3) + +	/* Transfer reg values from shadow vcpu back to vcpu struct */ +	/* On 64-bit, interrupts are still off at this point */ + +	GET_SHADOW_VCPU(r4) +	bl	FUNC(kvmppc_copy_from_svcpu) +	nop  #ifdef CONFIG_PPC_BOOK3S_64  	/*  	 * Reload kernel SPRG3 value.  	 * No need to save guest value as usermode can't modify SPRG3.  	 
*/ -	ld	r3, PACA_SPRG3(r13) -	mtspr	SPRN_SPRG3, r3 +	ld	r3, PACA_SPRG_VDSO(r13) +	mtspr	SPRN_SPRG_VDSO_WRITE, r3  #endif /* CONFIG_PPC_BOOK3S_64 */ +	/* R7 = vcpu */ +	PPC_LL	r7, GPR4(r1) +  	PPC_STL	r14, VCPU_GPR(R14)(r7)  	PPC_STL	r15, VCPU_GPR(R15)(r7)  	PPC_STL	r16, VCPU_GPR(R16)(r7) @@ -157,11 +203,11 @@ kvmppc_handler_highmem:  	PPC_STL	r31, VCPU_GPR(R31)(r7)  	/* Pass the exit number as 3rd argument to kvmppc_handle_exit */ -	mr	r5, r12 +	lwz	r5, VCPU_TRAP(r7)  	/* Restore r3 (kvm_run) and r4 (vcpu) */  	REST_2GPRS(3, r1) -	bl	FUNC(kvmppc_handle_exit) +	bl	FUNC(kvmppc_handle_exit_pr)  	/* If RESUME_GUEST, get back in the loop */  	cmpwi	r3, RESUME_GUEST diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index da8b13c4b77..5a1ab1250a0 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -28,7 +28,7 @@  #include <asm/mmu_context.h>  #include <asm/hw_irq.h> -#include "trace.h" +#include "trace_pr.h"  #define PTE_SIZE	12 @@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)  		       HPTEG_HASH_BITS_VPTE_LONG);  } +#ifdef CONFIG_PPC_BOOK3S_64 +static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage) +{ +	return hash_64((vpage & 0xffffffff0ULL) >> 4, +		       HPTEG_HASH_BITS_VPTE_64K); +} +#endif +  void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)  {  	u64 index; @@ -83,6 +91,15 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)  	hlist_add_head_rcu(&pte->list_vpte_long,  			   &vcpu3s->hpte_hash_vpte_long[index]); +#ifdef CONFIG_PPC_BOOK3S_64 +	/* Add to vPTE_64k list */ +	index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage); +	hlist_add_head_rcu(&pte->list_vpte_64k, +			   &vcpu3s->hpte_hash_vpte_64k[index]); +#endif + +	vcpu3s->hpte_cache_count++; +  	spin_unlock(&vcpu3s->mmu_lock);  } @@ -113,10 +130,13 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)  	hlist_del_init_rcu(&pte->list_pte_long);  	hlist_del_init_rcu(&pte->list_vpte);  	hlist_del_init_rcu(&pte->list_vpte_long); +#ifdef CONFIG_PPC_BOOK3S_64 +	hlist_del_init_rcu(&pte->list_vpte_64k); +#endif +	vcpu3s->hpte_cache_count--;  	spin_unlock(&vcpu3s->mmu_lock); -	vcpu3s->hpte_cache_count--;  	call_rcu(&pte->rcu_head, free_pte_rcu);  } @@ -219,6 +239,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)  	rcu_read_unlock();  } +#ifdef CONFIG_PPC_BOOK3S_64 +/* Flush with mask 0xffffffff0 */ +static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp) +{ +	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); +	struct hlist_head *list; +	struct hpte_cache *pte; +	u64 vp_mask = 0xffffffff0ULL; + +	list = &vcpu3s->hpte_hash_vpte_64k[ +		kvmppc_mmu_hash_vpte_64k(guest_vp)]; + +	rcu_read_lock(); + +	/* Check the list for matching entries and invalidate */ +	hlist_for_each_entry_rcu(pte, list, list_vpte_64k) +		if ((pte->pte.vpage & vp_mask) == guest_vp) +			invalidate_pte(vcpu, pte); + +	rcu_read_unlock(); +} +#endif +  /* Flush with mask 0xffffff000 */  static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)  { @@ -249,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)  	case 0xfffffffffULL:  		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);  		break; +#ifdef CONFIG_PPC_BOOK3S_64 +	case 0xffffffff0ULL: +		kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp); +		break; +#endif  	case 0xffffff000ULL:  		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);  		break; @@ -285,15 +333,19 @@ 
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)  	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);  	struct hpte_cache *pte; -	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); -	vcpu3s->hpte_cache_count++; -  	if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)  		kvmppc_mmu_pte_flush_all(vcpu); +	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); +  	return pte;  } +void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte) +{ +	kmem_cache_free(hpte_cache, pte); +} +  void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)  {  	kvmppc_mmu_pte_flush(vcpu, 0, 0); @@ -320,6 +372,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)  				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));  	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,  				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); +#ifdef CONFIG_PPC_BOOK3S_64 +	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k, +				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k)); +#endif  	spin_lock_init(&vcpu3s->mmu_lock); diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a59a25a1321..6c8011fd57e 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -160,21 +160,23 @@  static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)  { -	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); +	kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]);  }  static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)  { -	u64 dsisr; -	struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; +	u32 dsisr; +	u64 msr = kvmppc_get_msr(vcpu); -	shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); -	shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); -	shared->dar = eaddr; +	msr = kvmppc_set_field(msr, 33, 36, 0); +	msr = kvmppc_set_field(msr, 42, 47, 0); +	kvmppc_set_msr(vcpu, msr); +	kvmppc_set_dar(vcpu, eaddr);  	/* Page Fault */  	dsisr = kvmppc_set_field(0, 33, 33, 1);  	if (is_store) -		shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); +		dsisr = kvmppc_set_field(dsisr, 38, 38, 1); +	kvmppc_set_dsisr(vcpu, dsisr);  	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);  } @@ -207,11 +209,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,  	/* put in registers */  	switch (ls_type) {  	case FPU_LS_SINGLE: -		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); +		kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs));  		vcpu->arch.qpr[rs] = *((u32*)tmp);  		break;  	case FPU_LS_DOUBLE: -		vcpu->arch.fpr[rs] = *((u64*)tmp); +		VCPU_FPR(vcpu, rs) = *((u64*)tmp);  		break;  	} @@ -233,18 +235,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,  	switch (ls_type) {  	case FPU_LS_SINGLE: -		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); +		kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp);  		val = *((u32*)tmp);  		len = sizeof(u32);  		break;  	case FPU_LS_SINGLE_LOW: -		*((u32*)tmp) = vcpu->arch.fpr[rs]; -		val = vcpu->arch.fpr[rs] & 0xffffffff; +		*((u32*)tmp) = VCPU_FPR(vcpu, rs); +		val = VCPU_FPR(vcpu, rs) & 0xffffffff;  		len = sizeof(u32);  		break;  	case FPU_LS_DOUBLE: -		*((u64*)tmp) = vcpu->arch.fpr[rs]; -		val = vcpu->arch.fpr[rs]; +		*((u64*)tmp) = VCPU_FPR(vcpu, rs); +		val = VCPU_FPR(vcpu, rs);  		len = sizeof(u64);  		break;  	default: @@ -301,7 +303,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,  	emulated = EMULATE_DONE;  	/* put in registers */ -	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); +	kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, 
rs));  	vcpu->arch.qpr[rs] = tmp[1];  	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -319,7 +321,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,  	u32 tmp[2];  	int len = w ? sizeof(u32) : sizeof(u64); -	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); +	kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]);  	tmp[1] = vcpu->arch.qpr[rs];  	r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -512,7 +514,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,  						 u32 *src2, u32 *src3))  {  	u32 *qpr = vcpu->arch.qpr; -	u64 *fpr = vcpu->arch.fpr;  	u32 ps0_out;  	u32 ps0_in1, ps0_in2, ps0_in3;  	u32 ps1_in1, ps1_in2, ps1_in3; @@ -521,20 +522,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,  	WARN_ON(rc);  	/* PS0 */ -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1); -	kvm_cvt_df(&fpr[reg_in2], &ps0_in2); -	kvm_cvt_df(&fpr[reg_in3], &ps0_in3); +	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1); +	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); +	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3);  	if (scalar & SCALAR_LOW)  		ps0_in2 = qpr[reg_in2]; -	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); +	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);  	dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",  			  ps0_in1, ps0_in2, ps0_in3, ps0_out);  	if (!(scalar & SCALAR_NO_PS0)) -		kvm_cvt_fd(&ps0_out, &fpr[reg_out]); +		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));  	/* PS1 */  	ps1_in1 = qpr[reg_in1]; @@ -545,7 +546,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,  		ps1_in2 = ps0_in2;  	if (!(scalar & SCALAR_NO_PS1)) -		func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); +		func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);  	dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",  			  ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); @@ -561,7 +562,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,  						 u32 *src2))  {  	u32 *qpr = vcpu->arch.qpr; -	u64 *fpr = vcpu->arch.fpr;  	u32 ps0_out;  	u32 ps0_in1, ps0_in2;  	u32 ps1_out; @@ -571,20 +571,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,  	WARN_ON(rc);  	/* PS0 */ -	kvm_cvt_df(&fpr[reg_in1], &ps0_in1); +	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);  	if (scalar & SCALAR_LOW)  		ps0_in2 = qpr[reg_in2];  	else -		kvm_cvt_df(&fpr[reg_in2], &ps0_in2); +		kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); -	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); +	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2);  	if (!(scalar & SCALAR_NO_PS0)) {  		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",  				  ps0_in1, ps0_in2, ps0_out); -		kvm_cvt_fd(&ps0_out, &fpr[reg_out]); +		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));  	}  	/* PS1 */ @@ -594,7 +594,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,  	if (scalar & SCALAR_HIGH)  		ps1_in2 = ps0_in2; -	func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); +	func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2);  	if (!(scalar & SCALAR_NO_PS1)) {  		qpr[reg_out] = ps1_out; @@ -612,7 +612,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,  						 u32 *dst, u32 *src1))  {  	u32 *qpr = vcpu->arch.qpr; -	u64 *fpr = vcpu->arch.fpr;  	u32 ps0_out, ps0_in;  	u32 ps1_in; @@ -620,17 +619,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,  	WARN_ON(rc);  	/* PS0 */ -	kvm_cvt_df(&fpr[reg_in], &ps0_in); -	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); +	
kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in); +	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in);  	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",  			  ps0_in, ps0_out); -	kvm_cvt_fd(&ps0_out, &fpr[reg_out]); +	kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));  	/* PS1 */  	ps1_in = qpr[reg_in]; -	func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); +	func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in);  	dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",  			  ps1_in, qpr[reg_out]); @@ -649,10 +648,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  	int ax_rc = inst_get_field(inst, 21, 25);  	short full_d = inst_get_field(inst, 16, 31); -	u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; -	u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; -	u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; -	u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; +	u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); +	u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); +	u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); +	u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc);  	bool rcomp = (inst & 1) ? true : false;  	u32 cr = kvmppc_get_cr(vcpu); @@ -663,7 +662,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  	if (!kvmppc_inst_is_paired_single(vcpu, inst))  		return EMULATE_FAIL; -	if (!(vcpu->arch.shared->msr & MSR_FP)) { +	if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {  		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);  		return EMULATE_AGAIN;  	} @@ -674,11 +673,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  	/* Do we need to clear FE0 / FE1 here? Don't think so. */  #ifdef DEBUG -	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { +	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {  		u32 f; -		kvm_cvt_df(&vcpu->arch.fpr[i], &f); +		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);  		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n", -			i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); +			i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]);  	}  #endif @@ -764,8 +763,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			break;  		}  		case OP_4X_PS_NEG: -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; -			vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); +			VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL;  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			vcpu->arch.qpr[ax_rd] ^= 0x80000000;  			break; @@ -775,7 +774,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			break;  		case OP_4X_PS_MR:  			WARN_ON(rcomp); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			break;  		case OP_4X_PS_CMPO1: @@ -784,44 +783,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			break;  		case OP_4X_PS_NABS:  			WARN_ON(rcomp); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; -			vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); +			VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL;  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			vcpu->arch.qpr[ax_rd] |= 0x80000000;  			break;  		case OP_4X_PS_ABS:  			WARN_ON(rcomp); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; -			vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); +			VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL;  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			vcpu->arch.qpr[ax_rd] &= ~0x80000000;  			
break;  		case OP_4X_PS_MERGE00:  			WARN_ON(rcomp); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; -			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ -			kvm_cvt_df(&vcpu->arch.fpr[ax_rb], +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra); +			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ +			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),  				   &vcpu->arch.qpr[ax_rd]);  			break;  		case OP_4X_PS_MERGE01:  			WARN_ON(rcomp); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			break;  		case OP_4X_PS_MERGE10:  			WARN_ON(rcomp); -			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ +			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], -				   &vcpu->arch.fpr[ax_rd]); -			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ -			kvm_cvt_df(&vcpu->arch.fpr[ax_rb], +				   &VCPU_FPR(vcpu, ax_rd)); +			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ +			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),  				   &vcpu->arch.qpr[ax_rd]);  			break;  		case OP_4X_PS_MERGE11:  			WARN_ON(rcomp); -			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ +			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */  			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], -				   &vcpu->arch.fpr[ax_rd]); +				   &VCPU_FPR(vcpu, ax_rd));  			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];  			break;  		} @@ -856,7 +855,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  		case OP_4A_PS_SUM1:  			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,  					ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); -			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; +			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc);  			break;  		case OP_4A_PS_SUM0:  			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, @@ -1106,45 +1105,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  	case 59:  		switch (inst_get_field(inst, 21, 30)) {  		case OP_59_FADDS: -			fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FSUBS: -			fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FDIVS: -			fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FRES: -			fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FRSQRTES: -			fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		}  		switch (inst_get_field(inst, 26, 30)) {  		case OP_59_FMULS: -			fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); +			fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FMSUBS: -			fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FMADDS: -			fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			
kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FNMSUBS: -			fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_59_FNMADDS: -			fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		} @@ -1159,12 +1158,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			break;  		case OP_63_MFFS:  			/* XXX missing CR */ -			*fpr_d = vcpu->arch.fpscr; +			*fpr_d = vcpu->arch.fp.fpscr;  			break;  		case OP_63_MTFSF:  			/* XXX missing fm bits */  			/* XXX missing CR */ -			vcpu->arch.fpscr = *fpr_b; +			vcpu->arch.fp.fpscr = *fpr_b;  			break;  		case OP_63_FCMPU:  		{ @@ -1172,7 +1171,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			u32 cr0_mask = 0xf0000000;  			u32 cr_shift = inst_get_field(inst, 6, 8) * 4; -			fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); +			fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);  			cr &= ~(cr0_mask >> cr_shift);  			cr |= (cr & cr0_mask) >> cr_shift;  			break; @@ -1183,40 +1182,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			u32 cr0_mask = 0xf0000000;  			u32 cr_shift = inst_get_field(inst, 6, 8) * 4; -			fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); +			fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);  			cr &= ~(cr0_mask >> cr_shift);  			cr |= (cr & cr0_mask) >> cr_shift;  			break;  		}  		case OP_63_FNEG: -			fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			break;  		case OP_63_FMR:  			*fpr_d = *fpr_b;  			break;  		case OP_63_FABS: -			fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			break;  		case OP_63_FCPSGN: -			fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			break;  		case OP_63_FDIV: -			fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			break;  		case OP_63_FADD: -			fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			break;  		case OP_63_FSUB: -			fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); +			fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);  			break;  		case OP_63_FCTIW: -			fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			break;  		case OP_63_FCTIWZ: -			fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			break;  		case OP_63_FRSP: -			fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			kvmppc_sync_qpr(vcpu, ax_rd);  			break;  		case OP_63_FRSQRTE: @@ -1224,39 +1223,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)  			double one = 1.0f;  			/* fD = sqrt(fB) */ -			fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); +			fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);  			/* fD = 1.0f / fD */ -			fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); +			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);  			break;  		}  		}  		switch (inst_get_field(inst, 26, 30)) {  		case 
OP_63_FMUL: -			fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); +			fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);  			break;  		case OP_63_FSEL: -			fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			break;  		case OP_63_FMSUB: -			fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			break;  		case OP_63_FMADD: -			fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			break;  		case OP_63_FNMSUB: -			fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			break;  		case OP_63_FNMADD: -			fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); +			fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);  			break;  		}  		break;  	}  #ifdef DEBUG -	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { +	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {  		u32 f; -		kvm_cvt_df(&vcpu->arch.fpr[i], &f); +		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);  		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);  	}  #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 27db1e66595..8eef1e51907 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -40,14 +40,20 @@  #include <linux/sched.h>  #include <linux/vmalloc.h>  #include <linux/highmem.h> +#include <linux/module.h> +#include <linux/miscdevice.h> -#include "trace.h" +#include "book3s.h" + +#define CREATE_TRACE_POINTS +#include "trace_pr.h"  /* #define EXIT_DEBUG */  /* #define DEBUG_EXT */  static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,  			     ulong msr); +static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);  /* Some compatibility defines */  #ifdef CONFIG_PPC_BOOK3S_32 @@ -56,38 +62,117 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,  #define HW_PAGE_SIZE PAGE_SIZE  #endif -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)  {  #ifdef CONFIG_PPC_BOOK3S_64  	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);  	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); -	memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, -	       sizeof(get_paca()->shadow_vcpu));  	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; +	svcpu->in_use = 0;  	svcpu_put(svcpu);  #endif  	vcpu->cpu = smp_processor_id();  #ifdef CONFIG_PPC_BOOK3S_32 -	current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; +	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;  #endif  } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)  {  #ifdef CONFIG_PPC_BOOK3S_64  	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); +	if (svcpu->in_use) { +		kvmppc_copy_from_svcpu(vcpu, svcpu); +	}  	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); -	memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, -	       sizeof(get_paca()->shadow_vcpu));  	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;  	svcpu_put(svcpu);  #endif  	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); +	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);  	vcpu->cpu = -1;  } -int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +/* Copy data needed 
by real-mode code from vcpu to shadow vcpu */ +void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, +			  struct kvm_vcpu *vcpu) +{ +	svcpu->gpr[0] = vcpu->arch.gpr[0]; +	svcpu->gpr[1] = vcpu->arch.gpr[1]; +	svcpu->gpr[2] = vcpu->arch.gpr[2]; +	svcpu->gpr[3] = vcpu->arch.gpr[3]; +	svcpu->gpr[4] = vcpu->arch.gpr[4]; +	svcpu->gpr[5] = vcpu->arch.gpr[5]; +	svcpu->gpr[6] = vcpu->arch.gpr[6]; +	svcpu->gpr[7] = vcpu->arch.gpr[7]; +	svcpu->gpr[8] = vcpu->arch.gpr[8]; +	svcpu->gpr[9] = vcpu->arch.gpr[9]; +	svcpu->gpr[10] = vcpu->arch.gpr[10]; +	svcpu->gpr[11] = vcpu->arch.gpr[11]; +	svcpu->gpr[12] = vcpu->arch.gpr[12]; +	svcpu->gpr[13] = vcpu->arch.gpr[13]; +	svcpu->cr  = vcpu->arch.cr; +	svcpu->xer = vcpu->arch.xer; +	svcpu->ctr = vcpu->arch.ctr; +	svcpu->lr  = vcpu->arch.lr; +	svcpu->pc  = vcpu->arch.pc; +#ifdef CONFIG_PPC_BOOK3S_64 +	svcpu->shadow_fscr = vcpu->arch.shadow_fscr; +#endif +	svcpu->in_use = true; +} + +/* Copy data touched by real-mode code from shadow vcpu back to vcpu */ +void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, +			    struct kvmppc_book3s_shadow_vcpu *svcpu) +{ +	/* +	 * vcpu_put would just call us again because in_use hasn't +	 * been updated yet. +	 */ +	preempt_disable(); + +	/* +	 * Maybe we were already preempted and synced the svcpu from +	 * our preempt notifiers. Don't bother touching this svcpu then. +	 */ +	if (!svcpu->in_use) +		goto out; + +	vcpu->arch.gpr[0] = svcpu->gpr[0]; +	vcpu->arch.gpr[1] = svcpu->gpr[1]; +	vcpu->arch.gpr[2] = svcpu->gpr[2]; +	vcpu->arch.gpr[3] = svcpu->gpr[3]; +	vcpu->arch.gpr[4] = svcpu->gpr[4]; +	vcpu->arch.gpr[5] = svcpu->gpr[5]; +	vcpu->arch.gpr[6] = svcpu->gpr[6]; +	vcpu->arch.gpr[7] = svcpu->gpr[7]; +	vcpu->arch.gpr[8] = svcpu->gpr[8]; +	vcpu->arch.gpr[9] = svcpu->gpr[9]; +	vcpu->arch.gpr[10] = svcpu->gpr[10]; +	vcpu->arch.gpr[11] = svcpu->gpr[11]; +	vcpu->arch.gpr[12] = svcpu->gpr[12]; +	vcpu->arch.gpr[13] = svcpu->gpr[13]; +	vcpu->arch.cr  = svcpu->cr; +	vcpu->arch.xer = svcpu->xer; +	vcpu->arch.ctr = svcpu->ctr; +	vcpu->arch.lr  = svcpu->lr; +	vcpu->arch.pc  = svcpu->pc; +	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; +	vcpu->arch.fault_dar   = svcpu->fault_dar; +	vcpu->arch.fault_dsisr = svcpu->fault_dsisr; +	vcpu->arch.last_inst   = svcpu->last_inst; +#ifdef CONFIG_PPC_BOOK3S_64 +	vcpu->arch.shadow_fscr = svcpu->shadow_fscr; +#endif +	svcpu->in_use = false; + +out: +	preempt_enable(); +} + +static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)  {  	int r = 1; /* Indicate we want to get back into the guest */ @@ -100,58 +185,84 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)  }  /************* MMU Notifiers *************/ +static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, +			     unsigned long end) +{ +	long i; +	struct kvm_vcpu *vcpu; +	struct kvm_memslots *slots; +	struct kvm_memory_slot *memslot; + +	slots = kvm_memslots(kvm); +	kvm_for_each_memslot(memslot, slots) { +		unsigned long hva_start, hva_end; +		gfn_t gfn, gfn_end; + +		hva_start = max(start, memslot->userspace_addr); +		hva_end = min(end, memslot->userspace_addr + +					(memslot->npages << PAGE_SHIFT)); +		if (hva_start >= hva_end) +			continue; +		/* +		 * {gfn(page) | page intersects with [hva_start, hva_end)} = +		 * {gfn, gfn+1, ..., gfn_end-1}. 
+		 */ +		gfn = hva_to_gfn_memslot(hva_start, memslot); +		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); +		kvm_for_each_vcpu(i, vcpu, kvm) +			kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, +					      gfn_end << PAGE_SHIFT); +	} +} -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)  {  	trace_kvm_unmap_hva(hva); -	/* -	 * Flush all shadow tlb entries everywhere. This is slow, but -	 * we are 100% sure that we catch the to be unmapped page -	 */ -	kvm_flush_remote_tlbs(kvm); +	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);  	return 0;  } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, +				  unsigned long end)  { -	/* kvm_unmap_hva flushes everything anyways */ -	kvm_unmap_hva(kvm, start); +	do_kvm_unmap_hva(kvm, start, end);  	return 0;  } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)  {  	/* XXX could be more clever ;) */  	return 0;  } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)  {  	/* XXX could be more clever ;) */  	return 0;  } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)  {  	/* The page will get remapped properly on its next fault */ -	kvm_unmap_hva(kvm, hva); +	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);  }  /*****************************************/  static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)  { -	ulong smsr = vcpu->arch.shared->msr; +	ulong guest_msr = kvmppc_get_msr(vcpu); +	ulong smsr = guest_msr;  	/* Guest MSR values */ -	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE; +	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;  	/* Process MSR values */  	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;  	/* External providers the guest reserved */ -	smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); +	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);  	/* 64-bit Process MSR values */  #ifdef CONFIG_PPC_BOOK3S_64  	smsr |= MSR_ISF | MSR_HV; @@ -159,16 +270,16 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)  	vcpu->arch.shadow_msr = smsr;  } -void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)  { -	ulong old_msr = vcpu->arch.shared->msr; +	ulong old_msr = kvmppc_get_msr(vcpu);  #ifdef EXIT_DEBUG  	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);  #endif  	msr &= to_book3s(vcpu)->msr_mask; -	vcpu->arch.shared->msr = msr; +	kvmppc_set_msr_fast(vcpu, msr);  	kvmppc_recalc_shadow_msr(vcpu);  	if (msr & MSR_POW) { @@ -179,11 +290,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)  			/* Unset POW bit after we woke up */  			msr &= ~MSR_POW; -			vcpu->arch.shared->msr = msr; +			kvmppc_set_msr_fast(vcpu, msr);  		}  	} -	if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != +	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=  		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {  		kvmppc_mmu_flush_segments(vcpu);  		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); @@ -215,11 +326,11 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)  	}  	/* Preload FPU if it's enabled */ -	if (vcpu->arch.shared->msr & MSR_FP) +	if (kvmppc_get_msr(vcpu) & MSR_FP)  		kvmppc_handle_ext(vcpu, 
BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);  } -void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)  {  	u32 host_pvr; @@ -256,6 +367,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)  	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))  		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); +	/* +	 * If they're asking for POWER6 or later, set the flag +	 * indicating that we can do multiple large page sizes +	 * and 1TB segments. +	 * Also set the flag that indicates that tlbie has the large +	 * page bit in the RB operand instead of the instruction. +	 */ +	switch (PVR_VER(pvr)) { +	case PVR_POWER6: +	case PVR_POWER7: +	case PVR_POWER7p: +	case PVR_POWER8: +		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | +			BOOK3S_HFLAG_NEW_TLBIE; +		break; +	} +  #ifdef CONFIG_PPC_BOOK3S_32  	/* 32 bit Book3S always has 32 byte dcbz */  	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; @@ -308,8 +436,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)  	/* patch dcbz into reserved instruction, so we trap */  	for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) -		if ((page[i] & 0xff0007ff) == INS_DCBZ) -			page[i] &= 0xfffffff7; +		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ) +			page[i] &= cpu_to_be32(0xfffffff7);  	kunmap_atomic(page);  	put_page(hpage); @@ -319,7 +447,7 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)  {  	ulong mp_pa = vcpu->arch.magic_page_pa; -	if (!(vcpu->arch.shared->msr & MSR_SF)) +	if (!(kvmppc_get_msr(vcpu) & MSR_SF))  		mp_pa = (uint32_t)mp_pa;  	if (unlikely(mp_pa) && @@ -334,20 +462,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,  			    ulong eaddr, int vec)  {  	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); +	bool iswrite = false;  	int r = RESUME_GUEST;  	int relocated;  	int page_found = 0;  	struct kvmppc_pte pte;  	bool is_mmio = false; -	bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; -	bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; +	bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; +	bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;  	u64 vsid;  	relocated = data ? 
dr : ir; +	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE)) +		iswrite = true;  	/* Resolve real address if translation turned on */  	if (relocated) { -		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); +		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);  	} else {  		pte.may_execute = true;  		pte.may_read = true; @@ -355,9 +486,10 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,  		pte.raddr = eaddr & KVM_PAM;  		pte.eaddr = eaddr;  		pte.vpage = eaddr >> 12; +		pte.page_size = MMU_PAGE_64K;  	} -	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { +	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {  	case 0:  		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));  		break; @@ -365,7 +497,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	case MSR_IR:  		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); -		if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) +		if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)  			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));  		else  			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); @@ -388,35 +520,42 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	if (page_found == -ENOENT) {  		/* Page not found in guest PTE entries */ -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); -		vcpu->arch.shared->dsisr = svcpu->fault_dsisr; -		vcpu->arch.shared->msr |= -			(svcpu->shadow_srr1 & 0x00000000f8000000ULL); -		svcpu_put(svcpu); +		u64 ssrr1 = vcpu->arch.shadow_srr1; +		u64 msr = kvmppc_get_msr(vcpu); +		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); +		kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr); +		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));  		kvmppc_book3s_queue_irqprio(vcpu, vec);  	} else if (page_found == -EPERM) {  		/* Storage protection */ -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); -		vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; -		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; -		vcpu->arch.shared->msr |= -			svcpu->shadow_srr1 & 0x00000000f8000000ULL; -		svcpu_put(svcpu); +		u32 dsisr = vcpu->arch.fault_dsisr; +		u64 ssrr1 = vcpu->arch.shadow_srr1; +		u64 msr = kvmppc_get_msr(vcpu); +		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); +		dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT; +		kvmppc_set_dsisr(vcpu, dsisr); +		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));  		kvmppc_book3s_queue_irqprio(vcpu, vec);  	} else if (page_found == -EINVAL) {  		/* Page not found in guest SLB */ -		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); +		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));  		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);  	} else if (!is_mmio &&  		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { +		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { +			/* +			 * There is already a host HPTE there, presumably +			 * a read-only one for a page the guest thinks +			 * is writable, so get rid of it first. +			 */ +			kvmppc_mmu_unmap_page(vcpu, &pte); +		}  		/* The guest's PTE is not mapped yet. 
Map on the host */ -		kvmppc_mmu_map_page(vcpu, &pte); +		kvmppc_mmu_map_page(vcpu, &pte, iswrite);  		if (data)  			vcpu->stat.sp_storage++;  		else if (vcpu->arch.mmu.is_dcbz32(vcpu) && -			(!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) +			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))  			kvmppc_patch_dcbz(vcpu, &pte);  	} else {  		/* MMIO */ @@ -440,12 +579,6 @@ static inline int get_fpr_index(int i)  void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)  {  	struct thread_struct *t = ¤t->thread; -	u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX -	u64 *vcpu_vsx = vcpu->arch.vsr; -#endif -	u64 *thread_fpr = (u64*)t->fpr; -	int i;  	/*  	 * VSX instructions can access FP and vector registers, so if @@ -466,28 +599,18 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)  		/*  		 * Note that on CPUs with VSX, giveup_fpu stores  		 * both the traditional FP registers and the added VSX -		 * registers into thread.fpr[]. +		 * registers into thread.fp_state.fpr[].  		 */ -		if (current->thread.regs->msr & MSR_FP) +		if (t->regs->msr & MSR_FP)  			giveup_fpu(current); -		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) -			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; - -		vcpu->arch.fpscr = t->fpscr.val; - -#ifdef CONFIG_VSX -		if (cpu_has_feature(CPU_FTR_VSX)) -			for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) -				vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; -#endif +		t->fp_save_area = NULL;  	}  #ifdef CONFIG_ALTIVEC  	if (msr & MSR_VEC) {  		if (current->thread.regs->msr & MSR_VEC)  			giveup_altivec(current); -		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); -		vcpu->arch.vscr = t->vscr; +		t->vr_save_area = NULL;  	}  #endif @@ -495,6 +618,25 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)  	kvmppc_recalc_shadow_msr(vcpu);  } +/* Give up facility (TAR / EBB / DSCR) */ +static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) +{ +#ifdef CONFIG_PPC_BOOK3S_64 +	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) { +		/* Facility not available to the guest, ignore giveup request*/ +		return; +	} + +	switch (fac) { +	case FSCR_TAR_LG: +		vcpu->arch.tar = mfspr(SPRN_TAR); +		mtspr(SPRN_TAR, current->thread.tar); +		vcpu->arch.shadow_fscr &= ~FSCR_TAR; +		break; +	} +#endif +} +  static int kvmppc_read_inst(struct kvm_vcpu *vcpu)  {  	ulong srr0 = kvmppc_get_pc(vcpu); @@ -503,11 +645,12 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)  	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);  	if (ret == -ENOENT) { -		ulong msr = vcpu->arch.shared->msr; +		ulong msr = kvmppc_get_msr(vcpu);  		msr = kvmppc_set_field(msr, 33, 33, 1);  		msr = kvmppc_set_field(msr, 34, 36, 0); -		vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); +		msr = kvmppc_set_field(msr, 42, 47, 0); +		kvmppc_set_msr_fast(vcpu, msr);  		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);  		return EMULATE_AGAIN;  	} @@ -535,18 +678,12 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,  			     ulong msr)  {  	struct thread_struct *t = ¤t->thread; -	u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX -	u64 *vcpu_vsx = vcpu->arch.vsr; -#endif -	u64 *thread_fpr = (u64*)t->fpr; -	int i;  	/* When we have paired singles, we emulate in software */  	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)  		return RESUME_GUEST; -	if (!(vcpu->arch.shared->msr & msr)) { +	if (!(kvmppc_get_msr(vcpu) & msr)) {  		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);  		return RESUME_GUEST;  	} @@ -578,27 +715,24 @@ static int 
kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,  #endif  	if (msr & MSR_FP) { -		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) -			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; -#ifdef CONFIG_VSX -		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) -			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; -#endif -		t->fpscr.val = vcpu->arch.fpscr; -		t->fpexc_mode = 0; -		kvmppc_load_up_fpu(); +		preempt_disable(); +		enable_kernel_fp(); +		load_fp_state(&vcpu->arch.fp); +		t->fp_save_area = &vcpu->arch.fp; +		preempt_enable();  	}  	if (msr & MSR_VEC) {  #ifdef CONFIG_ALTIVEC -		memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); -		t->vscr = vcpu->arch.vscr; -		t->vrsave = -1; -		kvmppc_load_up_altivec(); +		preempt_disable(); +		enable_kernel_altivec(); +		load_vr_state(&vcpu->arch.vr); +		t->vr_save_area = &vcpu->arch.vr; +		preempt_enable();  #endif  	} -	current->thread.regs->msr |= msr; +	t->regs->msr |= msr;  	vcpu->arch.guest_owned_ext |= msr;  	kvmppc_recalc_shadow_msr(vcpu); @@ -617,15 +751,93 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)  	if (!lost_ext)  		return; -	if (lost_ext & MSR_FP) -		kvmppc_load_up_fpu(); -	if (lost_ext & MSR_VEC) -		kvmppc_load_up_altivec(); +	if (lost_ext & MSR_FP) { +		preempt_disable(); +		enable_kernel_fp(); +		load_fp_state(&vcpu->arch.fp); +		preempt_enable(); +	} +#ifdef CONFIG_ALTIVEC +	if (lost_ext & MSR_VEC) { +		preempt_disable(); +		enable_kernel_altivec(); +		load_vr_state(&vcpu->arch.vr); +		preempt_enable(); +	} +#endif  	current->thread.regs->msr |= lost_ext;  } -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, -                       unsigned int exit_nr) +#ifdef CONFIG_PPC_BOOK3S_64 + +static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) +{ +	/* Inject the Interrupt Cause field and trigger a guest interrupt */ +	vcpu->arch.fscr &= ~(0xffULL << 56); +	vcpu->arch.fscr |= (fac << 56); +	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); +} + +static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac) +{ +	enum emulation_result er = EMULATE_FAIL; + +	if (!(kvmppc_get_msr(vcpu) & MSR_PR)) +		er = kvmppc_emulate_instruction(vcpu->run, vcpu); + +	if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) { +		/* Couldn't emulate, trigger interrupt in guest */ +		kvmppc_trigger_fac_interrupt(vcpu, fac); +	} +} + +/* Enable facilities (TAR, EBB, DSCR) for the guest */ +static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) +{ +	bool guest_fac_enabled; +	BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S)); + +	/* +	 * Not every facility is enabled by FSCR bits, check whether the +	 * guest has this facility enabled at all. 
+	 */ +	switch (fac) { +	case FSCR_TAR_LG: +	case FSCR_EBB_LG: +		guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac)); +		break; +	case FSCR_TM_LG: +		guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM; +		break; +	default: +		guest_fac_enabled = false; +		break; +	} + +	if (!guest_fac_enabled) { +		/* Facility not enabled by the guest */ +		kvmppc_trigger_fac_interrupt(vcpu, fac); +		return RESUME_GUEST; +	} + +	switch (fac) { +	case FSCR_TAR_LG: +		/* TAR switching isn't lazy in Linux yet */ +		current->thread.tar = mfspr(SPRN_TAR); +		mtspr(SPRN_TAR, vcpu->arch.tar); +		vcpu->arch.shadow_fscr |= FSCR_TAR; +		break; +	default: +		kvmppc_emulate_fac(vcpu, fac); +		break; +	} + +	return RESUME_GUEST; +} +#endif + +int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +			  unsigned int exit_nr)  {  	int r = RESUME_HOST;  	int s; @@ -643,25 +855,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	switch (exit_nr) {  	case BOOK3S_INTERRUPT_INST_STORAGE:  	{ -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -		ulong shadow_srr1 = svcpu->shadow_srr1; +		ulong shadow_srr1 = vcpu->arch.shadow_srr1;  		vcpu->stat.pf_instruc++;  #ifdef CONFIG_PPC_BOOK3S_32  		/* We set segments as unused segments when invalidating them. So  		 * treat the respective fault as segment fault. */ -		if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { -			kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); -			r = RESUME_GUEST; +		{ +			struct kvmppc_book3s_shadow_vcpu *svcpu; +			u32 sr; + +			svcpu = svcpu_get(vcpu); +			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];  			svcpu_put(svcpu); -			break; +			if (sr == SR_INVALID) { +				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); +				r = RESUME_GUEST; +				break; +			}  		}  #endif -		svcpu_put(svcpu);  		/* only care about PTEG not found errors, but leave NX alone */  		if (shadow_srr1 & 0x40000000) { +			int idx = srcu_read_lock(&vcpu->kvm->srcu);  			r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); +			srcu_read_unlock(&vcpu->kvm->srcu, idx);  			vcpu->stat.sp_instruc++;  		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&  			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { @@ -673,7 +892,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);  			r = RESUME_GUEST;  		} else { -			vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000; +			u64 msr = kvmppc_get_msr(vcpu); +			msr |= shadow_srr1 & 0x58000000; +			kvmppc_set_msr_fast(vcpu, msr);  			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);  			r = RESUME_GUEST;  		} @@ -682,28 +903,39 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	case BOOK3S_INTERRUPT_DATA_STORAGE:  	{  		ulong dar = kvmppc_get_fault_dar(vcpu); -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -		u32 fault_dsisr = svcpu->fault_dsisr; +		u32 fault_dsisr = vcpu->arch.fault_dsisr;  		vcpu->stat.pf_storage++;  #ifdef CONFIG_PPC_BOOK3S_32  		/* We set segments as unused segments when invalidating them. So  		 * treat the respective fault as segment fault. 
*/ -		if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { -			kvmppc_mmu_map_segment(vcpu, dar); -			r = RESUME_GUEST; +		{ +			struct kvmppc_book3s_shadow_vcpu *svcpu; +			u32 sr; + +			svcpu = svcpu_get(vcpu); +			sr = svcpu->sr[dar >> SID_SHIFT];  			svcpu_put(svcpu); -			break; +			if (sr == SR_INVALID) { +				kvmppc_mmu_map_segment(vcpu, dar); +				r = RESUME_GUEST; +				break; +			}  		}  #endif -		svcpu_put(svcpu); -		/* The only case we need to handle is missing shadow PTEs */ -		if (fault_dsisr & DSISR_NOHPTE) { +		/* +		 * We need to handle missing shadow PTEs, and +		 * protection faults due to us mapping a page read-only +		 * when the guest thinks it is writable. +		 */ +		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { +			int idx = srcu_read_lock(&vcpu->kvm->srcu);  			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); +			srcu_read_unlock(&vcpu->kvm->srcu, idx);  		} else { -			vcpu->arch.shared->dar = dar; -			vcpu->arch.shared->dsisr = fault_dsisr; +			kvmppc_set_dar(vcpu, dar); +			kvmppc_set_dsisr(vcpu, fault_dsisr);  			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);  			r = RESUME_GUEST;  		} @@ -711,7 +943,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	}  	case BOOK3S_INTERRUPT_DATA_SEGMENT:  		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { -			vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); +			kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));  			kvmppc_book3s_queue_irqprio(vcpu,  				BOOK3S_INTERRUPT_DATA_SEGMENT);  		} @@ -727,6 +959,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	/* We're good on these - the host merely wanted to get our attention */  	case BOOK3S_INTERRUPT_DECREMENTER:  	case BOOK3S_INTERRUPT_HV_DECREMENTER: +	case BOOK3S_INTERRUPT_DOORBELL:  		vcpu->stat.dec_exits++;  		r = RESUME_GUEST;  		break; @@ -743,15 +976,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:  	{  		enum emulation_result er; -		struct kvmppc_book3s_shadow_vcpu *svcpu;  		ulong flags;  program_interrupt: -		svcpu = svcpu_get(vcpu); -		flags = svcpu->shadow_srr1 & 0x1f0000ull; -		svcpu_put(svcpu); +		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; -		if (vcpu->arch.shared->msr & MSR_PR) { +		if (kvmppc_get_msr(vcpu) & MSR_PR) {  #ifdef EXIT_DEBUG  			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));  #endif @@ -793,12 +1023,12 @@ program_interrupt:  	case BOOK3S_INTERRUPT_SYSCALL:  		if (vcpu->arch.papr_enabled &&  		    (kvmppc_get_last_sc(vcpu) == 0x44000022) && -		    !(vcpu->arch.shared->msr & MSR_PR)) { +		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {  			/* SC 1 papr hypercalls */  			ulong cmd = kvmppc_get_gpr(vcpu, 3);  			int i; -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64  			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {  				r = RESUME_GUEST;  				break; @@ -825,7 +1055,7 @@ program_interrupt:  				gprs[i] = kvmppc_get_gpr(vcpu, i);  			vcpu->arch.osi_needed = 1;  			r = RESUME_HOST_NV; -		} else if (!(vcpu->arch.shared->msr & MSR_PR) && +		} else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&  		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {  			/* KVM PV hypercalls */  			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); @@ -866,14 +1096,26 @@ program_interrupt:  	}  	case BOOK3S_INTERRUPT_ALIGNMENT:  		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { -			vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, -				
kvmppc_get_last_inst(vcpu)); -			vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, -				kvmppc_get_last_inst(vcpu)); +			u32 last_inst = kvmppc_get_last_inst(vcpu); +			u32 dsisr; +			u64 dar; + +			dsisr = kvmppc_alignment_dsisr(vcpu, last_inst); +			dar = kvmppc_alignment_dar(vcpu, last_inst); + +			kvmppc_set_dsisr(vcpu, dsisr); +			kvmppc_set_dar(vcpu, dar); +  			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);  		}  		r = RESUME_GUEST;  		break; +#ifdef CONFIG_PPC_BOOK3S_64 +	case BOOK3S_INTERRUPT_FAC_UNAVAIL: +		kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); +		r = RESUME_GUEST; +		break; +#endif  	case BOOK3S_INTERRUPT_MACHINE_CHECK:  	case BOOK3S_INTERRUPT_TRACE:  		kvmppc_book3s_queue_irqprio(vcpu, exit_nr); @@ -881,9 +1123,7 @@ program_interrupt:  		break;  	default:  	{ -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); -		ulong shadow_srr1 = svcpu->shadow_srr1; -		svcpu_put(svcpu); +		ulong shadow_srr1 = vcpu->arch.shadow_srr1;  		/* Ugh - bork here! What did we get? */  		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",  			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); @@ -904,14 +1144,14 @@ program_interrupt:  		 * and if we really did time things so badly, then we just exit  		 * again due to a host external interrupt.  		 */ -		local_irq_disable();  		s = kvmppc_prepare_to_enter(vcpu); -		if (s <= 0) { -			local_irq_enable(); +		if (s <= 0)  			r = s; -		} else { +		else { +			/* interrupts now hard-disabled */  			kvmppc_fix_ee_before_entry();  		} +  		kvmppc_handle_lost_ext(vcpu);  	} @@ -920,8 +1160,8 @@ program_interrupt:  	return r;  } -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, -                                  struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu, +					    struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);  	int i; @@ -936,7 +1176,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  		}  	} else {  		for (i = 0; i < 16; i++) -			sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; +			sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);  		for (i = 0; i < 8; i++) {  			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; @@ -947,13 +1187,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  	return 0;  } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, -                                  struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu, +					    struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);  	int i; -	kvmppc_set_pvr(vcpu, sregs->pvr); +	kvmppc_set_pvr_pr(vcpu, sregs->pvr);  	vcpu3s->sdr1 = sregs->u.s.sdr1;  	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { @@ -983,7 +1223,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,  	return 0;  } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, +				 union kvmppc_one_reg *val)  {  	int r = 0; @@ -991,19 +1232,15 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	case KVM_REG_PPC_HIOR:  		*val = get_reg_val(id, to_book3s(vcpu)->hior);  		break; -#ifdef CONFIG_VSX -	case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: { -		long int i = id - KVM_REG_PPC_VSR0; - -		if (!cpu_has_feature(CPU_FTR_VSX)) { -			r = -ENXIO; -			break; -		} -		val->vsxval[0] = vcpu->arch.fpr[i]; -		val->vsxval[1] = vcpu->arch.vsr[i]; +	case KVM_REG_PPC_LPCR: +		/* +		 * We are only interested in the LPCR_ILE bit +		 */ +		if (vcpu->arch.intr_msr & MSR_LE) +			*val = get_reg_val(id, LPCR_ILE); +		else +			*val = get_reg_val(id, 0);  		break; -	} -#endif /* CONFIG_VSX */  	default:  		r = -EINVAL;  		break; @@ -1012,7 +1249,16 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	return r;  } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr) +{ +	if (new_lpcr & LPCR_ILE) +		vcpu->arch.intr_msr |= MSR_LE; +	else +		vcpu->arch.intr_msr &= ~MSR_LE; +} + +static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, +				 union kvmppc_one_reg *val)  {  	int r = 0; @@ -1021,19 +1267,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  		to_book3s(vcpu)->hior = set_reg_val(id, *val);  		to_book3s(vcpu)->hior_explicit = true;  		break; -#ifdef CONFIG_VSX -	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { -		long int i = id - KVM_REG_PPC_VSR0; - -		if (!cpu_has_feature(CPU_FTR_VSX)) { -			r = -ENXIO; -			break; -		} -		vcpu->arch.fpr[i] = val->vsxval[0]; -		vcpu->arch.vsr[i] = val->vsxval[1]; +	case KVM_REG_PPC_LPCR: +		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));  		break; -	} -#endif /* CONFIG_VSX */  	default:  		r = -EINVAL;  		break; @@ -1042,28 +1278,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)  	return r;  } -int kvmppc_core_check_processor_compat(void) -{ -	return 0; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, +						   unsigned int id)  {  	struct kvmppc_vcpu_book3s *vcpu_book3s;  	struct kvm_vcpu *vcpu;  	int err = -ENOMEM;  	unsigned long p; -	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); -	if (!vcpu_book3s) +	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); +	if (!vcpu)  		goto out; -	vcpu_book3s->shadow_vcpu = -		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); -	if (!vcpu_book3s->shadow_vcpu) +	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); +	if (!vcpu_book3s)  		goto free_vcpu; +	vcpu->arch.book3s = vcpu_book3s; + +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +	vcpu->arch.shadow_vcpu = +		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); +	if (!vcpu->arch.shadow_vcpu) +		goto free_vcpu3s; +#endif -	vcpu = &vcpu_book3s->vcpu;  	err = kvm_vcpu_init(vcpu, kvm, id);  	if (err)  		goto free_shadow_vcpu; @@ -1074,18 +1312,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)  		goto uninit_vcpu;  	/* the real shared page fills the last 4k of our page */  	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); -  #ifdef CONFIG_PPC_BOOK3S_64 -	/* default to book3s_64 (970fx) */ +	/* Always start the shared struct in native endian mode */ +#ifdef __BIG_ENDIAN__ +        vcpu->arch.shared_big_endian = true; +#else +        vcpu->arch.shared_big_endian = false; +#endif + +	/* +	 * Default to the same as the host if we're on sufficiently +	 * recent machine that we have 1TB segments; +	 * otherwise default to PPC970FX. 
+	 */  	vcpu->arch.pvr = 0x3C0301; +	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) +		vcpu->arch.pvr = mfspr(SPRN_PVR); +	vcpu->arch.intr_msr = MSR_SF;  #else  	/* default to book3s_32 (750) */  	vcpu->arch.pvr = 0x84202;  #endif -	kvmppc_set_pvr(vcpu, vcpu->arch.pvr); +	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);  	vcpu->arch.slb_nr = 64; -	vcpu->arch.shadow_msr = MSR_USER64; +	vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;  	err = kvmppc_mmu_init(vcpu);  	if (err < 0) @@ -1096,39 +1347,36 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)  uninit_vcpu:  	kvm_vcpu_uninit(vcpu);  free_shadow_vcpu: -	kfree(vcpu_book3s->shadow_vcpu); -free_vcpu: +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +	kfree(vcpu->arch.shadow_vcpu); +free_vcpu3s: +#endif  	vfree(vcpu_book3s); +free_vcpu: +	kmem_cache_free(kvm_vcpu_cache, vcpu);  out:  	return ERR_PTR(err);  } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);  	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);  	kvm_vcpu_uninit(vcpu); -	kfree(vcpu_book3s->shadow_vcpu); +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +	kfree(vcpu->arch.shadow_vcpu); +#endif  	vfree(vcpu_book3s); +	kmem_cache_free(kvm_vcpu_cache, vcpu);  } -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  {  	int ret; -	double fpr[32][TS_FPRWIDTH]; -	unsigned int fpscr; -	int fpexc_mode;  #ifdef CONFIG_ALTIVEC -	vector128 vr[32]; -	vector128 vscr;  	unsigned long uninitialized_var(vrsave); -	int used_vr;  #endif -#ifdef CONFIG_VSX -	int used_vsr; -#endif -	ulong ext_msr;  	/* Check if we can run the vcpu at all */  	if (!vcpu->arch.sane) { @@ -1143,44 +1391,29 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	 * really did time things so badly, then we just exit again due to  	 * a host external interrupt.  	 
*/ -	local_irq_disable();  	ret = kvmppc_prepare_to_enter(vcpu); -	if (ret <= 0) { -		local_irq_enable(); +	if (ret <= 0)  		goto out; -	} +	/* interrupts now hard-disabled */ -	/* Save FPU state in stack */ +	/* Save FPU state in thread_struct */  	if (current->thread.regs->msr & MSR_FP)  		giveup_fpu(current); -	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); -	fpscr = current->thread.fpscr.val; -	fpexc_mode = current->thread.fpexc_mode;  #ifdef CONFIG_ALTIVEC -	/* Save Altivec state in stack */ -	used_vr = current->thread.used_vr; -	if (used_vr) { -		if (current->thread.regs->msr & MSR_VEC) -			giveup_altivec(current); -		memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); -		vscr = current->thread.vscr; -		vrsave = current->thread.vrsave; -	} +	/* Save Altivec state in thread_struct */ +	if (current->thread.regs->msr & MSR_VEC) +		giveup_altivec(current);  #endif  #ifdef CONFIG_VSX -	/* Save VSX state in stack */ -	used_vsr = current->thread.used_vsr; -	if (used_vsr && (current->thread.regs->msr & MSR_VSX)) +	/* Save VSX state in thread_struct */ +	if (current->thread.regs->msr & MSR_VSX)  		__giveup_vsx(current);  #endif -	/* Remember the MSR with disabled extensions */ -	ext_msr = current->thread.regs->msr; -  	/* Preload FPU if it's enabled */ -	if (vcpu->arch.shared->msr & MSR_FP) +	if (kvmppc_get_msr(vcpu) & MSR_FP)  		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);  	kvmppc_fix_ee_before_entry(); @@ -1193,26 +1426,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	/* Make sure we save the guest FPU/Altivec/VSX state */  	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); -	current->thread.regs->msr = ext_msr; - -	/* Restore FPU/VSX state from stack */ -	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); -	current->thread.fpscr.val = fpscr; -	current->thread.fpexc_mode = fpexc_mode; - -#ifdef CONFIG_ALTIVEC -	/* Restore Altivec state from stack */ -	if (used_vr && current->thread.used_vr) { -		memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); -		current->thread.vscr = vscr; -		current->thread.vrsave = vrsave; -	} -	current->thread.used_vr = used_vr; -#endif - -#ifdef CONFIG_VSX -	current->thread.used_vsr = used_vsr; -#endif +	/* Make sure we save the guest TAR/EBB/DSCR state */ +	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);  out:  	vcpu->mode = OUTSIDE_GUEST_MODE; @@ -1222,8 +1437,8 @@ out:  /*   * Get (and clear) the dirty memory log for a memory slot.   
*/ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, -				      struct kvm_dirty_log *log) +static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, +					 struct kvm_dirty_log *log)  {  	struct kvm_memory_slot *memslot;  	struct kvm_vcpu *vcpu; @@ -1258,67 +1473,100 @@ out:  	return r;  } -#ifdef CONFIG_PPC64 -int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, +					 struct kvm_memory_slot *memslot)  { -	info->flags = KVM_PPC_1T_SEGMENTS; - -	/* SLB is always 64 entries */ -	info->slb_size = 64; - -	/* Standard 4k base page size segment */ -	info->sps[0].page_shift = 12; -	info->sps[0].slb_enc = 0; -	info->sps[0].enc[0].page_shift = 12; -	info->sps[0].enc[0].pte_enc = 0; - -	/* Standard 16M large page size segment */ -	info->sps[1].page_shift = 24; -	info->sps[1].slb_enc = SLB_VSID_L; -	info->sps[1].enc[0].page_shift = 24; -	info->sps[1].enc[0].pte_enc = 0; +	return; +} +static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, +					struct kvm_memory_slot *memslot, +					struct kvm_userspace_memory_region *mem) +{  	return 0;  } -#endif /* CONFIG_PPC64 */ -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, -			      struct kvm_memory_slot *dont) +static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, +				struct kvm_userspace_memory_region *mem, +				const struct kvm_memory_slot *old)  { +	return;  } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, -			       unsigned long npages) +static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, +					struct kvm_memory_slot *dont)  { -	return 0; +	return;  } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, -				      struct kvm_memory_slot *memslot, -				      struct kvm_userspace_memory_region *mem) +static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, +					 unsigned long npages)  {  	return 0;  } -void kvmppc_core_commit_memory_region(struct kvm *kvm, -				struct kvm_userspace_memory_region *mem, -				const struct kvm_memory_slot *old) + +#ifdef CONFIG_PPC64 +static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, +					 struct kvm_ppc_smmu_info *info)  { -} +	long int i; +	struct kvm_vcpu *vcpu; + +	info->flags = 0; + +	/* SLB is always 64 entries */ +	info->slb_size = 64; + +	/* Standard 4k base page size segment */ +	info->sps[0].page_shift = 12; +	info->sps[0].slb_enc = 0; +	info->sps[0].enc[0].page_shift = 12; +	info->sps[0].enc[0].pte_enc = 0; -void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +	/* +	 * 64k large page size. +	 * We only want to put this in if the CPUs we're emulating +	 * support it, but unfortunately we don't have a vcpu easily +	 * to hand here to test.  Just pick the first vcpu, and if +	 * that doesn't exist yet, report the minimum capability, +	 * i.e., no 64k pages. +	 * 1T segment support goes along with 64k pages. 
+	 */ +	i = 1; +	vcpu = kvm_get_vcpu(kvm, 0); +	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { +		info->flags = KVM_PPC_1T_SEGMENTS; +		info->sps[i].page_shift = 16; +		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01; +		info->sps[i].enc[0].page_shift = 16; +		info->sps[i].enc[0].pte_enc = 1; +		++i; +	} + +	/* Standard 16M large page size segment */ +	info->sps[i].page_shift = 24; +	info->sps[i].slb_enc = SLB_VSID_L; +	info->sps[i].enc[0].page_shift = 24; +	info->sps[i].enc[0].pte_enc = 0; + +	return 0; +} +#else +static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, +					 struct kvm_ppc_smmu_info *info)  { +	/* We should not get called */ +	BUG();  } +#endif /* CONFIG_PPC64 */  static unsigned int kvm_global_user_count = 0;  static DEFINE_SPINLOCK(kvm_global_user_count_lock); -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_pr(struct kvm *kvm)  { -#ifdef CONFIG_PPC64 -	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); -	INIT_LIST_HEAD(&kvm->arch.rtas_tokens); -#endif +	mutex_init(&kvm->arch.hpt_mutex);  	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {  		spin_lock(&kvm_global_user_count_lock); @@ -1329,7 +1577,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)  	return 0;  } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)  {  #ifdef CONFIG_PPC64  	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); @@ -1344,26 +1592,83 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)  	}  } -static int kvmppc_book3s_init(void) +static int kvmppc_core_check_processor_compat_pr(void)  { -	int r; +	/* we are always compatible */ +	return 0; +} -	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, -		     THIS_MODULE); +static long kvm_arch_vm_ioctl_pr(struct file *filp, +				 unsigned int ioctl, unsigned long arg) +{ +	return -ENOTTY; +} -	if (r) +static struct kvmppc_ops kvm_ops_pr = { +	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, +	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, +	.get_one_reg = kvmppc_get_one_reg_pr, +	.set_one_reg = kvmppc_set_one_reg_pr, +	.vcpu_load   = kvmppc_core_vcpu_load_pr, +	.vcpu_put    = kvmppc_core_vcpu_put_pr, +	.set_msr     = kvmppc_set_msr_pr, +	.vcpu_run    = kvmppc_vcpu_run_pr, +	.vcpu_create = kvmppc_core_vcpu_create_pr, +	.vcpu_free   = kvmppc_core_vcpu_free_pr, +	.check_requests = kvmppc_core_check_requests_pr, +	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr, +	.flush_memslot = kvmppc_core_flush_memslot_pr, +	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr, +	.commit_memory_region = kvmppc_core_commit_memory_region_pr, +	.unmap_hva = kvm_unmap_hva_pr, +	.unmap_hva_range = kvm_unmap_hva_range_pr, +	.age_hva  = kvm_age_hva_pr, +	.test_age_hva = kvm_test_age_hva_pr, +	.set_spte_hva = kvm_set_spte_hva_pr, +	.mmu_destroy  = kvmppc_mmu_destroy_pr, +	.free_memslot = kvmppc_core_free_memslot_pr, +	.create_memslot = kvmppc_core_create_memslot_pr, +	.init_vm = kvmppc_core_init_vm_pr, +	.destroy_vm = kvmppc_core_destroy_vm_pr, +	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, +	.emulate_op = kvmppc_core_emulate_op_pr, +	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr, +	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr, +	.fast_vcpu_kick = kvm_vcpu_kick, +	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr, +}; + + +int kvmppc_book3s_init_pr(void) +{ +	int r; + +	r = kvmppc_core_check_processor_compat_pr(); +	if (r < 0)  		return r; -	r = kvmppc_mmu_hpte_sysinit(); +	kvm_ops_pr.owner = THIS_MODULE; +	kvmppc_pr_ops = &kvm_ops_pr; +	r = kvmppc_mmu_hpte_sysinit();  	return r;  } -static 
void kvmppc_book3s_exit(void) +void kvmppc_book3s_exit_pr(void)  { +	kvmppc_pr_ops = NULL;  	kvmppc_mmu_hpte_sysexit(); -	kvm_exit();  } -module_init(kvmppc_book3s_init); -module_exit(kvmppc_book3s_exit); +/* + * We only support separate modules for book3s 64 + */ +#ifdef CONFIG_PPC_BOOK3S_64 + +module_init(kvmppc_book3s_init_pr); +module_exit(kvmppc_book3s_exit_pr); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); +#endif diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index da0e0bc268b..52a63bfe3f0 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -21,6 +21,8 @@  #include <asm/kvm_ppc.h>  #include <asm/kvm_book3s.h> +#define HPTE_SIZE	16		/* bytes per HPT entry */ +  static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)  {  	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); @@ -40,32 +42,41 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)  	long pte_index = kvmppc_get_gpr(vcpu, 5);  	unsigned long pteg[2 * 8];  	unsigned long pteg_addr, i, *hpte; +	long int ret; +	i = pte_index & 7;  	pte_index &= ~7UL;  	pteg_addr = get_pteg_addr(vcpu, pte_index); +	mutex_lock(&vcpu->kvm->arch.hpt_mutex);  	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));  	hpte = pteg; +	ret = H_PTEG_FULL;  	if (likely((flags & H_EXACT) == 0)) { -		pte_index &= ~7UL;  		for (i = 0; ; ++i) {  			if (i == 8) -				return H_PTEG_FULL; -			if ((*hpte & HPTE_V_VALID) == 0) +				goto done; +			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)  				break;  			hpte += 2;  		}  	} else { -		i = kvmppc_get_gpr(vcpu, 5) & 7UL;  		hpte += i * 2; +		if (*hpte & HPTE_V_VALID) +			goto done;  	} -	hpte[0] = kvmppc_get_gpr(vcpu, 6); -	hpte[1] = kvmppc_get_gpr(vcpu, 7); -	copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); -	kvmppc_set_gpr(vcpu, 3, H_SUCCESS); +	hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6)); +	hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7)); +	pteg_addr += i * HPTE_SIZE; +	copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);  	kvmppc_set_gpr(vcpu, 4, pte_index | i); +	ret = H_SUCCESS; + + done: +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex); +	kvmppc_set_gpr(vcpu, 3, ret);  	return EMULATE_DONE;  } @@ -77,26 +88,33 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)  	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);  	unsigned long v = 0, pteg, rb;  	unsigned long pte[2]; +	long int ret;  	pteg = get_pteg_addr(vcpu, pte_index); +	mutex_lock(&vcpu->kvm->arch.hpt_mutex);  	copy_from_user(pte, (void __user *)pteg, sizeof(pte)); +	pte[0] = be64_to_cpu(pte[0]); +	pte[1] = be64_to_cpu(pte[1]); +	ret = H_NOT_FOUND;  	if ((pte[0] & HPTE_V_VALID) == 0 ||  	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || -	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { -		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); -		return EMULATE_DONE; -	} +	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) +		goto done;  	copy_to_user((void __user *)pteg, &v, sizeof(v));  	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);  	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? 
true : false); -	kvmppc_set_gpr(vcpu, 3, H_SUCCESS); +	ret = H_SUCCESS;  	kvmppc_set_gpr(vcpu, 4, pte[0]);  	kvmppc_set_gpr(vcpu, 5, pte[1]); + done: +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex); +	kvmppc_set_gpr(vcpu, 3, ret); +  	return EMULATE_DONE;  } @@ -124,6 +142,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)  	int paramnr = 4;  	int ret = H_SUCCESS; +	mutex_lock(&vcpu->kvm->arch.hpt_mutex);  	for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {  		unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));  		unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); @@ -152,6 +171,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)  		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);  		copy_from_user(pte, (void __user *)pteg, sizeof(pte)); +		pte[0] = be64_to_cpu(pte[0]); +		pte[1] = be64_to_cpu(pte[1]);  		/* tsl = AVPN */  		flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -172,6 +193,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)  		}  		kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);  	} +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);  	kvmppc_set_gpr(vcpu, 3, ret);  	return EMULATE_DONE; @@ -184,15 +206,18 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)  	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);  	unsigned long rb, pteg, r, v;  	unsigned long pte[2]; +	long int ret;  	pteg = get_pteg_addr(vcpu, pte_index); +	mutex_lock(&vcpu->kvm->arch.hpt_mutex);  	copy_from_user(pte, (void __user *)pteg, sizeof(pte)); +	pte[0] = be64_to_cpu(pte[0]); +	pte[1] = be64_to_cpu(pte[1]); +	ret = H_NOT_FOUND;  	if ((pte[0] & HPTE_V_VALID) == 0 || -	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { -		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); -		return EMULATE_DONE; -	} +	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) +		goto done;  	v = pte[0];  	r = pte[1]; @@ -206,9 +231,14 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)  	rb = compute_tlbie_rb(v, r, pte_index);  	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); +	pte[0] = cpu_to_be64(pte[0]); +	pte[1] = cpu_to_be64(pte[1]);  	copy_to_user((void __user *)pteg, pte, sizeof(pte)); +	ret = H_SUCCESS; -	kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + done: +	mutex_unlock(&vcpu->kvm->arch.hpt_mutex); +	kvmppc_set_gpr(vcpu, 3, ret);  	return EMULATE_DONE;  } @@ -248,7 +278,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)  	case H_PUT_TCE:  		return kvmppc_h_pr_put_tce(vcpu);  	case H_CEDE: -		vcpu->arch.shared->msr |= MSR_EE; +		kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);  		kvm_vcpu_block(vcpu);  		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);  		vcpu->stat.halt_wakeup++; diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 8f7633e3afb..16c4d88ba27 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,33 +36,11 @@  #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) 		name +#else  #define FUNC(name) 		GLUE(.,name) - -	.globl	kvmppc_skip_interrupt -kvmppc_skip_interrupt: -	/* -	 * Here all GPRs are unchanged from when the interrupt happened -	 * except for r13, which is saved in SPRG_SCRATCH0. -	 */ -	mfspr	r13, SPRN_SRR0 -	addi	r13, r13, 4 -	mtspr	SPRN_SRR0, r13 -	GET_SCRATCH0(r13) -	rfid -	b	. - -	.globl	kvmppc_skip_Hinterrupt -kvmppc_skip_Hinterrupt: -	/* -	 * Here all GPRs are unchanged from when the interrupt happened -	 * except for r13, which is saved in SPRG_SCRATCH0. 
-	 */ -	mfspr	r13, SPRN_HSRR0 -	addi	r13, r13, 4 -	mtspr	SPRN_HSRR0, r13 -	GET_SCRATCH0(r13) -	hrfid -	b	. +#endif  #elif defined(CONFIG_PPC_BOOK3S_32) @@ -172,7 +150,7 @@ kvmppc_handler_skip_ins:   * On entry, r4 contains the guest shadow MSR   * MSR.EE has to be 0 when calling this function   */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline)  	mfmsr	r5  	LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)  	toreal(r7) @@ -181,58 +159,11 @@ _GLOBAL(kvmppc_entry_trampoline)  	andc	r6, r5, r6	/* Clear DR and IR in MSR value */  	/*  	 * Set EE in HOST_MSR so that it's enabled when we get into our -	 * C exit handler function +	 * C exit handler function.  	 */  	ori	r5, r5, MSR_EE  	mtsrr0	r7  	mtsrr1	r6  	RFI -#if defined(CONFIG_PPC_BOOK3S_32) -#define STACK_LR	INT_FRAME_SIZE+4 - -/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ -#define MSR_EXT_START						\ -	PPC_STL	r20, _NIP(r1);					\ -	mfmsr	r20;						\ -	LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE);			\ -	andc	r3,r20,r3;		/* Disable DR,EE */	\ -	mtmsr	r3;						\ -	sync - -#define MSR_EXT_END						\ -	mtmsr	r20;			/* Enable DR,EE */	\ -	sync;							\ -	PPC_LL	r20, _NIP(r1) - -#elif defined(CONFIG_PPC_BOOK3S_64) -#define STACK_LR	_LINK -#define MSR_EXT_START -#define MSR_EXT_END -#endif - -/* - * Activate current's external feature (FPU/Altivec/VSX) - */ -#define define_load_up(what) 					\ -								\ -_GLOBAL(kvmppc_load_up_ ## what);				\ -	PPC_STLU r1, -INT_FRAME_SIZE(r1);			\ -	mflr	r3;						\ -	PPC_STL	r3, STACK_LR(r1);				\ -	MSR_EXT_START;						\ -								\ -	bl	FUNC(load_up_ ## what);				\ -								\ -	MSR_EXT_END;						\ -	PPC_LL	r3, STACK_LR(r1);				\ -	mtlr	r3;						\ -	addi	r1, r1, INT_FRAME_SIZE;				\ -	blr - -define_load_up(fpu) -#ifdef CONFIG_ALTIVEC -define_load_up(altivec) -#endif -  #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 3219ba89524..ef27fbd5d9c 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)  	u32 irq, server, priority;  	int rc; -	if (args->nargs != 3 || args->nret != 1) { +	if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) {  		rc = -3;  		goto out;  	} -	irq = args->args[0]; -	server = args->args[1]; -	priority = args->args[2]; +	irq = be32_to_cpu(args->args[0]); +	server = be32_to_cpu(args->args[1]); +	priority = be32_to_cpu(args->args[2]);  	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);  	if (rc)  		rc = -3;  out: -	args->rets[0] = rc; +	args->rets[0] = cpu_to_be32(rc);  }  static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)  	u32 irq, server, priority;  	int rc; -	if (args->nargs != 1 || args->nret != 3) { +	if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) {  		rc = -3;  		goto out;  	} -	irq = args->args[0]; +	irq = be32_to_cpu(args->args[0]);  	server = priority = 0;  	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)  		goto out;  	} -	args->rets[1] = server; -	args->rets[2] = priority; +	args->rets[1] = cpu_to_be32(server); +	args->rets[2] = cpu_to_be32(priority);  out: -	args->rets[0] = rc; +	args->rets[0] = cpu_to_be32(rc);  }  static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ 
-69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)  	u32 irq;  	int rc; -	if (args->nargs != 1 || args->nret != 1) { +	if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {  		rc = -3;  		goto out;  	} -	irq = args->args[0]; +	irq = be32_to_cpu(args->args[0]);  	rc = kvmppc_xics_int_off(vcpu->kvm, irq);  	if (rc)  		rc = -3;  out: -	args->rets[0] = rc; +	args->rets[0] = cpu_to_be32(rc);  }  static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)  	u32 irq;  	int rc; -	if (args->nargs != 1 || args->nret != 1) { +	if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) {  		rc = -3;  		goto out;  	} -	irq = args->args[0]; +	irq = be32_to_cpu(args->args[0]);  	rc = kvmppc_xics_int_on(vcpu->kvm, irq);  	if (rc)  		rc = -3;  out: -	args->rets[0] = rc; +	args->rets[0] = cpu_to_be32(rc);  }  #endif /* CONFIG_KVM_XICS */ @@ -213,8 +213,11 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)  	gpa_t args_phys;  	int rc; -	/* r4 contains the guest physical address of the RTAS args */ -	args_phys = kvmppc_get_gpr(vcpu, 4); +	/* +	 * r4 contains the guest physical address of the RTAS args +	 * Mask off the top 4 bits since this is a guest real address +	 */ +	args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;  	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));  	if (rc) @@ -227,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)  	 * value so we can restore it on the way out.  	 */  	orig_rets = args.rets; -	args.rets = &args.args[args.nargs]; +	args.rets = &args.args[be32_to_cpu(args.nargs)];  	mutex_lock(&vcpu->kvm->lock);  	rc = -ENOENT;  	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { -		if (d->token == args.token) { +		if (d->token == be32_to_cpu(args.token)) {  			d->handler->handler(vcpu, &args);  			rc = 0;  			break; @@ -260,6 +263,7 @@ fail:  	 */  	return rc;  } +EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);  void kvmppc_rtas_tokens_free(struct kvm *kvm)  { diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 1abe4788191..acee37cde84 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -90,6 +90,15 @@ kvmppc_handler_trampoline_enter:  	LOAD_GUEST_SEGMENTS  #ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FTR_SECTION +	/* Save host FSCR */ +	mfspr	r8, SPRN_FSCR +	std	r8, HSTATE_HOST_FSCR(r13) +	/* Set FSCR during guest execution */ +	ld	r9, SVCPU_SHADOW_FSCR(r13) +	mtspr	SPRN_FSCR, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +  	/* Some guests may need to have dcbz set to 32 byte length.  	 
*  	 * Usually we ensure that by patching the guest's instructions @@ -161,8 +170,8 @@ kvmppc_handler_trampoline_enter_end:  .global kvmppc_handler_trampoline_exit  kvmppc_handler_trampoline_exit: -.global kvmppc_interrupt -kvmppc_interrupt: +.global kvmppc_interrupt_pr +kvmppc_interrupt_pr:  	/* Register usage at this point:  	 * @@ -255,6 +264,10 @@ BEGIN_FTR_SECTION  	cmpwi	r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST  	beq-	ld_last_inst  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +BEGIN_FTR_SECTION +	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL +	beq-	ld_last_inst +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  #endif  	b	no_ld_last_inst @@ -311,6 +324,18 @@ no_ld_last_inst:  no_dcbz32_off: +BEGIN_FTR_SECTION +	/* Save guest FSCR on a FAC_UNAVAIL interrupt */ +	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL +	bne+	no_fscr_save +	mfspr	r7, SPRN_FSCR +	std	r7, SVCPU_SHADOW_FSCR(r13) +no_fscr_save: +	/* Restore host FSCR */ +	ld	r8, HSTATE_HOST_FSCR(r13) +	mtspr	SPRN_FSCR, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +  #endif /* CONFIG_PPC_BOOK3S_64 */  	/* @@ -361,6 +386,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  	beqa	BOOK3S_INTERRUPT_DECREMENTER  	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON  	beqa	BOOK3S_INTERRUPT_PERFMON +	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL +	beqa	BOOK3S_INTERRUPT_DOORBELL  	RFI  kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a3a5cb8ee7e..d1acd32a64c 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)  	}  	/* Check for real mode returning too hard */ -	if (xics->real_mode) +	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))  		return kvmppc_xics_rm_complete(vcpu, req);  	switch (req) { @@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)  	return rc;  } +EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);  /* -- Initialisation code etc. -- */ @@ -1245,18 +1246,20 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)  		kvm->arch.xics = xics;  	mutex_unlock(&kvm->lock); -	if (ret) +	if (ret) { +		kfree(xics);  		return ret; +	}  	xics_debugfs_init(xics); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	if (cpu_has_feature(CPU_FTR_ARCH_206)) {  		/* Enable real mode support */  		xics->real_mode = ENABLE_REALMODE;  		xics->real_mode_dbg = DEBUG_REALMODE;  	} -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */  	return 0;  } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 17722d82f1d..ab62109fdfa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -40,7 +40,9 @@  #include "timing.h"  #include "booke.h" -#include "trace.h" + +#define CREATE_TRACE_POINTS +#include "trace_booke.h"  unsigned long kvmppc_booke_handlers; @@ -133,6 +135,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)  #endif  } +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) +{ +	/* Synchronize guest's desire to get debug interrupts into shadow MSR */ +#ifndef CONFIG_KVM_BOOKE_HV +	vcpu->arch.shadow_msr &= ~MSR_DE; +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; +#endif + +	/* Force enable debug interrupts when user space wants to debug */ +	if (vcpu->guest_debug) { +#ifdef CONFIG_KVM_BOOKE_HV +		/* +		 * Since there is no shadow MSR, sync MSR_DE into the guest +		 * visible MSR. 
+		 */ +		vcpu->arch.shared->msr |= MSR_DE; +#else +		vcpu->arch.shadow_msr |= MSR_DE; +		vcpu->arch.shared->msr &= ~MSR_DE; +#endif +	} +} +  /*   * Helper function for "full" MSR writes.  No need to call this if only   * EE/CE/ME/DE/RI are changing. @@ -150,6 +175,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)  	kvmppc_mmu_msr_notify(vcpu, old_msr);  	kvmppc_vcpu_sync_spe(vcpu);  	kvmppc_vcpu_sync_fpu(vcpu); +	kvmppc_vcpu_sync_debug(vcpu);  }  static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@ -617,7 +643,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)  		local_irq_enable();  		kvm_vcpu_block(vcpu);  		clear_bit(KVM_REQ_UNHALT, &vcpu->requests); -		local_irq_disable(); +		hard_irq_disable();  		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);  		r = 1; @@ -655,35 +681,23 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  {  	int ret, s; -#ifdef CONFIG_PPC_FPU -	unsigned int fpscr; -	int fpexc_mode; -	u64 fpr[32]; -#endif +	struct debug_reg debug;  	if (!vcpu->arch.sane) {  		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;  		return -EINVAL;  	} -	local_irq_disable();  	s = kvmppc_prepare_to_enter(vcpu);  	if (s <= 0) { -		local_irq_enable();  		ret = s;  		goto out;  	} +	/* interrupts now hard-disabled */  #ifdef CONFIG_PPC_FPU  	/* Save userspace FPU state in stack */  	enable_kernel_fp(); -	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); -	fpscr = current->thread.fpscr.val; -	fpexc_mode = current->thread.fpexc_mode; - -	/* Restore guest FPU state to thread */ -	memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr)); -	current->thread.fpscr.val = vcpu->arch.fpscr;  	/*  	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest @@ -696,6 +710,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	kvmppc_load_guest_fp(vcpu);  #endif +	/* Switch to guest debug context */ +	debug = vcpu->arch.shadow_dbg_reg; +	switch_booke_debug_regs(&debug); +	debug = current->thread.debug; +	current->thread.debug = vcpu->arch.shadow_dbg_reg; + +	vcpu->arch.pgdir = current->mm->pgd;  	kvmppc_fix_ee_before_entry();  	ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -703,19 +724,14 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  	/* No need for kvm_guest_exit. It's done in handle_exit.  	   We also get here with interrupts enabled. 
*/ +	/* Switch back to user space debug context */ +	switch_booke_debug_regs(&debug); +	current->thread.debug = debug; +  #ifdef CONFIG_PPC_FPU  	kvmppc_save_guest_fp(vcpu);  	vcpu->fpu_active = 0; - -	/* Save guest FPU state from thread */ -	memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr)); -	vcpu->arch.fpscr = current->thread.fpscr.val; - -	/* Restore userspace FPU state from stack */ -	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); -	current->thread.fpscr.val = fpscr; -	current->thread.fpexc_mode = fpexc_mode;  #endif  out: @@ -758,6 +774,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)  	}  } +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ +	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); +	u32 dbsr = vcpu->arch.dbsr; + +	run->debug.arch.status = 0; +	run->debug.arch.address = vcpu->arch.pc; + +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; +	} else { +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) +			run->debug.arch.address = dbg_reg->dac1; +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) +			run->debug.arch.address = dbg_reg->dac2; +	} + +	return RESUME_HOST; +} +  static void kvmppc_fill_pt_regs(struct pt_regs *regs)  {  	ulong r1, ip, msr, lr; @@ -818,6 +858,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,  	case BOOKE_INTERRUPT_CRITICAL:  		unknown_exception(®s);  		break; +	case BOOKE_INTERRUPT_DEBUG: +		/* Save DBSR before preemption is enabled */ +		vcpu->arch.dbsr = mfspr(SPRN_DBSR); +		kvmppc_clear_dbsr(); +		break;  	}  } @@ -833,17 +878,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	int s;  	int idx; -#ifdef CONFIG_PPC64 -	WARN_ON(local_paca->irq_happened != 0); -#endif - -	/* -	 * We enter with interrupts disabled in hardware, but -	 * we need to call hard_irq_disable anyway to ensure that -	 * the software state is kept in sync. -	 */ -	hard_irq_disable(); -  	/* update before a new last_exit_type is rewritten */  	kvmppc_update_timing_stats(vcpu); @@ -1135,18 +1169,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	}  	case BOOKE_INTERRUPT_DEBUG: { -		u32 dbsr; - -		vcpu->arch.pc = mfspr(SPRN_CSRR0); - -		/* clear IAC events in DBSR register */ -		dbsr = mfspr(SPRN_DBSR); -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; -		mtspr(SPRN_DBSR, dbsr); - -		run->exit_reason = KVM_EXIT_DEBUG; +		r = kvmppc_handle_debug(run, vcpu); +		if (r == RESUME_HOST) +			run->exit_reason = KVM_EXIT_DEBUG;  		kvmppc_account_exit(vcpu, DEBUG_EXITS); -		r = RESUME_HOST;  		break;  	} @@ -1160,12 +1186,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,  	 * aren't already exiting to userspace for some other reason.  	 
*/  	if (!(r & RESUME_HOST)) { -		local_irq_disable();  		s = kvmppc_prepare_to_enter(vcpu); -		if (s <= 0) { -			local_irq_enable(); +		if (s <= 0)  			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); -		} else { +		else { +			/* interrupts now hard-disabled */  			kvmppc_fix_ee_before_entry();  		}  	} @@ -1197,7 +1222,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)  	kvmppc_set_msr(vcpu, 0);  #ifndef CONFIG_KVM_BOOKE_HV -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;  	vcpu->arch.shadow_pid = 1;  	vcpu->arch.shared->msr = 0;  #endif @@ -1359,7 +1384,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,  	return 0;  } -void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  {  	sregs->u.e.features |= KVM_SREGS_E_IVOR; @@ -1379,6 +1404,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  	sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];  	sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];  	sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +	return 0;  }  int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -1413,8 +1439,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,  	get_sregs_base(vcpu, sregs);  	get_sregs_arch206(vcpu, sregs); -	kvmppc_core_get_sregs(vcpu, sregs); -	return 0; +	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);  }  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, @@ -1433,7 +1458,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,  	if (ret < 0)  		return ret; -	return kvmppc_core_set_sregs(vcpu, sregs); +	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);  }  int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) @@ -1441,7 +1466,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	int r = 0;  	union kvmppc_one_reg val;  	int size; -	long int i;  	size = one_reg_size(reg->id);  	if (size > sizeof(val)) @@ -1449,16 +1473,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	switch (reg->id) {  	case KVM_REG_PPC_IAC1: +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); +		break;  	case KVM_REG_PPC_IAC2: +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); +		break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2  	case KVM_REG_PPC_IAC3: +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); +		break;  	case KVM_REG_PPC_IAC4: -		i = reg->id - KVM_REG_PPC_IAC1; -		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);  		break; +#endif  	case KVM_REG_PPC_DAC1: +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); +		break;  	case KVM_REG_PPC_DAC2: -		i = reg->id - KVM_REG_PPC_DAC1; -		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); +		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);  		break;  	case KVM_REG_PPC_EPR: {  		u32 epr = get_guest_epr(vcpu); @@ -1477,10 +1509,13 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  		val = get_reg_val(reg->id, vcpu->arch.tsr);  		break;  	case KVM_REG_PPC_DEBUG_INST: -		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); +		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); +		break; +	case KVM_REG_PPC_VRSAVE: +		val = get_reg_val(reg->id, vcpu->arch.vrsave);  		break;  	default: -		r = kvmppc_get_one_reg(vcpu, reg->id, &val); +		r = 
vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);  		break;  	} @@ -1498,7 +1533,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	int r = 0;  	union kvmppc_one_reg val;  	int size; -	long int i;  	size = one_reg_size(reg->id);  	if (size > sizeof(val)) @@ -1509,16 +1543,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  	switch (reg->id) {  	case KVM_REG_PPC_IAC1: +		vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); +		break;  	case KVM_REG_PPC_IAC2: +		vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); +		break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2  	case KVM_REG_PPC_IAC3: +		vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); +		break;  	case KVM_REG_PPC_IAC4: -		i = reg->id - KVM_REG_PPC_IAC1; -		vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); +		vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);  		break; +#endif  	case KVM_REG_PPC_DAC1: +		vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); +		break;  	case KVM_REG_PPC_DAC2: -		i = reg->id - KVM_REG_PPC_DAC1; -		vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); +		vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);  		break;  	case KVM_REG_PPC_EPR: {  		u32 new_epr = set_reg_val(reg->id, val); @@ -1552,20 +1594,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  		kvmppc_set_tcr(vcpu, tcr);  		break;  	} +	case KVM_REG_PPC_VRSAVE: +		vcpu->arch.vrsave = set_reg_val(reg->id, val); +		break;  	default: -		r = kvmppc_set_one_reg(vcpu, reg->id, &val); +		r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);  		break;  	}  	return r;  } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, -					 struct kvm_guest_debug *dbg) -{ -	return -EINVAL; -} -  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  {  	return -ENOTSUPP; @@ -1590,12 +1629,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)  	return -ENOTSUPP;  } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,  			      struct kvm_memory_slot *dont)  {  } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,  			       unsigned long npages)  {  	return 0; @@ -1671,6 +1710,157 @@ void kvmppc_decrementer_func(unsigned long data)  	kvmppc_set_tsr_bits(vcpu, TSR_DIS);  } +static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg, +				       uint64_t addr, int index) +{ +	switch (index) { +	case 0: +		dbg_reg->dbcr0 |= DBCR0_IAC1; +		dbg_reg->iac1 = addr; +		break; +	case 1: +		dbg_reg->dbcr0 |= DBCR0_IAC2; +		dbg_reg->iac2 = addr; +		break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 +	case 2: +		dbg_reg->dbcr0 |= DBCR0_IAC3; +		dbg_reg->iac3 = addr; +		break; +	case 3: +		dbg_reg->dbcr0 |= DBCR0_IAC4; +		dbg_reg->iac4 = addr; +		break; +#endif +	default: +		return -EINVAL; +	} + +	dbg_reg->dbcr0 |= DBCR0_IDM; +	return 0; +} + +static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr, +				       int type, int index) +{ +	switch (index) { +	case 0: +		if (type & KVMPPC_DEBUG_WATCH_READ) +			dbg_reg->dbcr0 |= DBCR0_DAC1R; +		if (type & KVMPPC_DEBUG_WATCH_WRITE) +			dbg_reg->dbcr0 |= DBCR0_DAC1W; +		dbg_reg->dac1 = addr; +		break; +	case 1: +		if (type & KVMPPC_DEBUG_WATCH_READ) +			dbg_reg->dbcr0 |= DBCR0_DAC2R; +		if (type & KVMPPC_DEBUG_WATCH_WRITE) +			dbg_reg->dbcr0 |= DBCR0_DAC2W; +		
dbg_reg->dac2 = addr; +		break; +	default: +		return -EINVAL; +	} + +	dbg_reg->dbcr0 |= DBCR0_IDM; +	return 0; +} +void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) +{ +	/* XXX: Add similar MSR protection for BookE-PR */ +#ifdef CONFIG_KVM_BOOKE_HV +	BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP)); +	if (set) { +		if (prot_bitmap & MSR_UCLE) +			vcpu->arch.shadow_msrp |= MSRP_UCLEP; +		if (prot_bitmap & MSR_DE) +			vcpu->arch.shadow_msrp |= MSRP_DEP; +		if (prot_bitmap & MSR_PMM) +			vcpu->arch.shadow_msrp |= MSRP_PMMP; +	} else { +		if (prot_bitmap & MSR_UCLE) +			vcpu->arch.shadow_msrp &= ~MSRP_UCLEP; +		if (prot_bitmap & MSR_DE) +			vcpu->arch.shadow_msrp &= ~MSRP_DEP; +		if (prot_bitmap & MSR_PMM) +			vcpu->arch.shadow_msrp &= ~MSRP_PMMP; +	} +#endif +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, +					 struct kvm_guest_debug *dbg) +{ +	struct debug_reg *dbg_reg; +	int n, b = 0, w = 0; + +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0; +		vcpu->guest_debug = 0; +		kvm_guest_protect_msr(vcpu, MSR_DE, false); +		return 0; +	} + +	kvm_guest_protect_msr(vcpu, MSR_DE, true); +	vcpu->guest_debug = dbg->control; +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0; +	/* Set DBCR0_EDM in guest visible DBCR0 register. */ +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + +	/* Code below handles only HW breakpoints */ +	dbg_reg = &(vcpu->arch.shadow_dbg_reg); + +#ifdef CONFIG_KVM_BOOKE_HV +	/* +	 * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1 +	 * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0 +	 */ +	dbg_reg->dbcr1 = 0; +	dbg_reg->dbcr2 = 0; +#else +	/* +	 * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1 +	 * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR +	 * is set. 
+	 */ +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | +			  DBCR1_IAC4US; +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#endif + +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) +		return 0; + +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { +		uint64_t addr = dbg->arch.bp[n].addr; +		uint32_t type = dbg->arch.bp[n].type; + +		if (type == KVMPPC_DEBUG_NONE) +			continue; + +		if (type & !(KVMPPC_DEBUG_WATCH_READ | +			     KVMPPC_DEBUG_WATCH_WRITE | +			     KVMPPC_DEBUG_BREAKPOINT)) +			return -EINVAL; + +		if (type & KVMPPC_DEBUG_BREAKPOINT) { +			/* Setting H/W breakpoint */ +			if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) +				return -EINVAL; +		} else { +			/* Setting H/W watchpoint */ +			if (kvmppc_booke_add_watchpoint(dbg_reg, addr, +							type, w++)) +				return -EINVAL; +		} +	} + +	return 0; +} +  void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  {  	vcpu->cpu = smp_processor_id(); @@ -1681,6 +1871,44 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)  {  	current->thread.kvm_vcpu = NULL;  	vcpu->cpu = -1; + +	/* Clear pending debug event in DBSR */ +	kvmppc_clear_dbsr(); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ +	return kvm->arch.kvm_ops->init_vm(kvm); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ +	return kvm->arch.kvm_ops->vcpu_create(kvm, id); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +	kvm->arch.kvm_ops->destroy_vm(kvm); +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);  }  int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 5fd1ba69357..b632cd35919 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,6 +99,30 @@ enum int_class {  void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); +extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, +				      unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, +					 ulong spr_val); +extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, +					 ulong *spr_val); +extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, +				       struct kvm_vcpu *vcpu, +				       unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, +					  ulong spr_val); +extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, +					  ulong *spr_val); +extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, +				       struct kvm_vcpu *vcpu, +				       unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, +					  ulong spr_val); +extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, +					  ulong *spr_val); +  /*   * Load up guest vcpu FP state if it's needed.   
* It also set the MSR_FP in thread so that host know @@ -112,7 +136,9 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)  {  #ifdef CONFIG_PPC_FPU  	if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { -		load_up_fpu(); +		enable_kernel_fp(); +		load_fp_state(&vcpu->arch.fp); +		current->thread.fp_save_area = &vcpu->arch.fp;  		current->thread.regs->msr |= MSR_FP;  	}  #endif @@ -127,6 +153,12 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)  #ifdef CONFIG_PPC_FPU  	if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))  		giveup_fpu(current); +	current->thread.fp_save_area = NULL;  #endif  } + +static inline void kvmppc_clear_dbsr(void) +{ +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); +}  #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d659c5..a1712b818a5 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -33,6 +33,8 @@  #ifdef CONFIG_64BIT  #include <asm/exception-64e.h> +#include <asm/hw_irq.h> +#include <asm/irqflags.h>  #else  #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */  #endif @@ -227,17 +229,20 @@  	stw	r10, VCPU_CR(r4)  	PPC_STL r11, VCPU_GPR(R4)(r4)  	PPC_STL	r5, VCPU_GPR(R5)(r4) -	.if \type == EX_CRIT -	PPC_LL	r5, (\paca_ex + EX_R13)(r13) -	.else -	mfspr	r5, \scratch -	.endif  	PPC_STL	r6, VCPU_GPR(R6)(r4)  	PPC_STL	r8, VCPU_GPR(R8)(r4)  	PPC_STL	r9, VCPU_GPR(R9)(r4) -	PPC_STL r5, VCPU_GPR(R13)(r4) +	.if \type == EX_TLB +	PPC_LL	r5, EX_TLB_R13(r12) +	PPC_LL	r6, EX_TLB_R10(r12) +	PPC_LL	r8, EX_TLB_R11(r12) +	mfspr	r12, \scratch +	.else +	mfspr	r5, \scratch  	PPC_LL	r6, (\paca_ex + \ex_r10)(r13)  	PPC_LL	r8, (\paca_ex + \ex_r11)(r13) +	.endif +	PPC_STL r5, VCPU_GPR(R13)(r4)  	PPC_STL r3, VCPU_GPR(R3)(r4)  	PPC_STL r7, VCPU_GPR(R7)(r4)  	PPC_STL r12, VCPU_GPR(R12)(r4) @@ -319,6 +324,8 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \  	SPRN_DSRR0, SPRN_DSRR1, 0  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \  	SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \ +	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)  #else  /*   * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -431,10 +438,16 @@ _GLOBAL(kvmppc_resume_host)  	PPC_STL	r5, VCPU_LR(r4)  	mfspr	r7, SPRN_SPRG5  	stw	r3, VCPU_VRSAVE(r4) +#ifdef CONFIG_64BIT +	PPC_LL	r3, PACA_SPRG_VDSO(r13) +#endif  	PPC_STD(r6, VCPU_SHARED_SPRG4, r11)  	mfspr	r8, SPRN_SPRG6  	PPC_STD(r7, VCPU_SHARED_SPRG5, r11)  	mfspr	r9, SPRN_SPRG7 +#ifdef CONFIG_64BIT +	mtspr	SPRN_SPRG_VDSO_WRITE, r3 +#endif  	PPC_STD(r8, VCPU_SHARED_SPRG6, r11)  	mfxer	r3  	PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -465,6 +478,15 @@ _GLOBAL(kvmppc_resume_host)  	mtspr	SPRN_EPCR, r3  	isync +#ifdef CONFIG_64BIT +	/* +	 * We enter with interrupts disabled in hardware, but +	 * we need to call RECONCILE_IRQ_STATE to ensure +	 * that the software state is kept in sync. +	 */ +	RECONCILE_IRQ_STATE(r3,r5) +#endif +  	/* Switch to kernel stack and jump to handler. 
*/  	PPC_LL	r3, HOST_RUN(r1)  	mr	r5, r14 /* intno */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ce6b73c2961..2e02ed849f3 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -16,6 +16,8 @@  #include <linux/slab.h>  #include <linux/err.h>  #include <linux/export.h> +#include <linux/module.h> +#include <linux/miscdevice.h>  #include <asm/reg.h>  #include <asm/cputable.h> @@ -305,7 +307,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)  {  } -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)  {  	kvmppc_booke_vcpu_load(vcpu, cpu); @@ -313,7 +315,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));  } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)  {  #ifdef CONFIG_SPE  	if (vcpu->arch.shadow_msr & MSR_SPE) @@ -367,7 +369,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)  	return 0;  } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu, +				      struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -388,9 +391,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  	kvmppc_get_sregs_ivor(vcpu, sregs);  	kvmppc_get_sregs_e500_tlb(vcpu, sregs); +	return 0;  } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu, +				      struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);  	int ret; @@ -425,21 +430,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  	return kvmppc_set_sregs_ivor(vcpu, sregs);  } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, -			union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, +				   union kvmppc_one_reg *val)  {  	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);  	return r;  } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, -		       union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, +				   union kvmppc_one_reg *val)  {  	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);  	return r;  } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, +						     unsigned int id)  {  	struct kvmppc_vcpu_e500 *vcpu_e500;  	struct kvm_vcpu *vcpu; @@ -481,7 +487,7 @@ out:  	return ERR_PTR(err);  } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -492,15 +498,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)  	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);  } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_e500(struct kvm *kvm)  {  	return 0;  } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)  {  } +static struct kvmppc_ops kvm_ops_e500 = { +	.get_sregs = kvmppc_core_get_sregs_e500, +	.set_sregs = kvmppc_core_set_sregs_e500, +	.get_one_reg = kvmppc_get_one_reg_e500, +	.set_one_reg = kvmppc_set_one_reg_e500, +	.vcpu_load   = kvmppc_core_vcpu_load_e500, +	.vcpu_put    = kvmppc_core_vcpu_put_e500, +	.vcpu_create = kvmppc_core_vcpu_create_e500, 
+	.vcpu_free   = kvmppc_core_vcpu_free_e500, +	.mmu_destroy  = kvmppc_mmu_destroy_e500, +	.init_vm = kvmppc_core_init_vm_e500, +	.destroy_vm = kvmppc_core_destroy_vm_e500, +	.emulate_op = kvmppc_core_emulate_op_e500, +	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500, +	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500, +}; +  static int __init kvmppc_e500_init(void)  {  	int r, i; @@ -512,11 +535,11 @@ static int __init kvmppc_e500_init(void)  	r = kvmppc_core_check_processor_compat();  	if (r) -		return r; +		goto err_out;  	r = kvmppc_booke_init();  	if (r) -		return r; +		goto err_out;  	/* copy extra E500 exception handlers */  	ivor[0] = mfspr(SPRN_IVOR32); @@ -534,13 +557,23 @@ static int __init kvmppc_e500_init(void)  	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +  			   ivor[max_ivor] + handler_len); -	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +	if (r) +		goto err_out; +	kvm_ops_e500.owner = THIS_MODULE; +	kvmppc_pr_ops = &kvm_ops_e500; + +err_out: +	return r;  }  static void __exit kvmppc_e500_exit(void)  { +	kvmppc_pr_ops = NULL;  	kvmppc_booke_exit();  }  module_init(kvmppc_e500_init);  module_exit(kvmppc_e500_exit); +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c2e5e98453a..a326178bdea 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -31,11 +31,13 @@ enum vcpu_ftr {  #define E500_TLB_NUM   2  /* entry is mapped somewhere in host TLB */ -#define E500_TLB_VALID		(1 << 0) +#define E500_TLB_VALID		(1 << 31)  /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ -#define E500_TLB_BITMAP		(1 << 1) +#define E500_TLB_BITMAP		(1 << 30)  /* TLB1 entry is mapped by host TLB0 */ -#define E500_TLB_TLB0		(1 << 2) +#define E500_TLB_TLB0		(1 << 29) +/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ +#define E500_TLB_MAS2_ATTR	(0x7f)  struct tlbe_ref {  	pfn_t pfn;		/* valid only for TLB0, except briefly */ @@ -117,7 +119,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)  #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)  #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)  #define MAS2_ATTRIB_MASK \ -	  (MAS2_X0 | MAS2_X1) +	  (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)  #define MAS3_ATTRIB_MASK \  	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \  	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index b10a01243ab..002d5176414 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -19,6 +19,7 @@  #include "booke.h"  #include "e500.h" +#define XOP_DCBTLS  166  #define XOP_MSGSND  206  #define XOP_MSGCLR  238  #define XOP_TLBIVAX 786 @@ -26,6 +27,7 @@  #define XOP_TLBRE   946  #define XOP_TLBWE   978  #define XOP_TLBILX  18 +#define XOP_EHPRIV  270  #ifdef CONFIG_KVM_E500MC  static int dbell2prio(ulong param) @@ -82,8 +84,37 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)  }  #endif -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, -                           unsigned int inst, int *advance) +static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, +				   unsigned int inst, int *advance) +{ +	int emulated = EMULATE_DONE; + +	switch (get_oc(inst)) { +	case EHPRIV_OC_DEBUG: +		run->exit_reason = KVM_EXIT_DEBUG; +		run->debug.arch.address = 
vcpu->arch.pc; +		run->debug.arch.status = 0; +		kvmppc_account_exit(vcpu, DEBUG_EXITS); +		emulated = EMULATE_EXIT_USER; +		*advance = 0; +		break; +	default: +		emulated = EMULATE_FAIL; +	} +	return emulated; +} + +static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu) +{ +	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + +	/* Always fail to lock the cache */ +	vcpu_e500->l1csr0 |= L1CSR0_CUL; +	return EMULATE_DONE; +} + +int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, +				unsigned int inst, int *advance)  {  	int emulated = EMULATE_DONE;  	int ra = get_ra(inst); @@ -95,6 +126,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  	case 31:  		switch (get_xop(inst)) { +		case XOP_DCBTLS: +			emulated = kvmppc_e500_emul_dcbtls(vcpu); +			break; +  #ifdef CONFIG_KVM_E500MC  		case XOP_MSGSND:  			emulated = kvmppc_e500_emul_msgsnd(vcpu, rb); @@ -130,6 +165,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);  			break; +		case XOP_EHPRIV: +			emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, +							   advance); +			break; +  		default:  			emulated = EMULATE_FAIL;  		} @@ -146,7 +186,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,  	return emulated;  } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);  	int emulated = EMULATE_DONE; @@ -196,6 +236,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  		break;  	case SPRN_L1CSR1:  		vcpu_e500->l1csr1 = spr_val; +		vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);  		break;  	case SPRN_HID0:  		vcpu_e500->hid0 = spr_val; @@ -237,7 +278,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)  	return emulated;  } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);  	int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 6d6f153b6c1..50860e919cb 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -32,7 +32,7 @@  #include <asm/kvm_ppc.h>  #include "e500.h" -#include "trace.h" +#include "trace_booke.h"  #include "timing.h"  #include "e500_mmu_host.h" @@ -127,7 +127,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,  }  static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, -		unsigned int eaddr, int as) +		gva_t eaddr, int as)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);  	unsigned int victim, tsized; @@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,  	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);  } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)  {  } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 1c6a9d729df..86903d3f5a0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -32,10 +32,11 @@  #include <asm/kvm_ppc.h>  #include "e500.h" -#include "trace.h"  #include "timing.h"  #include "e500_mmu_host.h" +#include "trace_booke.h" +  #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)  
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; @@ -64,15 +65,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)  	return mas3;  } -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) -{ -#ifdef CONFIG_SMP -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; -#else -	return mas2 & MAS2_ATTRIB_MASK; -#endif -} -  /*   * writing shadow tlb entry to host TLB   */ @@ -230,15 +222,15 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,  		ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);  	} -	/* Already invalidated in between */ -	if (!(ref->flags & E500_TLB_VALID)) -		return; - -	/* Guest tlbe is backed by at most one host tlbe per shadow pid. */ -	kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); +	/* +	 * If TLB entry is still valid then it's a TLB0 entry, and thus +	 * backed by at most one host tlbe per shadow pid +	 */ +	if (ref->flags & E500_TLB_VALID) +		kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);  	/* Mark the TLB as not backed by the host anymore */ -	ref->flags &= ~E500_TLB_VALID; +	ref->flags = 0;  }  static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) @@ -248,10 +240,16 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)  static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,  					 struct kvm_book3e_206_tlb_entry *gtlbe, -					 pfn_t pfn) +					 pfn_t pfn, unsigned int wimg)  {  	ref->pfn = pfn; -	ref->flags |= E500_TLB_VALID; +	ref->flags = E500_TLB_VALID; + +	/* Use guest supplied MAS2_G and MAS2_E */ +	ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; + +	/* Mark the page accessed */ +	kvm_set_pfn_accessed(pfn);  	if (tlbe_is_writable(gtlbe))  		kvm_set_pfn_dirty(pfn); @@ -312,8 +310,7 @@ static void kvmppc_e500_setup_stlbe(  	/* Force IPROT=0 for all guest mappings. 
*/  	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; -	stlbe->mas2 = (gvaddr & MAS2_EPN) | -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr); +	stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);  	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |  			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); @@ -332,6 +329,17 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,  	unsigned long hva;  	int pfnmap = 0;  	int tsize = BOOK3E_PAGESZ_4K; +	int ret = 0; +	unsigned long mmu_seq; +	struct kvm *kvm = vcpu_e500->vcpu.kvm; +	unsigned long tsize_pages = 0; +	pte_t *ptep; +	unsigned int wimg = 0; +	pgd_t *pgdir; + +	/* used to check for invalidations in progress */ +	mmu_seq = kvm->mmu_notifier_seq; +	smp_rmb();  	/*  	 * Translate guest physical to true physical, acquiring @@ -394,7 +402,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,  			 */  			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { -				unsigned long gfn_start, gfn_end, tsize_pages; +				unsigned long gfn_start, gfn_end;  				tsize_pages = 1 << (tsize - 2);  				gfn_start = gfn & ~(tsize_pages - 1); @@ -436,11 +444,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,  	}  	if (likely(!pfnmap)) { -		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); +		tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);  		pfn = gfn_to_pfn_memslot(slot, gfn);  		if (is_error_noslot_pfn(pfn)) { -			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", -					(long)gfn); +			if (printk_ratelimit()) +				pr_err("%s: real page not found for gfn %lx\n", +				       __func__, (long)gfn);  			return -EINVAL;  		} @@ -449,7 +458,25 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,  		gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);  	} -	kvmppc_e500_ref_setup(ref, gtlbe, pfn); +	spin_lock(&kvm->mmu_lock); +	if (mmu_notifier_retry(kvm, mmu_seq)) { +		ret = -EAGAIN; +		goto out; +	} + + +	pgdir = vcpu_e500->vcpu.arch.pgdir; +	ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages); +	if (pte_present(*ptep)) +		wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; +	else { +		if (printk_ratelimit()) +			pr_err("%s: pte not present: gfn %lx, pfn %lx\n", +				__func__, (long)gfn, pfn); +		ret = -EINVAL; +		goto out; +	} +	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);  	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,  				ref, gvaddr, stlbe); @@ -457,10 +484,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,  	/* Clear i-cache for new pages */  	kvmppc_mmu_flush_icache(pfn); +out: +	spin_unlock(&kvm->mmu_lock); +  	/* Drop refcount on page, so that mmu notifiers can clear it */  	kvm_release_pfn_clean(pfn); -	return 0; +	return ret;  }  /* XXX only map the one-one case, for now use TLB0 */ diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 19c8379575f..17e45627922 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -16,6 +16,8 @@  #include <linux/slab.h>  #include <linux/err.h>  #include <linux/export.h> +#include <linux/miscdevice.h> +#include <linux/module.h>  #include <asm/reg.h>  #include <asm/cputable.h> @@ -110,7 +112,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)  static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ 
-147,7 +149,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  	kvmppc_load_guest_fp(vcpu);  } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)  {  	vcpu->arch.eplc = mfspr(SPRN_EPLC);  	vcpu->arch.epsc = mfspr(SPRN_EPSC); @@ -204,7 +206,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)  	return 0;  } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu, +					struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -224,10 +227,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  	sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];  	sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; -	kvmppc_get_sregs_ivor(vcpu, sregs); +	return kvmppc_get_sregs_ivor(vcpu, sregs);  } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, +					struct kvm_sregs *sregs)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);  	int ret; @@ -260,21 +264,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)  	return kvmppc_set_sregs_ivor(vcpu, sregs);  } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, -			union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, +			      union kvmppc_one_reg *val)  {  	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);  	return r;  } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, -		       union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, +			      union kvmppc_one_reg *val)  {  	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);  	return r;  } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, +						       unsigned int id)  {  	struct kvmppc_vcpu_e500 *vcpu_e500;  	struct kvm_vcpu *vcpu; @@ -315,7 +320,7 @@ out:  	return ERR_PTR(err);  } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)  {  	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -325,7 +330,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)  	kmem_cache_free(kvm_vcpu_cache, vcpu_e500);  } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)  {  	int lpid; @@ -337,29 +342,56 @@ int kvmppc_core_init_vm(struct kvm *kvm)  	return 0;  } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)  {  	kvmppc_free_lpid(kvm->arch.lpid);  } +static struct kvmppc_ops kvm_ops_e500mc = { +	.get_sregs = kvmppc_core_get_sregs_e500mc, +	.set_sregs = kvmppc_core_set_sregs_e500mc, +	.get_one_reg = kvmppc_get_one_reg_e500mc, +	.set_one_reg = kvmppc_set_one_reg_e500mc, +	.vcpu_load   = kvmppc_core_vcpu_load_e500mc, +	.vcpu_put    = kvmppc_core_vcpu_put_e500mc, +	.vcpu_create = kvmppc_core_vcpu_create_e500mc, +	.vcpu_free   = kvmppc_core_vcpu_free_e500mc, +	.mmu_destroy  = kvmppc_mmu_destroy_e500, +	.init_vm = kvmppc_core_init_vm_e500mc, +	.destroy_vm = kvmppc_core_destroy_vm_e500mc, +	.emulate_op = kvmppc_core_emulate_op_e500, +	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500, +	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500, +}; +  static int __init kvmppc_e500mc_init(void)  {  	int r;  	r = 
kvmppc_booke_init();  	if (r) -		return r; +		goto err_out;  	kvmppc_init_lpid(64);  	kvmppc_claim_lpid(0); /* host */ -	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +	if (r) +		goto err_out; +	kvm_ops_e500mc.owner = THIS_MODULE; +	kvmppc_pr_ops = &kvm_ops_e500mc; + +err_out: +	return r;  }  static void __exit kvmppc_e500mc_exit(void)  { +	kvmppc_pr_ops = NULL;  	kvmppc_booke_exit();  }  module_init(kvmppc_e500mc_init);  module_exit(kvmppc_e500mc_exit); +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 751cd45f65a..da86d9ba347 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -97,10 +97,10 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)  	switch (sprn) {  	case SPRN_SRR0: -		vcpu->arch.shared->srr0 = spr_val; +		kvmppc_set_srr0(vcpu, spr_val);  		break;  	case SPRN_SRR1: -		vcpu->arch.shared->srr1 = spr_val; +		kvmppc_set_srr1(vcpu, spr_val);  		break;  	/* XXX We need to context-switch the timebase for @@ -114,24 +114,24 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)  		break;  	case SPRN_SPRG0: -		vcpu->arch.shared->sprg0 = spr_val; +		kvmppc_set_sprg0(vcpu, spr_val);  		break;  	case SPRN_SPRG1: -		vcpu->arch.shared->sprg1 = spr_val; +		kvmppc_set_sprg1(vcpu, spr_val);  		break;  	case SPRN_SPRG2: -		vcpu->arch.shared->sprg2 = spr_val; +		kvmppc_set_sprg2(vcpu, spr_val);  		break;  	case SPRN_SPRG3: -		vcpu->arch.shared->sprg3 = spr_val; +		kvmppc_set_sprg3(vcpu, spr_val);  		break;  	/* PIR can legally be written, but we ignore it */  	case SPRN_PIR: break;  	default: -		emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, -						     spr_val); +		emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, +								  spr_val);  		if (emulated == EMULATE_FAIL)  			printk(KERN_INFO "mtspr: unknown spr "  				"0x%x\n", sprn); @@ -150,10 +150,10 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)  	switch (sprn) {  	case SPRN_SRR0: -		spr_val = vcpu->arch.shared->srr0; +		spr_val = kvmppc_get_srr0(vcpu);  		break;  	case SPRN_SRR1: -		spr_val = vcpu->arch.shared->srr1; +		spr_val = kvmppc_get_srr1(vcpu);  		break;  	case SPRN_PVR:  		spr_val = vcpu->arch.pvr; @@ -173,16 +173,16 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)  		break;  	case SPRN_SPRG0: -		spr_val = vcpu->arch.shared->sprg0; +		spr_val = kvmppc_get_sprg0(vcpu);  		break;  	case SPRN_SPRG1: -		spr_val = vcpu->arch.shared->sprg1; +		spr_val = kvmppc_get_sprg1(vcpu);  		break;  	case SPRN_SPRG2: -		spr_val = vcpu->arch.shared->sprg2; +		spr_val = kvmppc_get_sprg2(vcpu);  		break;  	case SPRN_SPRG3: -		spr_val = vcpu->arch.shared->sprg3; +		spr_val = kvmppc_get_sprg3(vcpu);  		break;  	/* Note: SPRG4-7 are user-readable, so we don't get  	 * a trap. 
*/ @@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)  		spr_val = kvmppc_get_dec(vcpu, get_tb());  		break;  	default: -		emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, -						     &spr_val); +		emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, +								  &spr_val);  		if (unlikely(emulated == EMULATE_FAIL)) {  			printk(KERN_INFO "mfspr: unknown spr "  				"0x%x\n", sprn); @@ -219,7 +219,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)   * lmw   * stmw   * - * XXX is_bigendian should depend on MMU mapping or MSR[LE]   */  /* XXX Should probably auto-generate instruction decoding for a particular core   * from opcode tables in the future. */ @@ -464,7 +463,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)  	}  	if (emulated == EMULATE_FAIL) { -		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); +		emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, +							       &advance);  		if (emulated == EMULATE_AGAIN) {  			advance = 0;  		} else if (emulated == EMULATE_FAIL) { @@ -483,3 +483,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)  	return emulated;  } +EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction); diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 2861ae9eaae..b68d0dc9479 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -126,6 +126,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr,  				      u32 val, int idx);  static int openpic_cpu_read_internal(void *opaque, gpa_t addr,  				     u32 *ptr, int idx); +static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, +				    uint32_t val);  enum irq_type {  	IRQ_TYPE_NORMAL = 0, @@ -528,7 +530,6 @@ static void openpic_reset(struct openpic *opp)  	/* Initialise IRQ sources */  	for (i = 0; i < opp->max_irq; i++) {  		opp->src[i].ivpr = opp->ivpr_reset; -		opp->src[i].idr = opp->idr_reset;  		switch (opp->src[i].type) {  		case IRQ_TYPE_NORMAL: @@ -543,6 +544,8 @@ static void openpic_reset(struct openpic *opp)  		case IRQ_TYPE_FSLSPECIAL:  			break;  		} + +		write_IRQreg_idr(opp, i, opp->idr_reset);  	}  	/* Initialise IRQ destinations */  	for (i = 0; i < MAX_CPU; i++) { @@ -1635,6 +1638,7 @@ static void mpic_destroy(struct kvm_device *dev)  	dev->kvm->arch.mpic = NULL;  	kfree(opp); +	kfree(dev);  }  static int mpic_set_default_irq_routing(struct openpic *opp) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 07c0106fab7..61c738ab128 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -26,6 +26,7 @@  #include <linux/fs.h>  #include <linux/slab.h>  #include <linux/file.h> +#include <linux/module.h>  #include <asm/cputable.h>  #include <asm/uaccess.h>  #include <asm/kvm_ppc.h> @@ -39,6 +40,12 @@  #define CREATE_TRACE_POINTS  #include "trace.h" +struct kvmppc_ops *kvmppc_hv_ops; +EXPORT_SYMBOL_GPL(kvmppc_hv_ops); +struct kvmppc_ops *kvmppc_pr_ops; +EXPORT_SYMBOL_GPL(kvmppc_pr_ops); + +  int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)  {  	return !!(v->arch.pending_exceptions) || @@ -50,7 +57,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)  	return 1;  } -#ifndef CONFIG_KVM_BOOK3S_64_HV  /*   * Common checks before entering the guest world.  Call with interrupts   * disabled. 
@@ -62,14 +68,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)   */  int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)  { -	int r = 1; +	int r; + +	WARN_ON(irqs_disabled()); +	hard_irq_disable(); -	WARN_ON_ONCE(!irqs_disabled());  	while (true) {  		if (need_resched()) {  			local_irq_enable();  			cond_resched(); -			local_irq_disable(); +			hard_irq_disable();  			continue;  		} @@ -95,7 +103,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)  			local_irq_enable();  			trace_kvm_check_requests(vcpu);  			r = kvmppc_core_check_requests(vcpu); -			local_irq_disable(); +			hard_irq_disable();  			if (r > 0)  				continue;  			break; @@ -107,25 +115,36 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)  			continue;  		} -#ifdef CONFIG_PPC64 -		/* lazy EE magic */ -		hard_irq_disable(); -		if (lazy_irq_pending()) { -			/* Got an interrupt in between, try again */ -			local_irq_enable(); -			local_irq_disable(); -			kvm_guest_exit(); -			continue; -		} -#endif -  		kvm_guest_enter(); -		break; +		return 1;  	} +	/* return to host */ +	local_irq_enable();  	return r;  } -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) +static void kvmppc_swab_shared(struct kvm_vcpu *vcpu) +{ +	struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; +	int i; + +	shared->sprg0 = swab64(shared->sprg0); +	shared->sprg1 = swab64(shared->sprg1); +	shared->sprg2 = swab64(shared->sprg2); +	shared->sprg3 = swab64(shared->sprg3); +	shared->srr0 = swab64(shared->srr0); +	shared->srr1 = swab64(shared->srr1); +	shared->dar = swab64(shared->dar); +	shared->msr = swab64(shared->msr); +	shared->dsisr = swab32(shared->dsisr); +	shared->int_pending = swab32(shared->int_pending); +	for (i = 0; i < ARRAY_SIZE(shared->sr); i++) +		shared->sr[i] = swab32(shared->sr[i]); +} +#endif  int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)  { @@ -137,7 +156,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)  	unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);  	unsigned long r2 = 0; -	if (!(vcpu->arch.shared->msr & MSR_SF)) { +	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {  		/* 32 bit mode */  		param1 &= 0xffffffff;  		param2 &= 0xffffffff; @@ -148,8 +167,28 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)  	switch (nr) {  	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):  	{ -		vcpu->arch.magic_page_pa = param1; -		vcpu->arch.magic_page_ea = param2; +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) +		/* Book3S can be little endian, find it out here */ +		int shared_big_endian = true; +		if (vcpu->arch.intr_msr & MSR_LE) +			shared_big_endian = false; +		if (shared_big_endian != vcpu->arch.shared_big_endian) +			kvmppc_swab_shared(vcpu); +		vcpu->arch.shared_big_endian = shared_big_endian; +#endif + +		if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) { +			/* +			 * Older versions of the Linux magic page code had +			 * a bug where they would map their trampoline code +			 * NX. If that's the case, remove !PR NX capability. 
+			 */ +			vcpu->arch.disable_kernel_nx = true; +			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); +		} + +		vcpu->arch.magic_page_pa = param1 & ~0xfffULL; +		vcpu->arch.magic_page_ea = param2 & ~0xfffULL;  		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; @@ -179,6 +218,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)  	return r;  } +EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);  int kvmppc_sanity_check(struct kvm_vcpu *vcpu)  { @@ -192,11 +232,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)  	if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)  		goto out; -#ifdef CONFIG_KVM_BOOK3S_64_HV  	/* HV KVM can only do PAPR mode for now */ -	if (!vcpu->arch.papr_enabled) +	if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))  		goto out; -#endif  #ifdef CONFIG_KVM_BOOKE_HV  	if (!cpu_has_feature(CPU_FTR_EMB_HV)) @@ -209,6 +247,7 @@ out:  	vcpu->arch.sane = r;  	return r ? 0 : -EINVAL;  } +EXPORT_SYMBOL_GPL(kvmppc_sanity_check);  int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)  { @@ -243,6 +282,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)  	return r;  } +EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);  int kvm_arch_hardware_enable(void *garbage)  { @@ -269,10 +309,35 @@ void kvm_arch_check_processor_compat(void *rtn)  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)  { -	if (type) -		return -EINVAL; - +	struct kvmppc_ops *kvm_ops = NULL; +	/* +	 * if we have both HV and PR enabled, default is HV +	 */ +	if (type == 0) { +		if (kvmppc_hv_ops) +			kvm_ops = kvmppc_hv_ops; +		else +			kvm_ops = kvmppc_pr_ops; +		if (!kvm_ops) +			goto err_out; +	} else	if (type == KVM_VM_PPC_HV) { +		if (!kvmppc_hv_ops) +			goto err_out; +		kvm_ops = kvmppc_hv_ops; +	} else if (type == KVM_VM_PPC_PR) { +		if (!kvmppc_pr_ops) +			goto err_out; +		kvm_ops = kvmppc_pr_ops; +	} else +		goto err_out; + +	if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) +		return -ENOENT; + +	kvm->arch.kvm_ops = kvm_ops;  	return kvmppc_core_init_vm(kvm); +err_out: +	return -EINVAL;  }  void kvm_arch_destroy_vm(struct kvm *kvm) @@ -292,6 +357,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)  	kvmppc_core_destroy_vm(kvm);  	mutex_unlock(&kvm->lock); + +	/* drop the module reference */ +	module_put(kvm->arch.kvm_ops->owner);  }  void kvm_arch_sync_events(struct kvm *kvm) @@ -301,6 +369,10 @@ void kvm_arch_sync_events(struct kvm *kvm)  int kvm_dev_ioctl_check_extension(long ext)  {  	int r; +	/* FIXME!! +	 * Should some of this be vm ioctl ? is it possible now ? +	 */ +	int hv_enabled = kvmppc_hv_ops ? 
1 : 0;  	switch (ext) {  #ifdef CONFIG_BOOKE @@ -320,58 +392,68 @@ int kvm_dev_ioctl_check_extension(long ext)  	case KVM_CAP_DEVICE_CTRL:  		r = 1;  		break; -#ifndef CONFIG_KVM_BOOK3S_64_HV  	case KVM_CAP_PPC_PAIRED_SINGLES:  	case KVM_CAP_PPC_OSI:  	case KVM_CAP_PPC_GET_PVINFO:  #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)  	case KVM_CAP_SW_TLB:  #endif -#ifdef CONFIG_KVM_MPIC -	case KVM_CAP_IRQ_MPIC: -#endif -		r = 1; +		/* We support this only for PR */ +		r = !hv_enabled;  		break; +#ifdef CONFIG_KVM_MMIO  	case KVM_CAP_COALESCED_MMIO:  		r = KVM_COALESCED_MMIO_PAGE_OFFSET;  		break;  #endif +#ifdef CONFIG_KVM_MPIC +	case KVM_CAP_IRQ_MPIC: +		r = 1; +		break; +#endif +  #ifdef CONFIG_PPC_BOOK3S_64  	case KVM_CAP_SPAPR_TCE:  	case KVM_CAP_PPC_ALLOC_HTAB:  	case KVM_CAP_PPC_RTAS: +	case KVM_CAP_PPC_FIXUP_HCALL:  #ifdef CONFIG_KVM_XICS  	case KVM_CAP_IRQ_XICS:  #endif  		r = 1;  		break;  #endif /* CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	case KVM_CAP_PPC_SMT: -		r = threads_per_core; +		if (hv_enabled) +			r = threads_per_subcore; +		else +			r = 0;  		break;  	case KVM_CAP_PPC_RMA: -		r = 1; +		r = hv_enabled;  		/* PPC970 requires an RMA */ -		if (cpu_has_feature(CPU_FTR_ARCH_201)) +		if (r && cpu_has_feature(CPU_FTR_ARCH_201))  			r = 2;  		break;  #endif  	case KVM_CAP_SYNC_MMU: -#ifdef CONFIG_KVM_BOOK3S_64_HV -		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +		if (hv_enabled) +			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; +		else +			r = 0;  #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)  		r = 1;  #else  		r = 0; -		break;  #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +		break; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	case KVM_CAP_PPC_HTAB_FD: -		r = 1; +		r = hv_enabled;  		break;  #endif -		break;  	case KVM_CAP_NR_VCPUS:  		/*  		 * Recommending a number of CPUs is somewhat arbitrary; we @@ -379,11 +461,10 @@ int kvm_dev_ioctl_check_extension(long ext)  		 * will have secondary threads "offline"), and for other KVM  		 * implementations just count online CPUs.  		 
*/ -#ifdef CONFIG_KVM_BOOK3S_64_HV -		r = num_present_cpus(); -#else -		r = num_online_cpus(); -#endif +		if (hv_enabled) +			r = num_present_cpus(); +		else +			r = num_online_cpus();  		break;  	case KVM_CAP_MAX_VCPUS:  		r = KVM_MAX_VCPUS; @@ -407,15 +488,16 @@ long kvm_arch_dev_ioctl(struct file *filp,  	return -EINVAL;  } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,  			   struct kvm_memory_slot *dont)  { -	kvmppc_core_free_memslot(free, dont); +	kvmppc_core_free_memslot(kvm, free, dont);  } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, +			    unsigned long npages)  { -	return kvmppc_core_create_memslot(slot, npages); +	return kvmppc_core_create_memslot(kvm, slot, npages);  }  void kvm_arch_memslots_updated(struct kvm *kvm) @@ -608,14 +690,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,  		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);  		break;  	case KVM_MMIO_REG_FPR: -		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; +		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;  		break;  #ifdef CONFIG_PPC_BOOK3S  	case KVM_MMIO_REG_QPR:  		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;  		break;  	case KVM_MMIO_REG_FQPR: -		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; +		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;  		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;  		break;  #endif @@ -625,9 +707,19 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,  }  int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, -                       unsigned int rt, unsigned int bytes, int is_bigendian) +		       unsigned int rt, unsigned int bytes, +		       int is_default_endian)  {  	int idx, ret; +	int is_bigendian; + +	if (kvmppc_need_byteswap(vcpu)) { +		/* Default endianness is "little endian". */ +		is_bigendian = !is_default_endian; +	} else { +		/* Default endianness is "big endian". */ +		is_bigendian = is_default_endian; +	}  	if (bytes > sizeof(run->mmio.data)) {  		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, @@ -659,24 +751,35 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,  	return EMULATE_DO_MMIO;  } +EXPORT_SYMBOL_GPL(kvmppc_handle_load);  /* Same as above, but sign extends */  int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, -                        unsigned int rt, unsigned int bytes, int is_bigendian) +			unsigned int rt, unsigned int bytes, +			int is_default_endian)  {  	int r;  	vcpu->arch.mmio_sign_extend = 1; -	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); +	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);  	return r;  }  int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, -                        u64 val, unsigned int bytes, int is_bigendian) +			u64 val, unsigned int bytes, int is_default_endian)  {  	void *data = run->mmio.data;  	int idx, ret; +	int is_bigendian; + +	if (kvmppc_need_byteswap(vcpu)) { +		/* Default endianness is "little endian". */ +		is_bigendian = !is_default_endian; +	} else { +		/* Default endianness is "big endian". 
*/ +		is_bigendian = is_default_endian; +	}  	if (bytes > sizeof(run->mmio.data)) {  		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, @@ -720,6 +823,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,  	return EMULATE_DO_MMIO;  } +EXPORT_SYMBOL_GPL(kvmppc_handle_store);  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)  { @@ -953,10 +1057,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)  	u32 inst_nop = 0x60000000;  #ifdef CONFIG_KVM_BOOKE_HV  	u32 inst_sc1 = 0x44000022; -	pvinfo->hcall[0] = inst_sc1; -	pvinfo->hcall[1] = inst_nop; -	pvinfo->hcall[2] = inst_nop; -	pvinfo->hcall[3] = inst_nop; +	pvinfo->hcall[0] = cpu_to_be32(inst_sc1); +	pvinfo->hcall[1] = cpu_to_be32(inst_nop); +	pvinfo->hcall[2] = cpu_to_be32(inst_nop); +	pvinfo->hcall[3] = cpu_to_be32(inst_nop);  #else  	u32 inst_lis = 0x3c000000;  	u32 inst_ori = 0x60000000; @@ -972,10 +1076,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)  	 *    sc  	 *    nop  	 */ -	pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); -	pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); -	pvinfo->hcall[2] = inst_sc; -	pvinfo->hcall[3] = inst_nop; +	pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask)); +	pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask)); +	pvinfo->hcall[2] = cpu_to_be32(inst_sc); +	pvinfo->hcall[3] = cpu_to_be32(inst_nop);  #endif  	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; @@ -1024,52 +1128,12 @@ long kvm_arch_vm_ioctl(struct file *filp,  		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);  		goto out;  	} -#endif /* CONFIG_PPC_BOOK3S_64 */ - -#ifdef CONFIG_KVM_BOOK3S_64_HV -	case KVM_ALLOCATE_RMA: { -		struct kvm_allocate_rma rma; -		struct kvm *kvm = filp->private_data; - -		r = kvm_vm_ioctl_allocate_rma(kvm, &rma); -		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) -			r = -EFAULT; -		break; -	} - -	case KVM_PPC_ALLOCATE_HTAB: { -		u32 htab_order; - -		r = -EFAULT; -		if (get_user(htab_order, (u32 __user *)argp)) -			break; -		r = kvmppc_alloc_reset_hpt(kvm, &htab_order); -		if (r) -			break; -		r = -EFAULT; -		if (put_user(htab_order, (u32 __user *)argp)) -			break; -		r = 0; -		break; -	} - -	case KVM_PPC_GET_HTAB_FD: { -		struct kvm_get_htab_fd ghf; - -		r = -EFAULT; -		if (copy_from_user(&ghf, argp, sizeof(ghf))) -			break; -		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); -		break; -	} -#endif /* CONFIG_KVM_BOOK3S_64_HV */ - -#ifdef CONFIG_PPC_BOOK3S_64  	case KVM_PPC_GET_SMMU_INFO: {  		struct kvm_ppc_smmu_info info; +		struct kvm *kvm = filp->private_data;  		memset(&info, 0, sizeof(info)); -		r = kvm_vm_ioctl_get_smmu_info(kvm, &info); +		r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);  		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))  			r = -EFAULT;  		break; @@ -1080,11 +1144,15 @@ long kvm_arch_vm_ioctl(struct file *filp,  		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);  		break;  	} -#endif /* CONFIG_PPC_BOOK3S_64 */ +	default: { +		struct kvm *kvm = filp->private_data; +		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); +	} +#else /* CONFIG_PPC_BOOK3S_64 */  	default:  		r = -ENOTTY; +#endif  	} -  out:  	return r;  } @@ -1106,22 +1174,26 @@ long kvmppc_alloc_lpid(void)  	return lpid;  } +EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);  void kvmppc_claim_lpid(long lpid)  {  	set_bit(lpid, lpid_inuse);  } +EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);  void kvmppc_free_lpid(long lpid)  {  	clear_bit(lpid, lpid_inuse);  } 
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);  void kvmppc_init_lpid(unsigned long nr_lpids_param)  {  	nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);  	memset(lpid_inuse, 0, sizeof(lpid_inuse));  } +EXPORT_SYMBOL_GPL(kvmppc_init_lpid);  int kvm_arch_init(void *opaque)  { @@ -1130,4 +1202,5 @@ int kvm_arch_init(void *opaque)  void kvm_arch_exit(void)  { +  } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index e326489a542..2e0e67ef354 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr,  		  __entry->inst, __entry->pc, __entry->emulate)  ); -#ifdef CONFIG_PPC_BOOK3S -#define kvm_trace_symbol_exit \ -	{0x100, "SYSTEM_RESET"}, \ -	{0x200, "MACHINE_CHECK"}, \ -	{0x300, "DATA_STORAGE"}, \ -	{0x380, "DATA_SEGMENT"}, \ -	{0x400, "INST_STORAGE"}, \ -	{0x480, "INST_SEGMENT"}, \ -	{0x500, "EXTERNAL"}, \ -	{0x501, "EXTERNAL_LEVEL"}, \ -	{0x502, "EXTERNAL_HV"}, \ -	{0x600, "ALIGNMENT"}, \ -	{0x700, "PROGRAM"}, \ -	{0x800, "FP_UNAVAIL"}, \ -	{0x900, "DECREMENTER"}, \ -	{0x980, "HV_DECREMENTER"}, \ -	{0xc00, "SYSCALL"}, \ -	{0xd00, "TRACE"}, \ -	{0xe00, "H_DATA_STORAGE"}, \ -	{0xe20, "H_INST_STORAGE"}, \ -	{0xe40, "H_EMUL_ASSIST"}, \ -	{0xf00, "PERFMON"}, \ -	{0xf20, "ALTIVEC"}, \ -	{0xf40, "VSX"} -#else -#define kvm_trace_symbol_exit \ -	{0, "CRITICAL"}, \ -	{1, "MACHINE_CHECK"}, \ -	{2, "DATA_STORAGE"}, \ -	{3, "INST_STORAGE"}, \ -	{4, "EXTERNAL"}, \ -	{5, "ALIGNMENT"}, \ -	{6, "PROGRAM"}, \ -	{7, "FP_UNAVAIL"}, \ -	{8, "SYSCALL"}, \ -	{9, "AP_UNAVAIL"}, \ -	{10, "DECREMENTER"}, \ -	{11, "FIT"}, \ -	{12, "WATCHDOG"}, \ -	{13, "DTLB_MISS"}, \ -	{14, "ITLB_MISS"}, \ -	{15, "DEBUG"}, \ -	{32, "SPE_UNAVAIL"}, \ -	{33, "SPE_FP_DATA"}, \ -	{34, "SPE_FP_ROUND"}, \ -	{35, "PERFORMANCE_MONITOR"}, \ -	{36, "DOORBELL"}, \ -	{37, "DOORBELL_CRITICAL"}, \ -	{38, "GUEST_DBELL"}, \ -	{39, "GUEST_DBELL_CRIT"}, \ -	{40, "HV_SYSCALL"}, \ -	{41, "HV_PRIV"} -#endif - -TRACE_EVENT(kvm_exit, -	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), -	TP_ARGS(exit_nr, vcpu), - -	TP_STRUCT__entry( -		__field(	unsigned int,	exit_nr		) -		__field(	unsigned long,	pc		) -		__field(	unsigned long,	msr		) -		__field(	unsigned long,	dar		) -#ifdef CONFIG_KVM_BOOK3S_PR -		__field(	unsigned long,	srr1		) -#endif -		__field(	unsigned long,	last_inst	) -	), - -	TP_fast_assign( -#ifdef CONFIG_KVM_BOOK3S_PR -		struct kvmppc_book3s_shadow_vcpu *svcpu; -#endif -		__entry->exit_nr	= exit_nr; -		__entry->pc		= kvmppc_get_pc(vcpu); -		__entry->dar		= kvmppc_get_fault_dar(vcpu); -		__entry->msr		= vcpu->arch.shared->msr; -#ifdef CONFIG_KVM_BOOK3S_PR -		svcpu = svcpu_get(vcpu); -		__entry->srr1		= svcpu->shadow_srr1; -		svcpu_put(svcpu); -#endif -		__entry->last_inst	= vcpu->arch.last_inst; -	), - -	TP_printk("exit=%s" -		" | pc=0x%lx" -		" | msr=0x%lx" -		" | dar=0x%lx" -#ifdef CONFIG_KVM_BOOK3S_PR -		" | srr1=0x%lx" -#endif -		" | last_inst=0x%lx" -		, -		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), -		__entry->pc, -		__entry->msr, -		__entry->dar, -#ifdef CONFIG_KVM_BOOK3S_PR -		__entry->srr1, -#endif -		__entry->last_inst -		) -); - -TRACE_EVENT(kvm_unmap_hva, -	TP_PROTO(unsigned long hva), -	TP_ARGS(hva), - -	TP_STRUCT__entry( -		__field(	unsigned long,	hva		) -	), - -	TP_fast_assign( -		__entry->hva		= hva; -	), - -	TP_printk("unmap hva 0x%lx\n", __entry->hva) -); -  TRACE_EVENT(kvm_stlb_inval,  	TP_PROTO(unsigned int stlb_index),  	TP_ARGS(stlb_index), @@ -236,315 +116,6 @@ TRACE_EVENT(kvm_check_requests,  		__entry->cpu_nr, 
__entry->requests)  ); - -/************************************************************************* - *                         Book3S trace points                           * - *************************************************************************/ - -#ifdef CONFIG_KVM_BOOK3S_PR - -TRACE_EVENT(kvm_book3s_reenter, -	TP_PROTO(int r, struct kvm_vcpu *vcpu), -	TP_ARGS(r, vcpu), - -	TP_STRUCT__entry( -		__field(	unsigned int,	r		) -		__field(	unsigned long,	pc		) -	), - -	TP_fast_assign( -		__entry->r		= r; -		__entry->pc		= kvmppc_get_pc(vcpu); -	), - -	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) -); - -#ifdef CONFIG_PPC_BOOK3S_64 - -TRACE_EVENT(kvm_book3s_64_mmu_map, -	TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, -		 struct kvmppc_pte *orig_pte), -	TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), - -	TP_STRUCT__entry( -		__field(	unsigned char,		flag_w		) -		__field(	unsigned char,		flag_x		) -		__field(	unsigned long,		eaddr		) -		__field(	unsigned long,		hpteg		) -		__field(	unsigned long,		va		) -		__field(	unsigned long long,	vpage		) -		__field(	unsigned long,		hpaddr		) -	), - -	TP_fast_assign( -		__entry->flag_w	= ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; -		__entry->flag_x	= (rflags & HPTE_R_N) ? '-' : 'x'; -		__entry->eaddr	= orig_pte->eaddr; -		__entry->hpteg	= hpteg; -		__entry->va	= va; -		__entry->vpage	= orig_pte->vpage; -		__entry->hpaddr	= hpaddr; -	), - -	TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", -		  __entry->flag_w, __entry->flag_x, __entry->eaddr, -		  __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) -); - -#endif /* CONFIG_PPC_BOOK3S_64 */ - -TRACE_EVENT(kvm_book3s_mmu_map, -	TP_PROTO(struct hpte_cache *pte), -	TP_ARGS(pte), - -	TP_STRUCT__entry( -		__field(	u64,		host_vpn	) -		__field(	u64,		pfn		) -		__field(	ulong,		eaddr		) -		__field(	u64,		vpage		) -		__field(	ulong,		raddr		) -		__field(	int,		flags		) -	), - -	TP_fast_assign( -		__entry->host_vpn	= pte->host_vpn; -		__entry->pfn		= pte->pfn; -		__entry->eaddr		= pte->pte.eaddr; -		__entry->vpage		= pte->pte.vpage; -		__entry->raddr		= pte->pte.raddr; -		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) | -					  (pte->pte.may_write ? 0x2 : 0) | -					  (pte->pte.may_execute ? 0x1 : 0); -	), - -	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", -		  __entry->host_vpn, __entry->pfn, __entry->eaddr, -		  __entry->vpage, __entry->raddr, __entry->flags) -); - -TRACE_EVENT(kvm_book3s_mmu_invalidate, -	TP_PROTO(struct hpte_cache *pte), -	TP_ARGS(pte), - -	TP_STRUCT__entry( -		__field(	u64,		host_vpn	) -		__field(	u64,		pfn		) -		__field(	ulong,		eaddr		) -		__field(	u64,		vpage		) -		__field(	ulong,		raddr		) -		__field(	int,		flags		) -	), - -	TP_fast_assign( -		__entry->host_vpn	= pte->host_vpn; -		__entry->pfn		= pte->pfn; -		__entry->eaddr		= pte->pte.eaddr; -		__entry->vpage		= pte->pte.vpage; -		__entry->raddr		= pte->pte.raddr; -		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) | -					  (pte->pte.may_write ? 0x2 : 0) | -					  (pte->pte.may_execute ? 
0x1 : 0); -	), - -	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", -		  __entry->host_vpn, __entry->pfn, __entry->eaddr, -		  __entry->vpage, __entry->raddr, __entry->flags) -); - -TRACE_EVENT(kvm_book3s_mmu_flush, -	TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, -		 unsigned long long p2), -	TP_ARGS(type, vcpu, p1, p2), - -	TP_STRUCT__entry( -		__field(	int,			count		) -		__field(	unsigned long long,	p1		) -		__field(	unsigned long long,	p2		) -		__field(	const char *,		type		) -	), - -	TP_fast_assign( -		__entry->count		= to_book3s(vcpu)->hpte_cache_count; -		__entry->p1		= p1; -		__entry->p2		= p2; -		__entry->type		= type; -	), - -	TP_printk("Flush %d %sPTEs: %llx - %llx", -		  __entry->count, __entry->type, __entry->p1, __entry->p2) -); - -TRACE_EVENT(kvm_book3s_slb_found, -	TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), -	TP_ARGS(gvsid, hvsid), - -	TP_STRUCT__entry( -		__field(	unsigned long long,	gvsid		) -		__field(	unsigned long long,	hvsid		) -	), - -	TP_fast_assign( -		__entry->gvsid		= gvsid; -		__entry->hvsid		= hvsid; -	), - -	TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) -); - -TRACE_EVENT(kvm_book3s_slb_fail, -	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), -	TP_ARGS(sid_map_mask, gvsid), - -	TP_STRUCT__entry( -		__field(	unsigned short,		sid_map_mask	) -		__field(	unsigned long long,	gvsid		) -	), - -	TP_fast_assign( -		__entry->sid_map_mask	= sid_map_mask; -		__entry->gvsid		= gvsid; -	), - -	TP_printk("%x/%x: %llx", __entry->sid_map_mask, -		  SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) -); - -TRACE_EVENT(kvm_book3s_slb_map, -	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, -		 unsigned long long hvsid), -	TP_ARGS(sid_map_mask, gvsid, hvsid), - -	TP_STRUCT__entry( -		__field(	unsigned short,		sid_map_mask	) -		__field(	unsigned long long,	guest_vsid	) -		__field(	unsigned long long,	host_vsid	) -	), - -	TP_fast_assign( -		__entry->sid_map_mask	= sid_map_mask; -		__entry->guest_vsid	= gvsid; -		__entry->host_vsid	= hvsid; -	), - -	TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, -		  __entry->guest_vsid, __entry->host_vsid) -); - -TRACE_EVENT(kvm_book3s_slbmte, -	TP_PROTO(u64 slb_vsid, u64 slb_esid), -	TP_ARGS(slb_vsid, slb_esid), - -	TP_STRUCT__entry( -		__field(	u64,	slb_vsid	) -		__field(	u64,	slb_esid	) -	), - -	TP_fast_assign( -		__entry->slb_vsid	= slb_vsid; -		__entry->slb_esid	= slb_esid; -	), - -	TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) -); - -#endif /* CONFIG_PPC_BOOK3S */ - - -/************************************************************************* - *                         Book3E trace points                           * - *************************************************************************/ - -#ifdef CONFIG_BOOKE - -TRACE_EVENT(kvm_booke206_stlb_write, -	TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), -	TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), - -	TP_STRUCT__entry( -		__field(	__u32,	mas0		) -		__field(	__u32,	mas8		) -		__field(	__u32,	mas1		) -		__field(	__u64,	mas2		) -		__field(	__u64,	mas7_3		) -	), - -	TP_fast_assign( -		__entry->mas0		= mas0; -		__entry->mas8		= mas8; -		__entry->mas1		= mas1; -		__entry->mas2		= mas2; -		__entry->mas7_3		= mas7_3; -	), - -	TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", -		__entry->mas0, __entry->mas8, __entry->mas1, -		__entry->mas2, __entry->mas7_3) -); - -TRACE_EVENT(kvm_booke206_gtlb_write, -	TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), -	
TP_ARGS(mas0, mas1, mas2, mas7_3), - -	TP_STRUCT__entry( -		__field(	__u32,	mas0		) -		__field(	__u32,	mas1		) -		__field(	__u64,	mas2		) -		__field(	__u64,	mas7_3		) -	), - -	TP_fast_assign( -		__entry->mas0		= mas0; -		__entry->mas1		= mas1; -		__entry->mas2		= mas2; -		__entry->mas7_3		= mas7_3; -	), - -	TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", -		__entry->mas0, __entry->mas1, -		__entry->mas2, __entry->mas7_3) -); - -TRACE_EVENT(kvm_booke206_ref_release, -	TP_PROTO(__u64 pfn, __u32 flags), -	TP_ARGS(pfn, flags), - -	TP_STRUCT__entry( -		__field(	__u64,	pfn		) -		__field(	__u32,	flags		) -	), - -	TP_fast_assign( -		__entry->pfn		= pfn; -		__entry->flags		= flags; -	), - -	TP_printk("pfn=%llx flags=%x", -		__entry->pfn, __entry->flags) -); - -TRACE_EVENT(kvm_booke_queue_irqprio, -	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), -	TP_ARGS(vcpu, priority), - -	TP_STRUCT__entry( -		__field(	__u32,	cpu_nr		) -		__field(	__u32,	priority		) -		__field(	unsigned long,	pending		) -	), - -	TP_fast_assign( -		__entry->cpu_nr		= vcpu->vcpu_id; -		__entry->priority	= priority; -		__entry->pending	= vcpu->arch.pending_exceptions; -	), - -	TP_printk("vcpu=%x prio=%x pending=%lx", -		__entry->cpu_nr, __entry->priority, __entry->pending) -); - -#endif -  #endif /* _TRACE_KVM_H */  /* This part must be outside protection */ diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h new file mode 100644 index 00000000000..f7537cf26ce --- /dev/null +++ b/arch/powerpc/kvm/trace_booke.h @@ -0,0 +1,177 @@ +#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_BOOKE_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_booke +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + +#define kvm_trace_symbol_exit \ +	{0, "CRITICAL"}, \ +	{1, "MACHINE_CHECK"}, \ +	{2, "DATA_STORAGE"}, \ +	{3, "INST_STORAGE"}, \ +	{4, "EXTERNAL"}, \ +	{5, "ALIGNMENT"}, \ +	{6, "PROGRAM"}, \ +	{7, "FP_UNAVAIL"}, \ +	{8, "SYSCALL"}, \ +	{9, "AP_UNAVAIL"}, \ +	{10, "DECREMENTER"}, \ +	{11, "FIT"}, \ +	{12, "WATCHDOG"}, \ +	{13, "DTLB_MISS"}, \ +	{14, "ITLB_MISS"}, \ +	{15, "DEBUG"}, \ +	{32, "SPE_UNAVAIL"}, \ +	{33, "SPE_FP_DATA"}, \ +	{34, "SPE_FP_ROUND"}, \ +	{35, "PERFORMANCE_MONITOR"}, \ +	{36, "DOORBELL"}, \ +	{37, "DOORBELL_CRITICAL"}, \ +	{38, "GUEST_DBELL"}, \ +	{39, "GUEST_DBELL_CRIT"}, \ +	{40, "HV_SYSCALL"}, \ +	{41, "HV_PRIV"} + +TRACE_EVENT(kvm_exit, +	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), +	TP_ARGS(exit_nr, vcpu), + +	TP_STRUCT__entry( +		__field(	unsigned int,	exit_nr		) +		__field(	unsigned long,	pc		) +		__field(	unsigned long,	msr		) +		__field(	unsigned long,	dar		) +		__field(	unsigned long,	last_inst	) +	), + +	TP_fast_assign( +		__entry->exit_nr	= exit_nr; +		__entry->pc		= kvmppc_get_pc(vcpu); +		__entry->dar		= kvmppc_get_fault_dar(vcpu); +		__entry->msr		= vcpu->arch.shared->msr; +		__entry->last_inst	= vcpu->arch.last_inst; +	), + +	TP_printk("exit=%s" +		" | pc=0x%lx" +		" | msr=0x%lx" +		" | dar=0x%lx" +		" | last_inst=0x%lx" +		, +		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), +		__entry->pc, +		__entry->msr, +		__entry->dar, +		__entry->last_inst +		) +); + +TRACE_EVENT(kvm_unmap_hva, +	TP_PROTO(unsigned long hva), +	TP_ARGS(hva), + +	TP_STRUCT__entry( +		__field(	unsigned long,	hva		) +	), + +	TP_fast_assign( +		__entry->hva		= hva; +	), + +	TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + +TRACE_EVENT(kvm_booke206_stlb_write, +	TP_PROTO(__u32 mas0, __u32 
mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), +	TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), + +	TP_STRUCT__entry( +		__field(	__u32,	mas0		) +		__field(	__u32,	mas8		) +		__field(	__u32,	mas1		) +		__field(	__u64,	mas2		) +		__field(	__u64,	mas7_3		) +	), + +	TP_fast_assign( +		__entry->mas0		= mas0; +		__entry->mas8		= mas8; +		__entry->mas1		= mas1; +		__entry->mas2		= mas2; +		__entry->mas7_3		= mas7_3; +	), + +	TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", +		__entry->mas0, __entry->mas8, __entry->mas1, +		__entry->mas2, __entry->mas7_3) +); + +TRACE_EVENT(kvm_booke206_gtlb_write, +	TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), +	TP_ARGS(mas0, mas1, mas2, mas7_3), + +	TP_STRUCT__entry( +		__field(	__u32,	mas0		) +		__field(	__u32,	mas1		) +		__field(	__u64,	mas2		) +		__field(	__u64,	mas7_3		) +	), + +	TP_fast_assign( +		__entry->mas0		= mas0; +		__entry->mas1		= mas1; +		__entry->mas2		= mas2; +		__entry->mas7_3		= mas7_3; +	), + +	TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", +		__entry->mas0, __entry->mas1, +		__entry->mas2, __entry->mas7_3) +); + +TRACE_EVENT(kvm_booke206_ref_release, +	TP_PROTO(__u64 pfn, __u32 flags), +	TP_ARGS(pfn, flags), + +	TP_STRUCT__entry( +		__field(	__u64,	pfn		) +		__field(	__u32,	flags		) +	), + +	TP_fast_assign( +		__entry->pfn		= pfn; +		__entry->flags		= flags; +	), + +	TP_printk("pfn=%llx flags=%x", +		__entry->pfn, __entry->flags) +); + +TRACE_EVENT(kvm_booke_queue_irqprio, +	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), +	TP_ARGS(vcpu, priority), + +	TP_STRUCT__entry( +		__field(	__u32,	cpu_nr		) +		__field(	__u32,	priority		) +		__field(	unsigned long,	pending		) +	), + +	TP_fast_assign( +		__entry->cpu_nr		= vcpu->vcpu_id; +		__entry->priority	= priority; +		__entry->pending	= vcpu->arch.pending_exceptions; +	), + +	TP_printk("vcpu=%x prio=%x pending=%lx", +		__entry->cpu_nr, __entry->priority, __entry->pending) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h new file mode 100644 index 00000000000..e1357cd8dc1 --- /dev/null +++ b/arch/powerpc/kvm/trace_pr.h @@ -0,0 +1,297 @@ + +#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_PR_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_pr +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE trace_pr + +#define kvm_trace_symbol_exit \ +	{0x100, "SYSTEM_RESET"}, \ +	{0x200, "MACHINE_CHECK"}, \ +	{0x300, "DATA_STORAGE"}, \ +	{0x380, "DATA_SEGMENT"}, \ +	{0x400, "INST_STORAGE"}, \ +	{0x480, "INST_SEGMENT"}, \ +	{0x500, "EXTERNAL"}, \ +	{0x501, "EXTERNAL_LEVEL"}, \ +	{0x502, "EXTERNAL_HV"}, \ +	{0x600, "ALIGNMENT"}, \ +	{0x700, "PROGRAM"}, \ +	{0x800, "FP_UNAVAIL"}, \ +	{0x900, "DECREMENTER"}, \ +	{0x980, "HV_DECREMENTER"}, \ +	{0xc00, "SYSCALL"}, \ +	{0xd00, "TRACE"}, \ +	{0xe00, "H_DATA_STORAGE"}, \ +	{0xe20, "H_INST_STORAGE"}, \ +	{0xe40, "H_EMUL_ASSIST"}, \ +	{0xf00, "PERFMON"}, \ +	{0xf20, "ALTIVEC"}, \ +	{0xf40, "VSX"} + +TRACE_EVENT(kvm_book3s_reenter, +	TP_PROTO(int r, struct kvm_vcpu *vcpu), +	TP_ARGS(r, vcpu), + +	TP_STRUCT__entry( +		__field(	unsigned int,	r		) +		__field(	unsigned long,	pc		) +	), + +	TP_fast_assign( +		__entry->r		= r; +		__entry->pc		= kvmppc_get_pc(vcpu); +	), + +	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) +); + +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, +	TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, +		 struct kvmppc_pte *orig_pte), +	TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + +	TP_STRUCT__entry( +		__field(	unsigned char,		flag_w		) +		__field(	unsigned char,		flag_x		) +		__field(	unsigned long,		eaddr		) +		__field(	unsigned long,		hpteg		) +		__field(	unsigned long,		va		) +		__field(	unsigned long long,	vpage		) +		__field(	unsigned long,		hpaddr		) +	), + +	TP_fast_assign( +		__entry->flag_w	= ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; +		__entry->flag_x	= (rflags & HPTE_R_N) ? '-' : 'x'; +		__entry->eaddr	= orig_pte->eaddr; +		__entry->hpteg	= hpteg; +		__entry->va	= va; +		__entry->vpage	= orig_pte->vpage; +		__entry->hpaddr	= hpaddr; +	), + +	TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", +		  __entry->flag_w, __entry->flag_x, __entry->eaddr, +		  __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +TRACE_EVENT(kvm_book3s_mmu_map, +	TP_PROTO(struct hpte_cache *pte), +	TP_ARGS(pte), + +	TP_STRUCT__entry( +		__field(	u64,		host_vpn	) +		__field(	u64,		pfn		) +		__field(	ulong,		eaddr		) +		__field(	u64,		vpage		) +		__field(	ulong,		raddr		) +		__field(	int,		flags		) +	), + +	TP_fast_assign( +		__entry->host_vpn	= pte->host_vpn; +		__entry->pfn		= pte->pfn; +		__entry->eaddr		= pte->pte.eaddr; +		__entry->vpage		= pte->pte.vpage; +		__entry->raddr		= pte->pte.raddr; +		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) | +					  (pte->pte.may_write ? 0x2 : 0) | +					  (pte->pte.may_execute ? 0x1 : 0); +	), + +	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", +		  __entry->host_vpn, __entry->pfn, __entry->eaddr, +		  __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_invalidate, +	TP_PROTO(struct hpte_cache *pte), +	TP_ARGS(pte), + +	TP_STRUCT__entry( +		__field(	u64,		host_vpn	) +		__field(	u64,		pfn		) +		__field(	ulong,		eaddr		) +		__field(	u64,		vpage		) +		__field(	ulong,		raddr		) +		__field(	int,		flags		) +	), + +	TP_fast_assign( +		__entry->host_vpn	= pte->host_vpn; +		__entry->pfn		= pte->pfn; +		__entry->eaddr		= pte->pte.eaddr; +		__entry->vpage		= pte->pte.vpage; +		__entry->raddr		= pte->pte.raddr; +		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) | +					  (pte->pte.may_write ? 0x2 : 0) | +					  (pte->pte.may_execute ? 
0x1 : 0); +	), + +	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", +		  __entry->host_vpn, __entry->pfn, __entry->eaddr, +		  __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_flush, +	TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, +		 unsigned long long p2), +	TP_ARGS(type, vcpu, p1, p2), + +	TP_STRUCT__entry( +		__field(	int,			count		) +		__field(	unsigned long long,	p1		) +		__field(	unsigned long long,	p2		) +		__field(	const char *,		type		) +	), + +	TP_fast_assign( +		__entry->count		= to_book3s(vcpu)->hpte_cache_count; +		__entry->p1		= p1; +		__entry->p2		= p2; +		__entry->type		= type; +	), + +	TP_printk("Flush %d %sPTEs: %llx - %llx", +		  __entry->count, __entry->type, __entry->p1, __entry->p2) +); + +TRACE_EVENT(kvm_book3s_slb_found, +	TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), +	TP_ARGS(gvsid, hvsid), + +	TP_STRUCT__entry( +		__field(	unsigned long long,	gvsid		) +		__field(	unsigned long long,	hvsid		) +	), + +	TP_fast_assign( +		__entry->gvsid		= gvsid; +		__entry->hvsid		= hvsid; +	), + +	TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, +	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), +	TP_ARGS(sid_map_mask, gvsid), + +	TP_STRUCT__entry( +		__field(	unsigned short,		sid_map_mask	) +		__field(	unsigned long long,	gvsid		) +	), + +	TP_fast_assign( +		__entry->sid_map_mask	= sid_map_mask; +		__entry->gvsid		= gvsid; +	), + +	TP_printk("%x/%x: %llx", __entry->sid_map_mask, +		  SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, +	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, +		 unsigned long long hvsid), +	TP_ARGS(sid_map_mask, gvsid, hvsid), + +	TP_STRUCT__entry( +		__field(	unsigned short,		sid_map_mask	) +		__field(	unsigned long long,	guest_vsid	) +		__field(	unsigned long long,	host_vsid	) +	), + +	TP_fast_assign( +		__entry->sid_map_mask	= sid_map_mask; +		__entry->guest_vsid	= gvsid; +		__entry->host_vsid	= hvsid; +	), + +	TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, +		  __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, +	TP_PROTO(u64 slb_vsid, u64 slb_esid), +	TP_ARGS(slb_vsid, slb_esid), + +	TP_STRUCT__entry( +		__field(	u64,	slb_vsid	) +		__field(	u64,	slb_esid	) +	), + +	TP_fast_assign( +		__entry->slb_vsid	= slb_vsid; +		__entry->slb_esid	= slb_esid; +	), + +	TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + +TRACE_EVENT(kvm_exit, +	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), +	TP_ARGS(exit_nr, vcpu), + +	TP_STRUCT__entry( +		__field(	unsigned int,	exit_nr		) +		__field(	unsigned long,	pc		) +		__field(	unsigned long,	msr		) +		__field(	unsigned long,	dar		) +		__field(	unsigned long,	srr1		) +		__field(	unsigned long,	last_inst	) +	), + +	TP_fast_assign( +		__entry->exit_nr	= exit_nr; +		__entry->pc		= kvmppc_get_pc(vcpu); +		__entry->dar		= kvmppc_get_fault_dar(vcpu); +		__entry->msr		= kvmppc_get_msr(vcpu); +		__entry->srr1		= vcpu->arch.shadow_srr1; +		__entry->last_inst	= vcpu->arch.last_inst; +	), + +	TP_printk("exit=%s" +		" | pc=0x%lx" +		" | msr=0x%lx" +		" | dar=0x%lx" +		" | srr1=0x%lx" +		" | last_inst=0x%lx" +		, +		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), +		__entry->pc, +		__entry->msr, +		__entry->dar, +		__entry->srr1, +		__entry->last_inst +		) +); + +TRACE_EVENT(kvm_unmap_hva, +	TP_PROTO(unsigned long hva), +	TP_ARGS(hva), + +	TP_STRUCT__entry( +		__field(	unsigned 
long,	hva		) +	), + +	TP_fast_assign( +		__entry->hva		= hva; +	), + +	TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h>
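
Usage note (not part of the diff above): the kvm_arch_init_vm() hunk lets userspace pick the PR or HV implementation per VM through the type argument of KVM_CREATE_VM. A type of 0 prefers HV when both kvmppc_hv_ops and kvmppc_pr_ops are registered, while KVM_VM_PPC_HV and KVM_VM_PPC_PR request one explicitly and fail with -EINVAL if that implementation is absent. The following is a minimal userspace sketch of that selection, assuming a kernel with this series applied and the KVM_VM_PPC_* constants present in <linux/kvm.h>; the program name and error handling are illustrative only.

/* Illustrative sketch: explicitly request the PR implementation when
 * creating a VM.  If no PR implementation is registered, KVM_CREATE_VM
 * returns -1 with errno set to EINVAL.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* A type of 0 would pick HV if registered, otherwise PR. */
	int vm = ioctl(kvm, KVM_CREATE_VM, KVM_VM_PPC_PR);
	if (vm < 0) {
		perror("KVM_CREATE_VM(KVM_VM_PPC_PR)");
		return 1;
	}

	return 0;
}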
