Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--   arch/arm/kvm/Kconfig              3
-rw-r--r--   arch/arm/kvm/Makefile             2
-rw-r--r--   arch/arm/kvm/arm.c              112
-rw-r--r--   arch/arm/kvm/coproc.c           202
-rw-r--r--   arch/arm/kvm/coproc.h            14
-rw-r--r--   arch/arm/kvm/coproc_a15.c       119
-rw-r--r--   arch/arm/kvm/coproc_a7.c         54
-rw-r--r--   arch/arm/kvm/emulate.c            2
-rw-r--r--   arch/arm/kvm/guest.c            117
-rw-r--r--   arch/arm/kvm/handle_exit.c       32
-rw-r--r--   arch/arm/kvm/interrupts.S        11
-rw-r--r--   arch/arm/kvm/interrupts_head.S   21
-rw-r--r--   arch/arm/kvm/mmio.c              86
-rw-r--r--   arch/arm/kvm/mmu.c              384
-rw-r--r--   arch/arm/kvm/psci.c             263
-rw-r--r--   arch/arm/kvm/reset.c             19
16 files changed, 1149 insertions, 292 deletions
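
Before the patch body: one of the arm.c hunks below adds a KVM_ARM_PREFERRED_TARGET VM ioctl, which fills in a struct kvm_vcpu_init naming the guest CPU target that matches the host (Cortex-A7 or Cortex-A15, with no features set for now). The following minimal userspace sketch shows how that ioctl pairs with KVM_ARM_VCPU_INIT; it is not part of the patch, the helper name is made up, and it assumes a kernel carrying this series.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper (not from the patch): probe the preferred target
 * on the VM fd, then initialise a vcpu fd with it. */
static int init_vcpu(int vm_fd, int vcpu_fd)
{
	struct kvm_vcpu_init init;

	/* Ask KVM which guest CPU model matches the host. */
	if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
		return -1;

	/* Features could be requested here, e.g. starting powered off:
	 * init.features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; */

	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}
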
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index ebf5015508b..4be5bb150bd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,9 +20,10 @@ config KVM  	bool "Kernel-based Virtual Machine (KVM) support"  	select PREEMPT_NOTIFIERS  	select ANON_INODES +	select HAVE_KVM_CPU_RELAX_INTERCEPT  	select KVM_MMIO  	select KVM_ARM_HOST -	depends on ARM_VIRT_EXT && ARM_LPAE +	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN  	---help---  	  Support hosting virtualized guest machines. You will also  	  need to select one or more of the processor modules below. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d99bee4950e..789bca9e64a 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o  obj-y += kvm-arm.o init.o interrupts.o  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o  obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o  obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787..3c82b37c0f9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -17,6 +17,7 @@   */  #include <linux/cpu.h> +#include <linux/cpu_pm.h>  #include <linux/errno.h>  #include <linux/err.h>  #include <linux/kvm_host.h> @@ -65,7 +66,7 @@ static bool vgic_present;  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)  {  	BUG_ON(preemptible()); -	__get_cpu_var(kvm_arm_running_vcpu) = vcpu; +	__this_cpu_write(kvm_arm_running_vcpu, vcpu);  }  /** @@ -75,7 +76,7 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)  struct kvm_vcpu *kvm_arm_get_running_vcpu(void)  {  	BUG_ON(preemptible()); -	return __get_cpu_var(kvm_arm_running_vcpu); +	return __this_cpu_read(kvm_arm_running_vcpu);  }  /** @@ -137,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)  	if (ret)  		goto out_free_stage2_pgd; +	kvm_timer_init(kvm); +  	/* Mark the initial VMID generation invalid */  	kvm->arch.vmid_gen = 0; @@ -152,12 +155,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)  	return VM_FAULT_SIGBUS;  } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,  			   struct kvm_memory_slot *dont)  {  } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, +			    unsigned long npages)  {  	return 0;  } @@ -187,11 +191,13 @@ int kvm_dev_ioctl_check_extension(long ext)  	case KVM_CAP_IRQCHIP:  		r = vgic_present;  		break; +	case KVM_CAP_DEVICE_CTRL:  	case KVM_CAP_USER_MEMORY:  	case KVM_CAP_SYNC_MMU:  	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:  	case KVM_CAP_ONE_REG:  	case KVM_CAP_ARM_PSCI: +	case KVM_CAP_ARM_PSCI_0_2:  		r = 1;  		break;  	case KVM_CAP_COALESCED_MMIO: @@ -338,6 +344,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)  { +	/* +	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1 +	 * if the vcpu is no longer assigned to a cpu.  This is used for the +	 * optimized make_all_cpus_request path. 
+	 */ +	vcpu->cpu = -1; +  	kvm_arm_set_running_vcpu(NULL);  } @@ -461,6 +474,8 @@ static void update_vttbr(struct kvm *kvm)  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)  { +	int ret; +  	if (likely(vcpu->arch.has_run_once))  		return 0; @@ -470,22 +485,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)  	 * Initialize the VGIC before running a vcpu the first time on  	 * this VM.  	 */ -	if (irqchip_in_kernel(vcpu->kvm) && -	    unlikely(!vgic_initialized(vcpu->kvm))) { -		int ret = kvm_vgic_init(vcpu->kvm); +	if (unlikely(!vgic_initialized(vcpu->kvm))) { +		ret = kvm_vgic_init(vcpu->kvm);  		if (ret)  			return ret;  	} -	/* -	 * Handle the "start in power-off" case by calling into the -	 * PSCI code. -	 */ -	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { -		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; -		kvm_psci_call(vcpu); -	} -  	return 0;  } @@ -699,6 +704,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,  	return -EINVAL;  } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, +					 struct kvm_vcpu_init *init) +{ +	int ret; + +	ret = kvm_vcpu_set_target(vcpu, init); +	if (ret) +		return ret; + +	/* +	 * Handle the "start in power-off" case by marking the VCPU as paused. +	 */ +	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) +		vcpu->arch.pause = true; + +	return 0; +} +  long kvm_arch_vcpu_ioctl(struct file *filp,  			 unsigned int ioctl, unsigned long arg)  { @@ -712,8 +735,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,  		if (copy_from_user(&init, argp, sizeof(init)))  			return -EFAULT; -		return kvm_vcpu_set_target(vcpu, &init); - +		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);  	}  	case KVM_SET_ONE_REG:  	case KVM_GET_ONE_REG: { @@ -771,7 +793,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,  	case KVM_ARM_DEVICE_VGIC_V2:  		if (!vgic_present)  			return -ENXIO; -		return kvm_vgic_set_addr(kvm, type, dev_addr->addr); +		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);  	default:  		return -ENODEV;  	} @@ -797,6 +819,19 @@ long kvm_arch_vm_ioctl(struct file *filp,  			return -EFAULT;  		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);  	} +	case KVM_ARM_PREFERRED_TARGET: { +		int err; +		struct kvm_vcpu_init init; + +		err = kvm_vcpu_preferred_target(&init); +		if (err) +			return err; + +		if (copy_to_user(argp, &init, sizeof(init))) +			return -EFAULT; + +		return 0; +	}  	default:  		return -EINVAL;  	} @@ -815,7 +850,7 @@ static void cpu_init_hyp_mode(void *dummy)  	boot_pgd_ptr = kvm_mmu_get_boot_httbr();  	pgd_ptr = kvm_mmu_get_httbr(); -	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); +	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);  	hyp_stack_ptr = stack_page + PAGE_SIZE;  	vector_ptr = (unsigned long)__kvm_hyp_vector; @@ -839,6 +874,34 @@ static struct notifier_block hyp_init_cpu_nb = {  	.notifier_call = hyp_init_cpu_notify,  }; +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, +				    unsigned long cmd, +				    void *v) +{ +	if (cmd == CPU_PM_EXIT && +	    __hyp_get_vectors() == hyp_default_vectors) { +		cpu_init_hyp_mode(NULL); +		return NOTIFY_OK; +	} + +	return NOTIFY_DONE; +} + +static struct notifier_block hyp_init_cpu_pm_nb = { +	.notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ +	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +#endif +  /**   * Inits Hyp-mode on 
all online CPUs   */ @@ -989,19 +1052,26 @@ int kvm_arch_init(void *opaque)  		}  	} +	cpu_notifier_register_begin(); +  	err = init_hyp_mode();  	if (err)  		goto out_err; -	err = register_cpu_notifier(&hyp_init_cpu_nb); +	err = __register_cpu_notifier(&hyp_init_cpu_nb);  	if (err) {  		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);  		goto out_err;  	} +	cpu_notifier_register_done(); + +	hyp_cpu_pm_init(); +  	kvm_coproc_table_init();  	return 0;  out_err: +	cpu_notifier_register_done();  	return err;  } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4d..c58a35116f6 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -23,6 +23,7 @@  #include <asm/kvm_host.h>  #include <asm/kvm_emulate.h>  #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h>  #include <asm/cacheflush.h>  #include <asm/cputype.h>  #include <trace/events/kvm.h> @@ -71,6 +72,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)  	return 1;  } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ +	/* +	 * Compute guest MPIDR. We build a virtual cluster out of the +	 * vcpu_id, but we read the 'U' bit from the underlying +	 * hardware directly. +	 */ +	vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | +				     ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | +				     (vcpu->vcpu_id & 3)); +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, +			 const struct coproc_params *p, +			 const struct coproc_reg *r) +{ +	if (p->is_write) +		return ignore_write(vcpu, p); + +	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; +	return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, +			const struct coproc_params *p, +			const struct coproc_reg *r) +{ +	if (p->is_write) +		return write_to_read_only(vcpu, p); +	return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, +			  const struct coproc_params *p, +			  const struct coproc_reg *r) +{ +	if (p->is_write) +		return ignore_write(vcpu, p); + +	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; +	return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ +	u32 l2ctlr, ncores; + +	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); +	l2ctlr &= ~(3 << 24); +	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; +	/* How many cores in the current cluster and the next ones */ +	ncores -= (vcpu->vcpu_id & ~3); +	/* Cap it to the maximum number of cores in a single cluster */ +	ncores = min(ncores, 3U); +	l2ctlr |= (ncores & 3) << 24; + +	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ +	u32 actlr; + +	/* ACTLR contains SMP bit: make sure you create all cpus first! */ +	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); +	/* Make the SMP bit consistent with the guest configuration */ +	if (atomic_read(&vcpu->kvm->online_vcpus) > 1) +		actlr |= 1U << 6; +	else +		actlr &= ~(1U << 6); + +	vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). 
+ */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, +			   const struct coproc_params *p, +			   const struct coproc_reg *r) +{ +	if (p->is_write) +		return ignore_write(vcpu, p); + +	*vcpu_reg(vcpu, p->Rt1) = 0; +	return true; +} +  /* See note at ARM ARM B1.14.4 */  static bool access_dcsw(struct kvm_vcpu *vcpu,  			const struct coproc_params *p, @@ -113,6 +206,44 @@ done:  }  /* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, +			  const struct coproc_params *p, +			  const struct coproc_reg *r) +{ +	BUG_ON(!p->is_write); + +	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); +	if (p->is_64bit) +		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + +	return true; +} + +/* + * SCTLR accessor. Only called as long as HCR_TVM is set.  If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + * + * Used by the cpu-specific code. + */ +bool access_sctlr(struct kvm_vcpu *vcpu, +		  const struct coproc_params *p, +		  const struct coproc_reg *r) +{ +	access_vm_reg(vcpu, p, r); + +	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */ +		vcpu->arch.hcr &= ~HCR_TVM; +		stage2_flush_vm(vcpu->kvm); +	} + +	return true; +} + +/*   * We could trap ID_DFR0 and tell the guest we don't support performance   * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was   * NAKed, so it will read the PMCR anyway. @@ -153,37 +284,52 @@ static bool pm_fake(struct kvm_vcpu *vcpu,   *            registers preceding 32-bit ones.   */  static const struct coproc_reg cp15_regs[] = { +	/* MPIDR: we use VMPIDR for guest access. */ +	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, +			NULL, reset_mpidr, c0_MPIDR }, +  	/* CSSELR: swapped by interrupt.S. */  	{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,  			NULL, reset_unknown, c0_CSSELR }, -	/* TTBR0/TTBR1: swapped by interrupt.S. */ -	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, -	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, +	/* ACTLR: trapped by HCR.TAC bit. */ +	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, +			access_actlr, reset_actlr, c1_ACTLR }, -	/* TTBCR: swapped by interrupt.S. */ +	/* CPACR: swapped by interrupt.S. */ +	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, +			NULL, reset_val, c1_CPACR, 0x00000000 }, + +	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */ +	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 }, +	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32, +			access_vm_reg, reset_unknown, c2_TTBR0 }, +	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32, +			access_vm_reg, reset_unknown, c2_TTBR1 },  	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, -			NULL, reset_val, c2_TTBCR, 0x00000000 }, +			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 }, +	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 }, +  	/* DACR: swapped by interrupt.S. */  	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32, -			NULL, reset_unknown, c3_DACR }, +			access_vm_reg, reset_unknown, c3_DACR },  	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. 
*/  	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32, -			NULL, reset_unknown, c5_DFSR }, +			access_vm_reg, reset_unknown, c5_DFSR },  	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32, -			NULL, reset_unknown, c5_IFSR }, +			access_vm_reg, reset_unknown, c5_IFSR },  	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32, -			NULL, reset_unknown, c5_ADFSR }, +			access_vm_reg, reset_unknown, c5_ADFSR },  	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32, -			NULL, reset_unknown, c5_AIFSR }, +			access_vm_reg, reset_unknown, c5_AIFSR },  	/* DFAR/IFAR: swapped by interrupt.S. */  	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32, -			NULL, reset_unknown, c6_DFAR }, +			access_vm_reg, reset_unknown, c6_DFAR },  	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, -			NULL, reset_unknown, c6_IFAR }, +			access_vm_reg, reset_unknown, c6_IFAR },  	/* PAR swapped by interrupt.S */  	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, @@ -195,6 +341,13 @@ static const struct coproc_reg cp15_regs[] = {  	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},  	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},  	/* +	 * L2CTLR access (guest wants to know #CPUs). +	 */ +	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, +			access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, +	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + +	/*  	 * Dummy performance monitor implementation.  	 */  	{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, @@ -213,9 +366,15 @@ static const struct coproc_reg cp15_regs[] = {  	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */  	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, -			NULL, reset_unknown, c10_PRRR}, +			access_vm_reg, reset_unknown, c10_PRRR},  	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, -			NULL, reset_unknown, c10_NMRR}, +			access_vm_reg, reset_unknown, c10_NMRR}, + +	/* AMAIR0/AMAIR1: swapped by interrupt.S. */ +	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, +			access_vm_reg, reset_unknown, c10_AMAIR0}, +	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, +			access_vm_reg, reset_unknown, c10_AMAIR1},  	/* VBAR: swapped by interrupt.S. */  	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, @@ -223,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = {  	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */  	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, -			NULL, reset_val, c13_CID, 0x00000000 }, +			access_vm_reg, reset_val, c13_CID, 0x00000000 },  	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,  			NULL, reset_unknown, c13_TID_URW },  	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32, @@ -234,6 +393,9 @@ static const struct coproc_reg cp15_regs[] = {  	/* CNTKCTL: swapped by interrupt.S. */  	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,  			NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + +	/* The Configuration Base Address Register. 
*/ +	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},  };  /* Target specific emulation tables */ @@ -241,6 +403,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];  void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)  { +	unsigned int i; + +	for (i = 1; i < table->num; i++) +		BUG_ON(cmp_reg(&table->table[i-1], +			       &table->table[i]) >= 0); +  	target_tables[table->target] = table;  } @@ -323,7 +491,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)  {  	struct coproc_params params; -	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; +	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;  	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;  	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);  	params.is_64bit = true; @@ -331,7 +499,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)  	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;  	params.Op2 = 0;  	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; -	params.CRn = 0; +	params.CRm = 0;  	return emulate_cp15(vcpu, ¶ms);  } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 0461d5c8d3d..1a44bbe3964 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)  {  	/* Look, we even formatted it for you to paste into the table! */  	if (p->is_64bit) { -		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n", -			      p->CRm, p->Op1, p->is_write ? "write" : "read"); +		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n", +			      p->CRn, p->Op1, p->is_write ? "write" : "read");  	} else {  		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"  			      " func_%s },\n", @@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,  		return -1;  	if (i1->CRn != i2->CRn)  		return i1->CRn - i2->CRn; -	if (i1->is_64 != i2->is_64) -		return i2->is_64 - i1->is_64;  	if (i1->CRm != i2->CRm)  		return i1->CRm - i2->CRm;  	if (i1->Op1 != i2->Op1)  		return i1->Op1 - i2->Op1; -	return i1->Op2 - i2->Op2; +	if (i1->Op2 != i2->Op2) +		return i1->Op2 - i2->Op2; +	return i2->is_64 - i1->is_64;  } @@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,  #define is64		.is_64 = true  #define is32		.is_64 = false +bool access_sctlr(struct kvm_vcpu *vcpu, +		  const struct coproc_params *p, +		  const struct coproc_reg *r); +  #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd..e6f4ae48bda 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,101 +17,12 @@   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.   */  #include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_host.h> -#include <asm/kvm_emulate.h>  #include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h>  #include <linux/init.h> -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ -	/* -	 * Compute guest MPIDR: -	 * (Even if we present only one VCPU to the guest on an SMP -	 * host we don't set the U bit in the MPIDR, or vice versa, as -	 * revealing the underlying hardware properties is likely to -	 * be the best choice). 
-	 */ -	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) -		| (vcpu->vcpu_id & MPIDR_LEVEL_MASK); -} -  #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, -			 const struct coproc_params *p, -			 const struct coproc_reg *r) -{ -	if (p->is_write) -		return ignore_write(vcpu, p); - -	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; -	return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, -			const struct coproc_params *p, -			const struct coproc_reg *r) -{ -	if (p->is_write) -		return write_to_read_only(vcpu, p); -	return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, -			  const struct coproc_params *p, -			  const struct coproc_reg *r) -{ -	if (p->is_write) -		return ignore_write(vcpu, p); - -	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; -	return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ -	u32 l2ctlr, ncores; - -	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); -	l2ctlr &= ~(3 << 24); -	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; -	l2ctlr |= (ncores & 3) << 24; - -	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ -	u32 actlr; - -	/* ACTLR contains SMP bit: make sure you create all cpus first! */ -	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); -	/* Make the SMP bit consistent with the guest configuration */ -	if (atomic_read(&vcpu->kvm->online_vcpus) > 1) -		actlr |= 1U << 6; -	else -		actlr &= ~(1U << 6); - -	vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, -			   const struct coproc_params *p, -			   const struct coproc_reg *r) -{ -	if (p->is_write) -		return ignore_write(vcpu, p); - -	*vcpu_reg(vcpu, p->Rt1) = 0; -	return true; -} -  /*   * A15-specific CP15 registers.   * CRn denotes the primary register number, but is copied to the CRm in the @@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,   *            registers preceding 32-bit ones.   */  static const struct coproc_reg a15_regs[] = { -	/* MPIDR: we use VMPIDR for guest access. */ -	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, -			NULL, reset_mpidr, c0_MPIDR }, -  	/* SCTLR: swapped by interrupt.S. */  	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, -			NULL, reset_val, c1_SCTLR, 0x00C50078 }, -	/* ACTLR: trapped by HCR.TAC bit. */ -	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, -			access_actlr, reset_actlr, c1_ACTLR }, -	/* CPACR: swapped by interrupt.S. */ -	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, -			NULL, reset_val, c1_CPACR, 0x00000000 }, - -	/* -	 * L2CTLR access (guest wants to know #CPUs). -	 */ -	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, -			access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, -	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - -	/* The Configuration Base Address Register. 
*/ -	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, +			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },  };  static struct kvm_coproc_target_table a15_target_table = { @@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {  static int __init coproc_a15_init(void)  { -	unsigned int i; - -	for (i = 1; i < ARRAY_SIZE(a15_regs); i++) -		BUG_ON(cmp_reg(&a15_regs[i-1], -			       &a15_regs[i]) >= 0); -  	kvm_register_target_coproc_table(&a15_target_table);  	return 0;  } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 00000000000..17fc7cd479d --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell <rusty@rustcorp.au> + *          Christoffer Dall <c.dall@virtualopensystems.com> + *          Jonathan Austin <jonathan.austin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. + */ +#include <linux/kvm_host.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> +#include <linux/init.h> + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + *            registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { +	/* SCTLR: swapped by interrupt.S. 
*/ +	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, +			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { +	.target = KVM_ARM_TARGET_CORTEX_A7, +	.table = a7_regs, +	.num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ +	kvm_register_target_coproc_table(&a7_target_table); +	return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da5..d6c00528367 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)  	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;  	if (is_pabt) { -		/* Set DFAR and DFSR */ +		/* Set IFAR and IFSR */  		vcpu->arch.cp15[c6_IFAR] = addr;  		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);  		/* Always give debug fault for now - should give guest a clue */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d0361218..b23a59c1c52 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)  { +	vcpu->arch.hcr = HCR_GUEST_MASK;  	return 0;  } @@ -109,6 +110,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)  	return -EINVAL;  } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ +	return 0; +} + +static bool is_timer_reg(u64 index) +{ +	return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ +	return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ +	return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ +	switch (index) { +	case KVM_REG_ARM_TIMER_CTL: +	case KVM_REG_ARM_TIMER_CNT: +	case KVM_REG_ARM_TIMER_CVAL: +		return true; +	} +	return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ +	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) +		return -EFAULT; +	uindices++; +	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) +		return -EFAULT; +	uindices++; +	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) +		return -EFAULT; + +	return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ +	void __user *uaddr = (void __user *)(long)reg->addr; +	u64 val; +	int ret; + +	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); +	if (ret != 0) +		return ret; + +	return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ +	void __user *uaddr = (void __user *)(long)reg->addr; +	u64 val; + +	val = kvm_arm_timer_get_reg(vcpu, reg->id); +	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} +  static unsigned long num_core_regs(void)  {  	return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +199,8 @@ static unsigned long num_core_regs(void)   */  unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)  { -	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); +	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) +		+ NUM_TIMER_REGS;  }  /** @@ -133,6 +212,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)  {  	unsigned int i;  	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; +	int ret;  	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {  		if 
(put_user(core_reg | i, uindices)) @@ -140,6 +220,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)  		uindices++;  	} +	ret = copy_timer_indices(vcpu, uindices); +	if (ret) +		return ret; +	uindices += NUM_TIMER_REGS; +  	return kvm_arm_copy_coproc_indices(vcpu, uindices);  } @@ -153,6 +238,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)  	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)  		return get_core_reg(vcpu, reg); +	if (is_timer_reg(reg->id)) +		return get_timer_reg(vcpu, reg); +  	return kvm_arm_coproc_get_reg(vcpu, reg);  } @@ -166,6 +254,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)  	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)  		return set_core_reg(vcpu, reg); +	if (is_timer_reg(reg->id)) +		return set_timer_reg(vcpu, reg); +  	return kvm_arm_coproc_set_reg(vcpu, reg);  } @@ -190,6 +281,8 @@ int __attribute_const__ kvm_target_cpu(void)  		return -EINVAL;  	switch (part_number) { +	case ARM_CPU_PART_CORTEX_A7: +		return KVM_ARM_TARGET_CORTEX_A7;  	case ARM_CPU_PART_CORTEX_A15:  		return KVM_ARM_TARGET_CORTEX_A15;  	default: @@ -202,7 +295,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,  {  	unsigned int i; -	/* We can only do a cortex A15 for now. */ +	/* We can only cope with guest==host and only on A15/A7 (for now). */  	if (init->target != kvm_target_cpu())  		return -EINVAL; @@ -222,6 +315,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,  	return kvm_reset_vcpu(vcpu);  } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ +	int target = kvm_target_cpu(); + +	if (target < 0) +		return -ENODEV; + +	memset(init, 0, sizeof(*init)); + +	/* +	 * For now, we don't return any features. +	 * In future, we might use features to return target +	 * specific features available for the preferred +	 * target type. +	 */ +	init->target = (__u32)target; + +	return 0; +} +  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)  {  	return -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index df4c82d47ad..4c979d466cc 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@  #include "trace.h" -#include "trace.h" -  typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);  static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) @@ -40,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)  { +	int ret; +  	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),  		      kvm_vcpu_hvc_get_imm(vcpu)); -	if (kvm_psci_call(vcpu)) +	ret = kvm_psci_call(vcpu); +	if (ret < 0) { +		kvm_inject_undefined(vcpu);  		return 1; +	} -	kvm_inject_undefined(vcpu); -	return 1; +	return ret;  }  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) @@ -73,23 +75,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)  }  /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests   * @vcpu:	the vcpu pointer   * @run:	the kvm_run structure pointer   * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. 
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM.   */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)  {  	trace_kvm_wfi(*vcpu_pc(vcpu)); -	kvm_vcpu_block(vcpu); +	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) +		kvm_vcpu_on_spin(vcpu); +	else +		kvm_vcpu_block(vcpu); +  	return 1;  }  static exit_handle_fn arm_exit_handlers[] = { -	[HSR_EC_WFI]		= kvm_handle_wfi, +	[HSR_EC_WFI]		= kvm_handle_wfx,  	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,  	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,  	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access, diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index ddc15539bad..0d68d407306 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -220,6 +220,10 @@ after_vfp_restore:   * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are   * passed in r0 and r1.   * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + *   * The calling convention follows the standard AAPCS:   *   r0 - r3: caller save   *   r12:     caller save @@ -363,6 +367,11 @@ hyp_hvc:  host_switch_to_hyp:  	pop	{r0, r1, r2} +	/* Check for __hyp_get_vectors */ +	cmp	r0, #-1 +	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR +	beq	1f +  	push	{lr}  	mrs	lr, SPSR  	push	{lr} @@ -378,7 +387,7 @@ THUMB(	orr	lr, #1)  	pop	{lr}  	msr	SPSR_csxf, lr  	pop	{lr} -	eret +1:	eret  guest_trap:  	load_vcpu			@ Load VCPU pointer to r0 diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6f18695a09c..76af9302557 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -303,13 +303,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0  	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL  	mrrc	p15, 0, r4, r5, c7	@ PAR +	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0 +	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1  	.if \store_to_vcpu == 0 -	push	{r2,r4-r5} +	push	{r2,r4-r7}  	.else  	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]  	add	r12, vcpu, #CP15_OFFSET(c7_PAR)  	strd	r4, r5, [r12] +	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] +	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]  	.endif  .endm @@ -322,15 +326,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0   */  .macro write_cp15_state read_from_vcpu  	.if \read_from_vcpu == 0 -	pop	{r2,r4-r5} +	pop	{r2,r4-r7}  	.else  	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]  	add	r12, vcpu, #CP15_OFFSET(c7_PAR)  	ldrd	r4, r5, [r12] +	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] +	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]  	.endif  	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL  	mcrr	p15, 0, r4, r5, c7	@ PAR +	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0 +	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1  	.if \read_from_vcpu == 0  	pop	{r2-r12} @@ -597,17 +605,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0  /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */  .macro configure_hyp_role operation -	mrc	p15, 4, r2, c1, c1, 0	@ HCR -	bic	r2, r2, #HCR_VIRT_EXCP_MASK -	ldr	r3, =HCR_GUEST_MASK  	.if \operation == vmentry -	orr	r2, r2, r3 +	ldr	r2, [vcpu, #VCPU_HCR]  	ldr	r3, [vcpu, #VCPU_IRQ_LINES]  	orr	r2, r2, r3  	.else -	bic	r2, r2, r3 +	mov	r2, #0  	.endif -	mcr	p15, 4, r2, c1, c1, 0 +	mcr	p15, 4, r2, c1, c1, 0	@ HCR  .endm  .macro load_vcpu diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 
0c25d9487d5..4cb5a93182e 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -23,6 +23,68 @@  #include "trace.h" +static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) +{ +	void *datap = NULL; +	union { +		u8	byte; +		u16	hword; +		u32	word; +		u64	dword; +	} tmp; + +	switch (len) { +	case 1: +		tmp.byte	= data; +		datap		= &tmp.byte; +		break; +	case 2: +		tmp.hword	= data; +		datap		= &tmp.hword; +		break; +	case 4: +		tmp.word	= data; +		datap		= &tmp.word; +		break; +	case 8: +		tmp.dword	= data; +		datap		= &tmp.dword; +		break; +	} + +	memcpy(buf, datap, len); +} + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ +	unsigned long data = 0; +	union { +		u16	hword; +		u32	word; +		u64	dword; +	} tmp; + +	switch (len) { +	case 1: +		data = buf[0]; +		break; +	case 2: +		memcpy(&tmp.hword, buf, len); +		data = tmp.hword; +		break; +	case 4: +		memcpy(&tmp.word, buf, len); +		data = tmp.word; +		break; +	case 8: +		memcpy(&tmp.dword, buf, len); +		data = tmp.dword; +		break; +	} + +	return data; +} +  /**   * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation   * @vcpu: The VCPU pointer @@ -33,28 +95,27 @@   */  int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)  { -	unsigned long *dest; +	unsigned long data;  	unsigned int len;  	int mask;  	if (!run->mmio.is_write) { -		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); -		*dest = 0; -  		len = run->mmio.len;  		if (len > sizeof(unsigned long))  			return -EINVAL; -		memcpy(dest, run->mmio.data, len); - -		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, -				*((u64 *)run->mmio.data)); +		data = mmio_read_buf(run->mmio.data, len);  		if (vcpu->arch.mmio_decode.sign_extend &&  		    len < sizeof(unsigned long)) {  			mask = 1U << ((len * 8) - 1); -			*dest = (*dest ^ mask) - mask; +			data = (data ^ mask) - mask;  		} + +		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, +			       data); +		data = vcpu_data_host_to_guest(vcpu, data, len); +		*vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;  	}  	return 0; @@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,  		 phys_addr_t fault_ipa)  {  	struct kvm_exit_mmio mmio; +	unsigned long data;  	unsigned long rt;  	int ret; @@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,  	}  	rt = vcpu->arch.mmio_decode.rt; +	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); +  	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :  					 KVM_TRACE_MMIO_READ_UNSATISFIED,  			mmio.len, fault_ipa, -			(mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); +			(mmio.is_write) ? 
data : 0);  	if (mmio.is_write) -		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); +		mmio_write_buf(mmio.data, mmio.len, data);  	if (vgic_handle_mmio(vcpu, run, &mmio))  		return 1; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b0de86b56c1..16f804938b8 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@  #include <linux/mman.h>  #include <linux/kvm_host.h>  #include <linux/io.h> +#include <linux/hugetlb.h>  #include <trace/events/kvm.h>  #include <asm/pgalloc.h>  #include <asm/cacheflush.h> @@ -41,6 +42,10 @@ static unsigned long hyp_idmap_start;  static unsigned long hyp_idmap_end;  static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x)) +  static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)  {  	/* @@ -93,19 +98,29 @@ static bool page_empty(void *ptr)  static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)  { -	pmd_t *pmd_table = pmd_offset(pud, 0); -	pud_clear(pud); -	kvm_tlb_flush_vmid_ipa(kvm, addr); -	pmd_free(NULL, pmd_table); +	if (pud_huge(*pud)) { +		pud_clear(pud); +		kvm_tlb_flush_vmid_ipa(kvm, addr); +	} else { +		pmd_t *pmd_table = pmd_offset(pud, 0); +		pud_clear(pud); +		kvm_tlb_flush_vmid_ipa(kvm, addr); +		pmd_free(NULL, pmd_table); +	}  	put_page(virt_to_page(pud));  }  static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)  { -	pte_t *pte_table = pte_offset_kernel(pmd, 0); -	pmd_clear(pmd); -	kvm_tlb_flush_vmid_ipa(kvm, addr); -	pte_free_kernel(NULL, pte_table); +	if (kvm_pmd_huge(*pmd)) { +		pmd_clear(pmd); +		kvm_tlb_flush_vmid_ipa(kvm, addr); +	} else { +		pte_t *pte_table = pte_offset_kernel(pmd, 0); +		pmd_clear(pmd); +		kvm_tlb_flush_vmid_ipa(kvm, addr); +		pte_free_kernel(NULL, pte_table); +	}  	put_page(virt_to_page(pmd));  } @@ -131,28 +146,43 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,  	while (addr < end) {  		pgd = pgdp + pgd_index(addr);  		pud = pud_offset(pgd, addr); +		pte = NULL;  		if (pud_none(*pud)) { -			addr = pud_addr_end(addr, end); +			addr = kvm_pud_addr_end(addr, end); +			continue; +		} + +		if (pud_huge(*pud)) { +			/* +			 * If we are dealing with a huge pud, just clear it and +			 * move on. 
+			 */ +			clear_pud_entry(kvm, pud, addr); +			addr = kvm_pud_addr_end(addr, end);  			continue;  		}  		pmd = pmd_offset(pud, addr);  		if (pmd_none(*pmd)) { -			addr = pmd_addr_end(addr, end); +			addr = kvm_pmd_addr_end(addr, end);  			continue;  		} -		pte = pte_offset_kernel(pmd, addr); -		clear_pte_entry(kvm, pte, addr); -		next = addr + PAGE_SIZE; +		if (!kvm_pmd_huge(*pmd)) { +			pte = pte_offset_kernel(pmd, addr); +			clear_pte_entry(kvm, pte, addr); +			next = addr + PAGE_SIZE; +		} -		/* If we emptied the pte, walk back up the ladder */ -		if (page_empty(pte)) { +		/* +		 * If the pmd entry is to be cleared, walk back up the ladder +		 */ +		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {  			clear_pmd_entry(kvm, pmd, addr); -			next = pmd_addr_end(addr, end); +			next = kvm_pmd_addr_end(addr, end);  			if (page_empty(pmd) && !page_empty(pud)) {  				clear_pud_entry(kvm, pud, addr); -				next = pud_addr_end(addr, end); +				next = kvm_pud_addr_end(addr, end);  			}  		} @@ -160,6 +190,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,  	}  } +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, +			      phys_addr_t addr, phys_addr_t end) +{ +	pte_t *pte; + +	pte = pte_offset_kernel(pmd, addr); +	do { +		if (!pte_none(*pte)) { +			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); +			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); +		} +	} while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, +			      phys_addr_t addr, phys_addr_t end) +{ +	pmd_t *pmd; +	phys_addr_t next; + +	pmd = pmd_offset(pud, addr); +	do { +		next = kvm_pmd_addr_end(addr, end); +		if (!pmd_none(*pmd)) { +			if (kvm_pmd_huge(*pmd)) { +				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); +				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); +			} else { +				stage2_flush_ptes(kvm, pmd, addr, next); +			} +		} +	} while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, +			      phys_addr_t addr, phys_addr_t end) +{ +	pud_t *pud; +	phys_addr_t next; + +	pud = pud_offset(pgd, addr); +	do { +		next = kvm_pud_addr_end(addr, end); +		if (!pud_none(*pud)) { +			if (pud_huge(*pud)) { +				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); +				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); +			} else { +				stage2_flush_pmds(kvm, pud, addr, next); +			} +		} +	} while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, +				 struct kvm_memory_slot *memslot) +{ +	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; +	phys_addr_t end = addr + PAGE_SIZE * memslot->npages; +	phys_addr_t next; +	pgd_t *pgd; + +	pgd = kvm->arch.pgd + pgd_index(addr); +	do { +		next = kvm_pgd_addr_end(addr, end); +		stage2_flush_puds(kvm, pgd, addr, next); +	} while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. 
+ */ +void stage2_flush_vm(struct kvm *kvm) +{ +	struct kvm_memslots *slots; +	struct kvm_memory_slot *memslot; +	int idx; + +	idx = srcu_read_lock(&kvm->srcu); +	spin_lock(&kvm->mmu_lock); + +	slots = kvm_memslots(kvm); +	kvm_for_each_memslot(memslot, slots) +		stage2_flush_memslot(kvm, memslot); + +	spin_unlock(&kvm->mmu_lock); +	srcu_read_unlock(&kvm->srcu, idx); +} +  /**   * free_boot_hyp_pgd - free HYP boot page tables   * @@ -172,14 +295,14 @@ void free_boot_hyp_pgd(void)  	if (boot_hyp_pgd) {  		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);  		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); -		kfree(boot_hyp_pgd); +		free_pages((unsigned long)boot_hyp_pgd, pgd_order);  		boot_hyp_pgd = NULL;  	}  	if (hyp_pgd)  		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); -	kfree(init_bounce_page); +	free_page((unsigned long)init_bounce_page);  	init_bounce_page = NULL;  	mutex_unlock(&kvm_hyp_pgd_mutex); @@ -209,7 +332,7 @@ void free_hyp_pgds(void)  		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)  			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); -		kfree(hyp_pgd); +		free_pages((unsigned long)hyp_pgd, pgd_order);  		hyp_pgd = NULL;  	} @@ -307,6 +430,17 @@ out:  	return err;  } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ +	if (!is_vmalloc_addr(kaddr)) { +		BUG_ON(!virt_addr_valid(kaddr)); +		return __pa(kaddr); +	} else { +		return page_to_phys(vmalloc_to_page(kaddr)) + +		       offset_in_page(kaddr); +	} +} +  /**   * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode   * @from:	The virtual kernel start address of the range @@ -318,16 +452,27 @@ out:   */  int create_hyp_mappings(void *from, void *to)  { -	unsigned long phys_addr = virt_to_phys(from); +	phys_addr_t phys_addr; +	unsigned long virt_addr;  	unsigned long start = KERN_TO_HYP((unsigned long)from);  	unsigned long end = KERN_TO_HYP((unsigned long)to); -	/* Check for a valid kernel memory mapping */ -	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) -		return -EINVAL; +	start = start & PAGE_MASK; +	end = PAGE_ALIGN(end); -	return __create_hyp_mappings(hyp_pgd, start, end, -				     __phys_to_pfn(phys_addr), PAGE_HYP); +	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { +		int err; + +		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); +		err = __create_hyp_mappings(hyp_pgd, virt_addr, +					    virt_addr + PAGE_SIZE, +					    __phys_to_pfn(phys_addr), +					    PAGE_HYP); +		if (err) +			return err; +	} + +	return 0;  }  /** @@ -420,29 +565,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)  	kvm->arch.pgd = NULL;  } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, -			  phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +			     phys_addr_t addr)  {  	pgd_t *pgd;  	pud_t *pud;  	pmd_t *pmd; -	pte_t *pte, old_pte; -	/* Create 2nd stage page table mapping - Level 1 */  	pgd = kvm->arch.pgd + pgd_index(addr);  	pud = pud_offset(pgd, addr);  	if (pud_none(*pud)) {  		if (!cache) -			return 0; /* ignore calls from kvm_set_spte_hva */ +			return NULL;  		pmd = mmu_memory_cache_alloc(cache);  		pud_populate(NULL, pud, pmd);  		get_page(virt_to_page(pud));  	} -	pmd = pmd_offset(pud, addr); +	return pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache +			       *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ +	pmd_t *pmd, old_pmd; 
+ +	pmd = stage2_get_pmd(kvm, cache, addr); +	VM_BUG_ON(!pmd); + +	/* +	 * Mapping in huge pages should only happen through a fault.  If a +	 * page is merged into a transparent huge page, the individual +	 * subpages of that huge page should be unmapped through MMU +	 * notifiers before we get here. +	 * +	 * Merging of CompoundPages is not supported; they should become +	 * splitting first, unmapped, merged, and mapped back in on-demand. +	 */ +	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + +	old_pmd = *pmd; +	kvm_set_pmd(pmd, *new_pmd); +	if (pmd_present(old_pmd)) +		kvm_tlb_flush_vmid_ipa(kvm, addr); +	else +		get_page(virt_to_page(pmd)); +	return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +			  phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ +	pmd_t *pmd; +	pte_t *pte, old_pte; + +	/* Create stage-2 page table mapping - Level 1 */ +	pmd = stage2_get_pmd(kvm, cache, addr); +	if (!pmd) { +		/* +		 * Ignore calls from kvm_set_spte_hva for unallocated +		 * address ranges. +		 */ +		return 0; +	} -	/* Create 2nd stage page table mapping - Level 2 */ +	/* Create stage-2 page mappings - Level 2 */  	if (pmd_none(*pmd)) {  		if (!cache)  			return 0; /* ignore calls from kvm_set_spte_hva */ @@ -507,16 +694,60 @@ out:  	return ret;  } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ +	pfn_t pfn = *pfnp; +	gfn_t gfn = *ipap >> PAGE_SHIFT; + +	if (PageTransCompound(pfn_to_page(pfn))) { +		unsigned long mask; +		/* +		 * The address we faulted on is backed by a transparent huge +		 * page.  However, because we map the compound huge page and +		 * not the individual tail page, we need to transfer the +		 * refcount to the head page.  We have to be careful that the +		 * THP doesn't start to split while we are adjusting the +		 * refcounts. +		 * +		 * We are sure this doesn't happen, because mmu_notifier_retry +		 * was successful and we are holding the mmu_lock, so if this +		 * THP is trying to split, it will be blocked in the mmu +		 * notifier before touching any of the pages, specifically +		 * before being able to call __split_huge_page_refcount(). +		 * +		 * We can therefore safely transfer the refcount from PG_tail +		 * to PG_head and switch the pfn from a tail page to the head +		 * page accordingly. 
+		 */ +		mask = PTRS_PER_PMD - 1; +		VM_BUG_ON((gfn & mask) != (pfn & mask)); +		if (pfn & mask) { +			*ipap &= PMD_MASK; +			kvm_release_pfn_clean(pfn); +			pfn &= ~mask; +			kvm_get_pfn(pfn); +			*pfnp = pfn; +		} + +		return true; +	} + +	return false; +} +  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, -			  gfn_t gfn, struct kvm_memory_slot *memslot, +			  struct kvm_memory_slot *memslot,  			  unsigned long fault_status)  { -	pte_t new_pte; -	pfn_t pfn;  	int ret; -	bool write_fault, writable; +	bool write_fault, writable, hugetlb = false, force_pte = false;  	unsigned long mmu_seq; +	gfn_t gfn = fault_ipa >> PAGE_SHIFT; +	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); +	struct kvm *kvm = vcpu->kvm;  	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; +	struct vm_area_struct *vma; +	pfn_t pfn;  	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));  	if (fault_status == FSC_PERM && !write_fault) { @@ -524,6 +755,28 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,  		return -EFAULT;  	} +	/* Let's check if we will get back a huge page backed by hugetlbfs */ +	down_read(¤t->mm->mmap_sem); +	vma = find_vma_intersection(current->mm, hva, hva + 1); +	if (is_vm_hugetlb_page(vma)) { +		hugetlb = true; +		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; +	} else { +		/* +		 * Pages belonging to memslots that don't have the same +		 * alignment for userspace and IPA cannot be mapped using +		 * block descriptors even if the pages belong to a THP for +		 * the process, because the stage-2 block descriptor will +		 * cover more than a single THP and we loose atomicity for +		 * unmapping, updates, and splits of the THP or other pages +		 * in the stage-2 block range. +		 */ +		if ((memslot->userspace_addr & ~PMD_MASK) != +		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) +			force_pte = true; +	} +	up_read(¤t->mm->mmap_sem); +  	/* We need minimum second+third level pages */  	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);  	if (ret) @@ -541,26 +794,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,  	 */  	smp_rmb(); -	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); +	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);  	if (is_error_pfn(pfn))  		return -EFAULT; -	new_pte = pfn_pte(pfn, PAGE_S2); -	coherent_icache_guest_page(vcpu->kvm, gfn); - -	spin_lock(&vcpu->kvm->mmu_lock); -	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) +	spin_lock(&kvm->mmu_lock); +	if (mmu_notifier_retry(kvm, mmu_seq))  		goto out_unlock; -	if (writable) { -		kvm_set_s2pte_writable(&new_pte); -		kvm_set_pfn_dirty(pfn); +	if (!hugetlb && !force_pte) +		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + +	if (hugetlb) { +		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); +		new_pmd = pmd_mkhuge(new_pmd); +		if (writable) { +			kvm_set_s2pmd_writable(&new_pmd); +			kvm_set_pfn_dirty(pfn); +		} +		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); +		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); +	} else { +		pte_t new_pte = pfn_pte(pfn, PAGE_S2); +		if (writable) { +			kvm_set_s2pte_writable(&new_pte); +			kvm_set_pfn_dirty(pfn); +		} +		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); +		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);  	} -	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); +  out_unlock: -	spin_unlock(&vcpu->kvm->mmu_lock); +	spin_unlock(&kvm->mmu_lock);  	kvm_release_pfn_clean(pfn); -	return 0; +	return ret;  }  /** @@ 
-629,7 +896,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)  	memslot = gfn_to_memslot(vcpu->kvm, gfn); -	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); +	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);  	if (ret == 0)  		ret = 1;  out_unlock: @@ -747,9 +1014,9 @@ int kvm_mmu_init(void)  {  	int err; -	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); -	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); -	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); +	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); +	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); +	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);  	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {  		/* @@ -759,7 +1026,7 @@ int kvm_mmu_init(void)  		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;  		phys_addr_t phys_base; -		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); +		init_bounce_page = (void *)__get_free_page(GFP_KERNEL);  		if (!init_bounce_page) {  			kvm_err("Couldn't allocate HYP init bounce page\n");  			err = -ENOMEM; @@ -776,7 +1043,7 @@ int kvm_mmu_init(void)  		 */  		kvm_flush_dcache_to_poc(init_bounce_page, len); -		phys_base = virt_to_phys(init_bounce_page); +		phys_base = kvm_virt_to_phys(init_bounce_page);  		hyp_idmap_vector += phys_base - hyp_idmap_start;  		hyp_idmap_start = phys_base;  		hyp_idmap_end = phys_base + len; @@ -785,8 +1052,9 @@ int kvm_mmu_init(void)  			 (unsigned long)phys_base);  	} -	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); -	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); +	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); +	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); +  	if (!hyp_pgd || !boot_hyp_pgd) {  		kvm_err("Hyp mode PGD not allocated\n");  		err = -ENOMEM; diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 86a693a02ba..09cf37737ee 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@  #include <linux/kvm_host.h>  #include <linux/wait.h> +#include <asm/cputype.h>  #include <asm/kvm_emulate.h>  #include <asm/kvm_psci.h> @@ -26,6 +27,36 @@   * as described in ARM document number ARM DEN 0022A.   */ +#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ +	if (affinity_level <= 3) +		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + +	return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ +	/* +	 * NOTE: For simplicity, we make VCPU suspend emulation to be +	 * same-as WFI (Wait-for-interrupt) emulation. +	 * +	 * This means for KVM the wakeup events are interrupts and +	 * this is consistent with intended use of StateID as described +	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). +	 * +	 * Further, we also treat power-down request to be same as +	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 +	 * specification (ARM DEN 0022A). This means all suspend states +	 * for KVM will preserve the register state. 
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long context_id;
+	unsigned long mpidr;
 	phys_addr_t target_pc;
+	int i;
 
 	cpu_id = *vcpu_reg(source_vcpu, 1);
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
-	if (cpu_id >= atomic_read(&kvm->online_vcpus))
-		return KVM_PSCI_RET_INVAL;
-
-	target_pc = *vcpu_reg(source_vcpu, 2);
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+			vcpu = tmp;
+			break;
+		}
+	}
 
-	vcpu = kvm_get_vcpu(kvm, cpu_id);
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu)
+		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.pause) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
+	target_pc = *vcpu_reg(source_vcpu, 2);
+	context_id = *vcpu_reg(source_vcpu, 3);
 
 	kvm_reset_vcpu(vcpu);
@@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		vcpu_set_thumb(vcpu);
 	}
 
+	/* Propagate caller endianness */
+	if (kvm_vcpu_is_be(source_vcpu))
+		kvm_vcpu_set_be(vcpu);
+
 	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general purpose registers are undefined upon CPU_ON.
+	 */
+	*vcpu_reg(vcpu, 0) = context_id;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
-	return KVM_PSCI_RET_SUCCESS;
+	return PSCI_RET_SUCCESS;
 }
 
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = *vcpu_reg(vcpu, 1);
+	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPUs matching the target affinity are
+	 * running then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if (((mpidr & target_affinity_mask) == target_affinity) &&
+		    !tmp->arch.pause) {
+			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	int ret = 1;
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+		 * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We shouldn't be going back to the guest VCPU after
+		 * receiving a SYSTEM_OFF request.
+		 *
+		 * If user space accidentally/deliberately resumes
+		 * the guest VCPU after a SYSTEM_OFF request then the
+		 * guest VCPU should see an internal failure from the
+		 * PSCI return value. To achieve this, we preload r0
+		 * (or x0) with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
 
 	switch (psci_fn) {
 	case KVM_PSCI_FN_CPU_OFF:
 		kvm_psci_vcpu_off(vcpu);
-		val = KVM_PSCI_RET_SUCCESS;
+		val = PSCI_RET_SUCCESS;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
 	case KVM_PSCI_FN_CPU_SUSPEND:
 	case KVM_PSCI_FN_MIGRATE:
-		val = KVM_PSCI_RET_NI;
+		val = PSCI_RET_NOT_SUPPORTED;
 		break;
-
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
+	default:
+		return -EINVAL;
+	};
 }
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 71e08baee20..f558c073c02 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,14 @@
 #include <kvm/arm_arch_timer.h>
 
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
 	{ .irq = 27 },
 	.level = 1,
 };
@@ -58,23 +56,22 @@ static const struct kvm_irq_level a15_vtimer_irq = {
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_regs *cpu_reset;
+	struct kvm_regs *reset_regs;
 	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
+	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > a15_max_cpu_idx)
-			return -EINVAL;
-		cpu_reset = &a15_regs_reset;
+		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
-		cpu_vtimer_irq = &a15_vtimer_irq;
+		cpu_vtimer_irq = &cortexa_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;
 	}
 
 	/* Reset core registers */
-	memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+	memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
 
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);
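The stage-2 fault handling in the mmu.c hunk above only installs a PMD block mapping when the userspace address and the IPA share the same offset within a PMD-sized region (2 MiB with LPAE); otherwise force_pte is set and the fault is mapped with an ordinary PTE. A small standalone sketch of that alignment test, with made-up addresses and simplified constants (illustration only, not part of the patch):

/* Standalone illustration of the force_pte check; 2 MiB PMD assumed (LPAE). */
#include <assert.h>
#include <stdbool.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21			/* 2 MiB blocks with LPAE stage-2 */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

static bool must_force_pte(unsigned long userspace_addr, unsigned long base_gfn)
{
	/* Same test as in user_mem_abort(): compare offsets within a PMD. */
	return (userspace_addr & ~PMD_MASK) !=
	       ((base_gfn << PAGE_SHIFT) & ~PMD_MASK);
}

int main(void)
{
	/* hva and IPA both 2 MiB aligned: block mappings are fine. */
	assert(!must_force_pte(0x40000000UL, 0x80000000UL >> PAGE_SHIFT));

	/* Same 1 MiB offset within the PMD on both sides: still fine. */
	assert(!must_force_pte(0x40100000UL, 0x80100000UL >> PAGE_SHIFT));

	/* Offsets differ: a stage-2 block would span two THPs, so force PTEs. */
	assert(must_force_pte(0x40100000UL, 0x80000000UL >> PAGE_SHIFT));
	return 0;
}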
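For reference, the CPU_ON path handled by kvm_psci_0_2_call() and kvm_psci_vcpu_on() above expects the function ID in r0, the target MPIDR in r1, the secondary entry point in r2 and a context ID in r3, and returns its status in r0. A rough guest-side sketch of issuing such a call over the HVC conduit (illustration only: the helper name is made up, the constants are assumed to come from include/uapi/linux/psci.h, and assembling the hvc instruction needs the virtualization extensions enabled):

/* Illustrative guest-side sketch only -- not part of the patch above. */
#include <uapi/linux/psci.h>	/* PSCI_0_2_FN_CPU_ON, PSCI_RET_* (assumed) */

static int guest_psci_cpu_on(unsigned long target_mpidr,
			     unsigned long entry_point,
			     unsigned long context_id)
{
	register unsigned long r0 asm("r0") = PSCI_0_2_FN_CPU_ON;
	register unsigned long r1 asm("r1") = target_mpidr;
	register unsigned long r2 asm("r2") = entry_point;
	register unsigned long r3 asm("r3") = context_id;

	/* Trap to the hypervisor; KVM lands in kvm_psci_0_2_call(). */
	asm volatile(".arch_extension virt\n"
		     "	hvc	#0"
		     : "+r" (r0)
		     : "r" (r1), "r" (r2), "r" (r3)
		     : "memory");

	return (int)r0;	/* PSCI_RET_SUCCESS, PSCI_RET_ALREADY_ON, ... */
}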
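AFFINITY_INFO above narrows each VCPU's MPIDR with psci_affinity_mask() before comparing it to the requested affinity. A standalone worked example of the mask values, assuming the arm32 definitions MPIDR_LEVEL_BITS == 8 and MPIDR_HWID_BITMASK == 0x00ffffff (illustration only, not part of the patch); note that a lowest affinity level of 3 or more yields an empty mask and is therefore rejected with PSCI_RET_INVALID_PARAMS:

/* Standalone illustration of psci_affinity_mask(); values assume arm32. */
#include <assert.h>

#define MPIDR_LEVEL_BITS	8
#define MPIDR_HWID_BITMASK	0x00ffffffUL
#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)

static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
	if (affinity_level <= 3)
		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);

	return 0;
}

int main(void)
{
	assert(psci_affinity_mask(0) == 0x00ffffffUL);	/* match Aff2.Aff1.Aff0 */
	assert(psci_affinity_mask(1) == 0x00ffff00UL);	/* ignore Aff0 */
	assert(psci_affinity_mask(2) == 0x00ff0000UL);	/* ignore Aff1.Aff0 */
	assert(psci_affinity_mask(3) == 0);		/* empty -> INVALID_PARAMS */
	assert(psci_affinity_mask(4) == 0);		/* out of range */
	return 0;
}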
