diff options
Diffstat (limited to 'arch/arm/kvm')
| -rw-r--r-- | arch/arm/kvm/Kconfig | 19 | ||||
| -rw-r--r-- | arch/arm/kvm/Makefile | 9 | ||||
| -rw-r--r-- | arch/arm/kvm/arm.c | 599 | ||||
| -rw-r--r-- | arch/arm/kvm/coproc.c | 260 | ||||
| -rw-r--r-- | arch/arm/kvm/coproc.h | 17 | ||||
| -rw-r--r-- | arch/arm/kvm/coproc_a15.c | 125 | ||||
| -rw-r--r-- | arch/arm/kvm/coproc_a7.c | 54 | ||||
| -rw-r--r-- | arch/arm/kvm/emulate.c | 77 | ||||
| -rw-r--r-- | arch/arm/kvm/guest.c | 134 | ||||
| -rw-r--r-- | arch/arm/kvm/handle_exit.c | 169 | ||||
| -rw-r--r-- | arch/arm/kvm/init.S | 78 | ||||
| -rw-r--r-- | arch/arm/kvm/interrupts.S | 58 | ||||
| -rw-r--r-- | arch/arm/kvm/interrupts_head.S | 191 | ||||
| -rw-r--r-- | arch/arm/kvm/mmio.c | 138 | ||||
| -rw-r--r-- | arch/arm/kvm/mmu.c | 855 | ||||
| -rw-r--r-- | arch/arm/kvm/perf.c | 68 | ||||
| -rw-r--r-- | arch/arm/kvm/psci.c | 263 | ||||
| -rw-r--r-- | arch/arm/kvm/reset.c | 27 | ||||
| -rw-r--r-- | arch/arm/kvm/trace.h | 7 |
19 files changed, 2306 insertions, 842 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 05227cb57a7..4be5bb150bd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,9 +20,10 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE + depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. @@ -51,6 +52,20 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -source drivers/virtio/Kconfig +config KVM_ARM_VGIC + bool "KVM support for Virtual GIC" + depends on KVM_ARM_HOST && OF + select HAVE_KVM_IRQCHIP + default y + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER + bool "KVM support for Architected Timers" + depends on KVM_ARM_VGIC && ARM_ARCH_TIMER + select HAVE_KVM_IRQCHIP + default y + ---help--- + Adds support for the Architected Timers in virtual machines endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index ea27987bd07..789bca9e64a 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,8 +14,11 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o -obj-y += arm.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d30e3afdaf..3c82b37c0f9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,6 +16,8 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/cpu.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -30,11 +32,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#include <asm/unified.h> #include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/mman.h> -#include <asm/cputype.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <asm/virt.h> @@ -44,21 +44,49 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_psci.h> -#include <asm/opcodes.h> #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); #endif static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static struct vfp_hard_struct __percpu *kvm_host_vfp_state; +static kvm_cpu_context_t __percpu *kvm_host_cpu_state; static unsigned long hyp_default_vectors; +/* Per-CPU variable containing the currently running vcpu. */ +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); + /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + +static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) +{ + BUG_ON(preemptible()); + __this_cpu_write(kvm_arm_running_vcpu, vcpu); +} + +/** + * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. + * Must be called from non-preemptible context + */ +struct kvm_vcpu *kvm_arm_get_running_vcpu(void) +{ + BUG_ON(preemptible()); + return __this_cpu_read(kvm_arm_running_vcpu); +} + +/** + * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. + */ +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +{ + return &kvm_arm_running_vcpu; +} + int kvm_arch_hardware_enable(void *garbage) { return 0; @@ -110,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; @@ -125,12 +155,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } @@ -157,16 +188,24 @@ int kvm_dev_ioctl_check_extension(long ext) { int r; switch (ext) { + case KVM_CAP_IRQCHIP: + r = vgic_present; + break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: r = 1; break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; + break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; @@ -174,7 +213,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; default: - r = 0; + r = kvm_arch_dev_ioctl_check_extension(ext); break; } return r; @@ -186,27 +225,22 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) +void kvm_arch_memslots_updated(struct kvm *kvm) { - return 0; } int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { } @@ -255,6 +289,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); + kvm_timer_vcpu_terminate(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -268,26 +303,21 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -int __attribute_const__ kvm_target_cpu(void) -{ - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { - case ARM_CPU_PART_CORTEX_A15: - return KVM_ARM_TARGET_CORTEX_A15; - default: - return -EINVAL; - } -} - int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + + /* Set up VGIC */ + ret = kvm_vgic_vcpu_init(vcpu); + if (ret) + return ret; + + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); + return 0; } @@ -298,7 +328,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; - vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); /* * Check whether this vcpu requires the cache to be flushed on @@ -308,10 +338,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) */ if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ + + kvm_arm_set_running_vcpu(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + + kvm_arm_set_running_vcpu(NULL); } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -342,7 +382,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!v->arch.irq_lines; + return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v); } /* Just ensure a guest exit from a particular CPU */ @@ -432,177 +472,23 @@ static void update_vttbr(struct kvm *kvm) spin_unlock(&kvm_vmid_lock); } -static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* SVC called from Hyp mode should never get here */ - kvm_debug("SVC called from Hyp mode shouldn't go here\n"); - BUG(); - return -EINVAL; /* Squash warning */ -} - -static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), - vcpu->arch.hsr & HSR_HVC_IMM_MASK); - - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* The hypervisor should never cause aborts */ - kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n", - vcpu->arch.hxfar, vcpu->arch.hsr); - return -EFAULT; -} - -static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* This is either an error in the ws. code or an external abort */ - kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n", - vcpu->arch.hxfar, vcpu->arch.hsr); - return -EFAULT; -} - -typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); -static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, - [HSR_EC_CP15_32] = kvm_handle_cp15_32, - [HSR_EC_CP15_64] = kvm_handle_cp15_64, - [HSR_EC_CP14_MR] = kvm_handle_cp14_access, - [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, - [HSR_EC_CP14_64] = kvm_handle_cp14_access, - [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, - [HSR_EC_CP10_ID] = kvm_handle_cp10_id, - [HSR_EC_SVC_HYP] = handle_svc_hyp, - [HSR_EC_HVC] = handle_hvc, - [HSR_EC_SMC] = handle_smc, - [HSR_EC_IABT] = kvm_handle_guest_abort, - [HSR_EC_IABT_HYP] = handle_pabt_hyp, - [HSR_EC_DABT] = kvm_handle_guest_abort, - [HSR_EC_DABT_HYP] = handle_dabt_hyp, -}; - -/* - * A conditional instruction is allowed to trap, even though it - * wouldn't be executed. So let's re-implement the hardware, in - * software! - */ -static bool kvm_condition_valid(struct kvm_vcpu *vcpu) -{ - unsigned long cpsr, cond, insn; - - /* - * Exception Code 0 can only happen if we set HCR.TGE to 1, to - * catch undefined instructions, and then we won't get past - * the arm_exit_handlers test anyway. - */ - BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0); - - /* Top two bits non-zero? Unconditional. */ - if (vcpu->arch.hsr >> 30) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - /* Is condition field valid? */ - if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT) - cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT; - else { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - /* Shift makes it look like an ARM-mode instruction */ - insn = cond << 28; - return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; -} - -/* - * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on - * proper exit to QEMU. - */ -static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index) -{ - unsigned long hsr_ec; - - switch (exception_index) { - case ARM_EXCEPTION_IRQ: - return 1; - case ARM_EXCEPTION_UNDEFINED: - kvm_err("Undefined exception in Hyp mode at: %#08x\n", - vcpu->arch.hyp_pc); - BUG(); - panic("KVM: Hypervisor undefined exception!\n"); - case ARM_EXCEPTION_DATA_ABORT: - case ARM_EXCEPTION_PREF_ABORT: - case ARM_EXCEPTION_HVC: - hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT; - - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) - || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unkown exception class: %#08lx, " - "hsr: %#08x\n", hsr_ec, - (unsigned int)vcpu->arch.hsr); - BUG(); - } - - /* - * See ARM ARM B1.14.1: "Hyp traps on instructions - * that fail their condition code check" - */ - if (!kvm_condition_valid(vcpu)) { - bool is_wide = vcpu->arch.hsr & HSR_IL; - kvm_skip_instr(vcpu, is_wide); - return 1; - } - - return arm_exit_handlers[hsr_ec](vcpu, run); - default: - kvm_pr_unimpl("Unsupported exception type: %d", - exception_index); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - return 0; - } -} - static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; vcpu->arch.has_run_once = true; /* - * Handle the "start in power-off" case by calling into the - * PSCI code. + * Initialize the VGIC before running a vcpu the first time on + * this VM. */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); + if (ret) + return ret; } return 0; @@ -615,6 +501,11 @@ static void vcpu_pause(struct kvm_vcpu *vcpu) wait_event_interruptible(*wq, !vcpu->arch.pause); } +static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.target >= 0; +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -631,8 +522,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int ret; sigset_t sigsaved; - /* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */ - if (unlikely(vcpu->arch.target < 0)) + if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; ret = kvm_vcpu_first_run_init(vcpu); @@ -661,6 +551,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->arch.pause) vcpu_pause(vcpu); + kvm_vgic_flush_hwstate(vcpu); + kvm_timer_flush_hwstate(vcpu); + local_irq_disable(); /* @@ -673,6 +566,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); continue; } @@ -705,6 +600,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Back from guest *************************************************************/ + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); + ret = handle_exit(vcpu, run, ret); } @@ -746,7 +644,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) { u32 irq = irq_level->irq; unsigned int irq_type, vcpu_idx, irq_num; @@ -760,20 +659,67 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); - if (irq_type != KVM_ARM_IRQ_TYPE_CPU) - return -EINVAL; + switch (irq_type) { + case KVM_ARM_IRQ_TYPE_CPU: + if (irqchip_in_kernel(kvm)) + return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; + if (vcpu_idx >= nrcpus) + return -EINVAL; - vcpu = kvm_get_vcpu(kvm, vcpu_idx); - if (!vcpu) - return -EINVAL; + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; - if (irq_num > KVM_ARM_IRQ_CPU_FIQ) - return -EINVAL; + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); + case KVM_ARM_IRQ_TYPE_PPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + case KVM_ARM_IRQ_TYPE_SPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (irq_num < VGIC_NR_PRIVATE_IRQS || + irq_num > KVM_ARM_IRQ_GIC_MAX) + return -EINVAL; - return vcpu_interrupt_line(vcpu, irq_num, level); + return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + } + + return -EINVAL; +} + +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. + */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; } long kvm_arch_vcpu_ioctl(struct file *filp, @@ -789,12 +735,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + + if (unlikely(!kvm_vcpu_initialized(vcpu))) + return -ENOEXEC; + if (copy_from_user(®, argp, sizeof(reg))) return -EFAULT; if (ioctl == KVM_SET_ONE_REG) @@ -807,6 +756,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_reg_list reg_list; unsigned n; + if (unlikely(!kvm_vcpu_initialized(vcpu))) + return -ENOEXEC; + if (copy_from_user(®_list, user_list, sizeof(reg_list))) return -EFAULT; n = reg_list.n; @@ -827,46 +779,134 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -EINVAL; } +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + unsigned long dev_id, type; + + dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >> + KVM_ARM_DEVICE_ID_SHIFT; + type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >> + KVM_ARM_DEVICE_TYPE_SHIFT; + + switch (dev_id) { + case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); + default: + return -ENODEV; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -EINVAL; + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + if (vgic_present) + return kvm_vgic_create(kvm); + else + return -ENXIO; + } + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } + default: + return -EINVAL; + } } -static void cpu_init_hyp_mode(void *vector) +static void cpu_init_hyp_mode(void *dummy) { - unsigned long long pgd_ptr; - unsigned long pgd_low, pgd_high; + phys_addr_t boot_pgd_ptr; + phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors((unsigned long)vector); + __hyp_set_vectors(kvm_get_idmap_vector()); - pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); - pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); - pgd_high = (pgd_ptr >> 32ULL); - stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + pgd_ptr = kvm_mmu_get_httbr(); + stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - /* - * Call initialization code, and switch to the full blown - * HYP code. The init code doesn't need to preserve these registers as - * r1-r3 and r12 are already callee save according to the AAPCS. - * Note that we slightly misuse the prototype by casing the pgd_low to - * a void *. - */ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); +} + +static int hyp_init_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + cpu_init_hyp_mode(NULL); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block hyp_init_cpu_nb = { + .notifier_call = hyp_init_cpu_notify, +}; + +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + if (cmd == CPU_PM_EXIT && + __hyp_get_vectors() == hyp_default_vectors) { + cpu_init_hyp_mode(NULL); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } +#else +static inline void hyp_cpu_pm_init(void) +{ +} +#endif /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { - phys_addr_t init_phys_addr; int cpu; int err = 0; @@ -899,24 +939,6 @@ static int init_hyp_mode(void) } /* - * Execute the init code on each CPU. - * - * Note: The stack is not mapped yet, so don't do anything else than - * initializing the hypervisor mode on each CPU using a local stack - * space for temporary storage. - */ - init_phys_addr = virt_to_phys(__kvm_hyp_init); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, cpu_init_hyp_mode, - (void *)(long)init_phys_addr, 1); - } - - /* - * Unmap the identity mapping - */ - kvm_clear_hyp_idmap(); - - /* * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); @@ -939,33 +961,63 @@ static int init_hyp_mode(void) } /* - * Map the host VFP structures + * Map the host CPU structures */ - kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct); - if (!kvm_host_vfp_state) { + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { err = -ENOMEM; - kvm_err("Cannot allocate host VFP state\n"); + kvm_err("Cannot allocate host CPU state\n"); goto out_free_mappings; } for_each_possible_cpu(cpu) { - struct vfp_hard_struct *vfp; + kvm_cpu_context_t *cpu_ctxt; - vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); - err = create_hyp_mappings(vfp, vfp + 1); + cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); if (err) { - kvm_err("Cannot map host VFP state: %d\n", err); - goto out_free_vfp; + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_free_context; } } + /* + * Execute the init code on each CPU. + */ + on_each_cpu(cpu_init_hyp_mode, NULL, 1); + + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + if (err) + goto out_free_context; + +#ifdef CONFIG_KVM_ARM_VGIC + vgic_present = true; +#endif + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(); + if (err) + goto out_free_mappings; + +#ifndef CONFIG_HOTPLUG_CPU + free_boot_hyp_pgd(); +#endif + + kvm_perf_init(); + kvm_info("Hyp mode initialized successfully\n"); + return 0; -out_free_vfp: - free_percpu(kvm_host_vfp_state); +out_free_context: + free_percpu(kvm_host_cpu_state); out_free_mappings: - free_hyp_pmds(); + free_hyp_pgds(); out_free_stack_pages: for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); @@ -974,36 +1026,59 @@ out_err: return err; } +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. */ int kvm_arch_init(void *opaque) { int err; + int ret, cpu; if (!is_hyp_mode_available()) { kvm_err("HYP mode not available\n"); return -ENODEV; } - if (kvm_target_cpu() < 0) { - kvm_err("Target CPU not supported!\n"); - return -ENODEV; + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } } + cpu_notifier_register_begin(); + err = init_hyp_mode(); if (err) goto out_err; + err = __register_cpu_notifier(&hyp_init_cpu_nb); + if (err) { + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + goto out_err; + } + + cpu_notifier_register_done(); + + hyp_cpu_pm_init(); + kvm_coproc_table_init(); return 0; out_err: + cpu_notifier_register_done(); return err; } /* NOP: Compiling as a module not supported */ void kvm_arch_exit(void) { + kvm_perf_teardown(); } static int arm_init(void) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index d782638c7ec..c58a35116f6 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -23,6 +23,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> #include <asm/cacheflush.h> #include <asm/cputype.h> #include <trace/events/kvm.h> @@ -71,19 +72,111 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - u32 val; + unsigned long val; int cpu; - cpu = get_cpu(); - if (!p->is_write) return read_from_write_only(vcpu, p); + cpu = get_cpu(); + cpumask_setall(&vcpu->arch.require_dcache_flush); cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); @@ -113,6 +206,44 @@ done: } /* + * Generic accessor for VM registers. Only called as long as HCR_TVM + * is set. + */ +static bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + BUG_ON(!p->is_write); + + vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); + if (p->is_64bit) + vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); + + return true; +} + +/* + * SCTLR accessor. Only called as long as HCR_TVM is set. If the + * guest enables the MMU, we stop trapping the VM sys_regs and leave + * it in complete control of the caches. + * + * Used by the cpu-specific code. + */ +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + access_vm_reg(vcpu, p, r); + + if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ + vcpu->arch.hcr &= ~HCR_TVM; + stage2_flush_vm(vcpu->kvm); + } + + return true; +} + +/* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was * NAKed, so it will read the PMCR anyway. @@ -146,40 +277,63 @@ static bool pm_fake(struct kvm_vcpu *vcpu, #define access_pmintenclr pm_fake /* Architected CP15 registers. - * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, - /* TTBR0/TTBR1: swapped by interrupt.S. */ - { CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, - { CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, - /* TTBCR: swapped by interrupt.S. */ + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + + /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */ + { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c2_TTBR0 }, + { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c2_TTBR1 }, { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c2_TTBCR, 0x00000000 }, + access_vm_reg, reset_val, c2_TTBCR, 0x00000000 }, + { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 }, + /* DACR: swapped by interrupt.S. */ { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c3_DACR }, + access_vm_reg, reset_unknown, c3_DACR }, /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */ { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_DFSR }, + access_vm_reg, reset_unknown, c5_DFSR }, { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_IFSR }, + access_vm_reg, reset_unknown, c5_IFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c5_ADFSR }, + access_vm_reg, reset_unknown, c5_ADFSR }, { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c5_AIFSR }, + access_vm_reg, reset_unknown, c5_AIFSR }, /* DFAR/IFAR: swapped by interrupt.S. */ { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c6_DFAR }, + access_vm_reg, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_unknown, c6_IFAR }, + access_vm_reg, reset_unknown, c6_IFAR }, + + /* PAR swapped by interrupt.S */ + { CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, + /* * DC{C,I,CI}SW operations: */ @@ -187,6 +341,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + + /* * Dummy performance monitor implementation. */ { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, @@ -205,9 +366,15 @@ static const struct coproc_reg cp15_regs[] = { /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */ { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, - NULL, reset_unknown, c10_PRRR}, + access_vm_reg, reset_unknown, c10_PRRR}, { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, - NULL, reset_unknown, c10_NMRR}, + access_vm_reg, reset_unknown, c10_NMRR}, + + /* AMAIR0/AMAIR1: swapped by interrupt.S. */ + { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32, + access_vm_reg, reset_unknown, c10_AMAIR0}, + { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, + access_vm_reg, reset_unknown, c10_AMAIR1}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, @@ -215,13 +382,20 @@ static const struct coproc_reg cp15_regs[] = { /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */ { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, - NULL, reset_val, c13_CID, 0x00000000 }, + access_vm_reg, reset_val, c13_CID, 0x00000000 }, { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c13_TID_URW }, { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32, NULL, reset_unknown, c13_TID_URO }, { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32, NULL, reset_unknown, c13_TID_PRIV }, + + /* CNTKCTL: swapped by interrupt.S. */ + { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, + NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -229,6 +403,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } @@ -289,12 +469,12 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } /* If access function fails, it should complain. */ } else { - kvm_err("Unsupported guest CP15 access at: %08x\n", + kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); } @@ -311,15 +491,15 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (vcpu->arch.hsr >> 1) & 0xf; - params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf; - params.is_write = ((vcpu->arch.hsr & 1) == 0); + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = true; - params.Op1 = (vcpu->arch.hsr >> 16) & 0xf; + params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf; params.Op2 = 0; - params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf; - params.CRn = 0; + params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; + params.CRm = 0; return emulate_cp15(vcpu, ¶ms); } @@ -343,14 +523,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct coproc_params params; - params.CRm = (vcpu->arch.hsr >> 1) & 0xf; - params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf; - params.is_write = ((vcpu->arch.hsr & 1) == 0); + params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0); params.is_64bit = false; - params.CRn = (vcpu->arch.hsr >> 10) & 0xf; - params.Op1 = (vcpu->arch.hsr >> 14) & 0x7; - params.Op2 = (vcpu->arch.hsr >> 17) & 0x7; + params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf; + params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7; + params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7; params.Rt2 = 0; return emulate_cp15(vcpu, ¶ms); @@ -391,12 +571,13 @@ static bool index_to_params(u64 id, struct coproc_params *params) | KVM_REG_ARM_OPC1_MASK)) return false; params->is_64bit = true; - params->CRm = ((id & KVM_REG_ARM_CRM_MASK) + /* CRm to CRn: see cp15_to_index for details */ + params->CRn = ((id & KVM_REG_ARM_CRM_MASK) >> KVM_REG_ARM_CRM_SHIFT); params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK) >> KVM_REG_ARM_OPC1_SHIFT); params->Op2 = 0; - params->CRn = 0; + params->CRm = 0; return true; default: return false; @@ -890,7 +1071,14 @@ static u64 cp15_to_index(const struct coproc_reg *reg) if (reg->is_64) { val |= KVM_REG_SIZE_U64; val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); - val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT); + /* + * CRn always denotes the primary coproc. reg. nr. for the + * in-kernel representation, but the user space API uses the + * CRm for the encoding, because it is modelled after the + * MRRC/MCRR instructions: see the ARM ARM rev. c page + * B3-1445 + */ + val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT); } else { val |= KVM_REG_SIZE_U32; val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 992adfafa2f..1a44bbe3964 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p) { /* Look, we even formatted it for you to paste into the table! */ if (p->is_64bit) { - kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n", - p->CRm, p->Op1, p->is_write ? "write" : "read"); + kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n", + p->CRn, p->Op1, p->is_write ? "write" : "read"); } else { kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32," " func_%s },\n", @@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, static inline bool write_to_read_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug("CP15 write to read-only register at: %08x\n", + kvm_debug("CP15 write to read-only register at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); return false; @@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu, static inline bool read_from_write_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug("CP15 read to write-only register at: %08x\n", + kvm_debug("CP15 read to write-only register at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); return false; @@ -139,15 +139,22 @@ static inline int cmp_reg(const struct coproc_reg *i1, return i1->CRm - i2->CRm; if (i1->Op1 != i2->Op1) return i1->Op1 - i2->Op1; - return i1->Op2 - i2->Op2; + if (i1->Op2 != i2->Op2) + return i1->Op2 - i2->Op2; + return i2->is_64 - i1->is_64; } #define CRn(_x) .CRn = _x #define CRm(_x) .CRm = _x +#define CRm64(_x) .CRn = _x, .CRm = 0 #define Op1(_x) .Op1 = _x #define Op2(_x) .Op2 = _x #define is64 .is_64 = true #define is32 .is_64 = false +bool access_sctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); + #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index 685063a6d0c..e6f4ae48bda 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,129 +17,24 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_host.h> -#include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> #include <linux/init.h> -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. - * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, + access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { @@ -150,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 00000000000..17fc7cd479d --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * Jonathan Austin <jonathan.austin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include <linux/kvm_host.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> +#include <linux/init.h> + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d61450ac666..d6c00528367 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -20,6 +20,7 @@ #include <linux/kvm_host.h> #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +#include <asm/opcodes.h> #include <trace/events/kvm.h> #include "trace.h" @@ -109,10 +110,10 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { * Return a pointer to the register number valid in the current mode of * the virtual CPU. */ -u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) { - u32 *reg_array = (u32 *)&vcpu->arch.regs; - u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { case USR_MODE...SVC_MODE: @@ -141,9 +142,9 @@ u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) /* * Return the SPSR for the current mode of the virtual CPU. */ -u32 *vcpu_spsr(struct kvm_vcpu *vcpu) +unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) { - u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { case SVC_MODE: return &vcpu->arch.regs.KVM_ARM_SVC_spsr; @@ -160,20 +161,48 @@ u32 *vcpu_spsr(struct kvm_vcpu *vcpu) } } -/** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest - * @vcpu: the vcpu pointer - * @run: the kvm_run structure pointer - * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. +/* + * A conditional instruction is allowed to trap, even though it + * wouldn't be executed. So let's re-implement the hardware, in + * software! */ -int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +bool kvm_condition_valid(struct kvm_vcpu *vcpu) { - trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); - return 1; + unsigned long cpsr, cond, insn; + + /* + * Exception Code 0 can only happen if we set HCR.TGE to 1, to + * catch undefined instructions, and then we won't get past + * the arm_exit_handlers test anyway. + */ + BUG_ON(!kvm_vcpu_trap_get_class(vcpu)); + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + /* Is condition field valid? */ + if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT) + cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT; + else { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + /* Shift makes it look like an ARM-mode instruction */ + insn = cond << 28; + return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; } /** @@ -257,9 +286,9 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - u32 new_lr_value; - u32 new_spsr_value; - u32 cpsr = *vcpu_cpsr(vcpu); + unsigned long new_lr_value; + unsigned long new_spsr_value; + unsigned long cpsr = *vcpu_cpsr(vcpu); u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset = 4; @@ -291,9 +320,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - u32 new_lr_value; - u32 new_spsr_value; - u32 cpsr = *vcpu_cpsr(vcpu); + unsigned long new_lr_value; + unsigned long new_spsr_value; + unsigned long cpsr = *vcpu_cpsr(vcpu); u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; @@ -325,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 2339d9609d3..b23a59c1c52 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <asm/cputype.h> #include <asm/uaccess.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> @@ -37,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } @@ -108,6 +110,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -120,7 +199,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -132,6 +212,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -139,6 +220,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -152,6 +238,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -165,6 +254,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } @@ -180,12 +272,30 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; + case ARM_CPU_PART_CORTEX_A15: + return KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; @@ -205,6 +315,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c new file mode 100644 index 00000000000..4c979d466cc --- /dev/null +++ b/arch/arm/kvm/handle_exit.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_psci.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* SVC called from Hyp mode should never get here */ + kvm_debug("SVC called from Hyp mode shouldn't go here\n"); + BUG(); + return -EINVAL; /* Squash warning */ +} + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int ret; + + trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), + kvm_vcpu_hvc_get_imm(vcpu)); + + ret = kvm_psci_call(vcpu); + if (ret < 0) { + kvm_inject_undefined(vcpu); + return 1; + } + + return ret; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* The hypervisor should never cause aborts */ + kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", + kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; +} + +static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + /* This is either an error in the ws. code or an external abort */ + kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", + kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; +} + +/** + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests + * @vcpu: the vcpu pointer + * @run: the kvm_run structure pointer + * + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + trace_kvm_wfi(*vcpu_pc(vcpu)); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [HSR_EC_WFI] = kvm_handle_wfx, + [HSR_EC_CP15_32] = kvm_handle_cp15_32, + [HSR_EC_CP15_64] = kvm_handle_cp15_64, + [HSR_EC_CP14_MR] = kvm_handle_cp14_access, + [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store, + [HSR_EC_CP14_64] = kvm_handle_cp14_access, + [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, + [HSR_EC_CP10_ID] = kvm_handle_cp10_id, + [HSR_EC_SVC_HYP] = handle_svc_hyp, + [HSR_EC_HVC] = handle_hvc, + [HSR_EC_SMC] = handle_smc, + [HSR_EC_IABT] = kvm_handle_guest_abort, + [HSR_EC_IABT_HYP] = handle_pabt_hyp, + [HSR_EC_DABT] = kvm_handle_guest_abort, + [HSR_EC_DABT_HYP] = handle_dabt_hyp, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unknown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_UNDEFINED: + kvm_err("Undefined exception in Hyp mode at: %#08lx\n", + kvm_vcpu_get_hyp_pc(vcpu)); + BUG(); + panic("KVM: Hypervisor undefined exception!\n"); + case ARM_EXCEPTION_DATA_ABORT: + case ARM_EXCEPTION_PREF_ABORT: + case ARM_EXCEPTION_HVC: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 9f37a79b880..1b9844d369c 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -21,13 +21,33 @@ #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> /******************************************************************** * Hypervisor initialization * - should be called with: - * r0,r1 = Hypervisor pgd pointer - * r2 = top of Hyp stack (kernel VA) - * r3 = pointer to hyp vectors + * r0 = top of Hyp stack (kernel VA) + * r1 = pointer to hyp vectors + * r2,r3 = Hypervisor pgd pointer + * + * The init scenario is: + * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, + * runtime stack, runtime vectors + * - Enable the MMU with the boot pgd + * - Jump to a target into the trampoline page (remember, this is the same + * physical page!) + * - Now switch to the runtime pgd (same VA, and still the same physical + * page!) + * - Invalidate TLBs + * - Set stack and vectors + * - Profit! (or eret, if you only care about the code). + * + * As we only have four registers available to pass parameters (and we + * need six), we split the init in two phases: + * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. + * Provides the basic HYP init, and enable the MMU. + * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. + * Switches to the runtime PGD, set stack and vectors. */ .text @@ -47,22 +67,25 @@ __kvm_hyp_init: W(b) . __do_hyp_init: + cmp r0, #0 @ We have a SP? + bne phase2 @ Yes, second stage init + @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r0, r1, c2 + mcrr p15, 4, r2, r3, c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r12, =HTCR_MASK - bic r0, r0, r12 + ldr r2, =HTCR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c2, c0, 2 @ TTBCR and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) orr r0, r0, r1 mcr p15, 4, r0, c2, c0, 2 @ HTCR mrc p15, 4, r1, c2, c1, 2 @ VTCR - ldr r12, =VTCR_MASK - bic r1, r1, r12 + ldr r2, =VTCR_MASK + bic r1, r1, r2 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits orr r1, r0, r1 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) @@ -85,24 +108,41 @@ __do_hyp_init: @ - Memory alignment checks: enabled @ - MMU: enabled (this code must be run from an identity mapping) mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r12, =HSCTLR_MASK - bic r0, r0, r12 + ldr r2, =HSCTLR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r12 - ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) ) - THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) - orr r1, r1, r12 + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) + and r1, r1, r2 + ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) + orr r1, r1, r2 orr r0, r0, r1 isb mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - @ Set stack pointer and return to the kernel - mov sp, r2 + @ End of init phase-1 + eret + +phase2: + @ Set stack pointer + mov sp, r0 @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r3, c12, c0, 0 @ HVBAR + mcr p15, 4, r1, c12, c0, 0 @ HVBAR + + @ Jump to the trampoline page + ldr r0, =TRAMPOLINE_VA + adr r1, target + bfi r0, r1, #0, #PAGE_SHIFT + mov pc, r0 + +target: @ We're now in the trampoline code, switch page tables + mcrr p15, 4, r2, r3, c2 + isb + + @ Invalidate the old TLBs + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH + dsb ish eret diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index c5400d2e97c..0d68d407306 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -35,23 +35,27 @@ __kvm_hyp_code_start: /******************************************************************** * Flush per-VMID TLBs * - * void __kvm_tlb_flush_vmid(struct kvm *kvm); + * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); * * We rely on the hardware to broadcast the TLB invalidation to all CPUs * inside the inner-shareable domain (which is the case for all v7 * implementations). If we come across a non-IS SMP implementation, we'll * have to use an IPI based mechanism. Until then, we stick to the simple * hardware assisted version. + * + * As v7 does not support flushing per IPA, just nuke the whole TLB + * instead, ignoring the ipa value. */ -ENTRY(__kvm_tlb_flush_vmid) +ENTRY(__kvm_tlb_flush_vmid_ipa) push {r2, r3} + dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] mcrr p15, 6, r2, r3, c2 @ Write VTTBR isb mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) - dsb + dsb ish isb mov r2, #0 mov r3, #0 @@ -60,7 +64,7 @@ ENTRY(__kvm_tlb_flush_vmid) pop {r2, r3} bx lr -ENDPROC(__kvm_tlb_flush_vmid) +ENDPROC(__kvm_tlb_flush_vmid_ipa) /******************************************************************** * Flush TLBs and instruction caches of all CPUs inside the inner-shareable @@ -75,7 +79,7 @@ ENTRY(__kvm_flush_vm_context) mcr p15, 4, r0, c8, c3, 4 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ mcr p15, 0, r0, c7, c1, 0 - dsb + dsb ish isb @ Not necessary if followed by eret bx lr @@ -94,6 +98,9 @@ ENTRY(__kvm_vcpu_run) save_host_regs + restore_vgic_state + restore_timer_state + @ Store hardware CP15 state and load guest state read_cp15_state store_to_vcpu = 0 write_cp15_state read_from_vcpu = 1 @@ -187,6 +194,9 @@ after_vfp_restore: read_cp15_state store_to_vcpu = 1 write_cp15_state read_from_vcpu = 0 + save_timer_state + save_vgic_state + restore_host_regs clrex @ Clear exclusive monitor mov r0, r1 @ Return the return code @@ -210,6 +220,10 @@ after_vfp_restore: * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 and r1. * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save @@ -229,9 +243,9 @@ ENTRY(kvm_call_hyp) * instruction is issued since all traps are disabled when running the host * kernel as per the Hyp-mode initialization at boot time. * - * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc + * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc * below) when the HVC instruction is called from SVC mode (i.e. a guest or the - * host kernel) and they cause a trap to the vector page + offset 0xc when HVC + * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC * instructions are called from within Hyp-mode. * * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): @@ -282,6 +296,7 @@ THUMB( orr r2, r2, #PSR_T_BIT ) ldr r2, =BSYM(panic) msr ELR_hyp, r2 ldr r0, =\panic_str + clrex @ Clear exclusive monitor eret .endm @@ -352,6 +367,11 @@ hyp_hvc: host_switch_to_hyp: pop {r0, r1, r2} + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + push {lr} mrs lr, SPSR push {lr} @@ -367,7 +387,7 @@ THUMB( orr lr, #1) pop {lr} msr SPSR_csxf, lr pop {lr} - eret +1: eret guest_trap: load_vcpu @ Load VCPU pointer to r0 @@ -405,6 +425,10 @@ guest_trap: mrcne p15, 4, r2, c6, c0, 4 @ HPFAR bne 3f + /* Preserve PAR */ + mrrc p15, 0, r0, r1, c7 @ PAR + push {r0, r1} + /* Resolve IPA using the xFAR */ mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR isb @@ -415,13 +439,20 @@ guest_trap: lsl r2, r2, #4 orr r2, r2, r1, lsl #24 + /* Restore PAR */ + pop {r0, r1} + mcrr p15, 0, r0, r1, c7 @ PAR + 3: load_vcpu @ Load VCPU pointer to r0 str r2, [r0, #VCPU_HPFAR] 1: mov r1, #ARM_EXCEPTION_HVC b __kvm_vcpu_return -4: pop {r0, r1, r2} @ Failed translation, return to guest +4: pop {r0, r1} @ Failed translation, return to guest + mcrr p15, 0, r0, r1, c7 @ PAR + clrex + pop {r0, r1, r2} eret /* @@ -447,6 +478,7 @@ switch_to_guest_vfp: pop {r3-r7} pop {r0-r2} + clrex eret #endif @@ -469,10 +501,10 @@ __kvm_hyp_code_end: .section ".rodata" und_die_str: - .ascii "unexpected undefined exception in Hyp mode at: %#08x" + .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" pabt_die_str: - .ascii "unexpected prefetch abort in Hyp mode at: %#08x" + .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" dabt_die_str: - .ascii "unexpected data abort in Hyp mode at: %#08x" + .ascii "unexpected data abort in Hyp mode at: %#08x\n" svc_die_str: - .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6a95d341e9c..76af9302557 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,3 +1,5 @@ +#include <linux/irqchip/arm-gic.h> + #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) #define VCPU_USR_LR (VCPU_USR_REG(14)) @@ -298,6 +300,21 @@ vcpu .req r0 @ vcpu pointer always in r0 str r11, [vcpu, #CP15_OFFSET(c6_IFAR)] str r12, [vcpu, #CP15_OFFSET(c12_VBAR)] .endif + + mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL + mrrc p15, 0, r4, r5, c7 @ PAR + mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 + mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 + + .if \store_to_vcpu == 0 + push {r2,r4-r7} + .else + str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + strd r4, r5, [r12] + str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] + .endif .endm /* @@ -309,6 +326,21 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 + pop {r2,r4-r7} + .else + ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + ldrd r4, r5, [r12] + ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] + ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] + .endif + + mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL + mcrr p15, 0, r4, r5, c7 @ PAR + mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 + mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 + + .if \read_from_vcpu == 0 pop {r2-r12} .else ldr r2, [vcpu, #CP15_OFFSET(c13_CID)] @@ -369,6 +401,49 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state +#ifdef CONFIG_KVM_ARM_VGIC + /* Get VGIC VCTRL base into r2 */ + ldr r2, [vcpu, #VCPU_KVM] + ldr r2, [r2, #KVM_VGIC_VCTRL] + cmp r2, #0 + beq 2f + + /* Compute the address of struct vgic_cpu */ + add r11, vcpu, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr r3, [r2, #GICH_HCR] + ldr r4, [r2, #GICH_VMCR] + ldr r5, [r2, #GICH_MISR] + ldr r6, [r2, #GICH_EISR0] + ldr r7, [r2, #GICH_EISR1] + ldr r8, [r2, #GICH_ELRSR0] + ldr r9, [r2, #GICH_ELRSR1] + ldr r10, [r2, #GICH_APR] + + str r3, [r11, #VGIC_CPU_HCR] + str r4, [r11, #VGIC_CPU_VMCR] + str r5, [r11, #VGIC_CPU_MISR] + str r6, [r11, #VGIC_CPU_EISR] + str r7, [r11, #(VGIC_CPU_EISR + 4)] + str r8, [r11, #VGIC_CPU_ELRSR] + str r9, [r11, #(VGIC_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_CPU_APR] + + /* Clear GICH_HCR */ + mov r5, #0 + str r5, [r2, #GICH_HCR] + + /* Save list registers */ + add r2, r2, #GICH_LR0 + add r3, r11, #VGIC_CPU_LR + ldr r4, [r11, #VGIC_CPU_NR_LR] +1: ldr r6, [r2], #4 + str r6, [r3], #4 + subs r4, r4, #1 + bne 1b +2: +#endif .endm /* @@ -377,6 +452,113 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state +#ifdef CONFIG_KVM_ARM_VGIC + /* Get VGIC VCTRL base into r2 */ + ldr r2, [vcpu, #VCPU_KVM] + ldr r2, [r2, #KVM_VGIC_VCTRL] + cmp r2, #0 + beq 2f + + /* Compute the address of struct vgic_cpu */ + add r11, vcpu, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr r3, [r11, #VGIC_CPU_HCR] + ldr r4, [r11, #VGIC_CPU_VMCR] + ldr r8, [r11, #VGIC_CPU_APR] + + str r3, [r2, #GICH_HCR] + str r4, [r2, #GICH_VMCR] + str r8, [r2, #GICH_APR] + + /* Restore list registers */ + add r2, r2, #GICH_LR0 + add r3, r11, #VGIC_CPU_LR + ldr r4, [r11, #VGIC_CPU_NR_LR] +1: ldr r6, [r3], #4 + str r6, [r2], #4 + subs r4, r4, #1 + bne 1b +2: +#endif +.endm + +#define CNTHCTL_PL1PCTEN (1 << 0) +#define CNTHCTL_PL1PCEN (1 << 1) + +/* + * Save the timer state onto the VCPU and allow physical timer/counter access + * for the host. + * + * Assumes vcpu pointer in vcpu reg + * Clobbers r2-r5 + */ +.macro save_timer_state +#ifdef CONFIG_KVM_ARM_TIMER + ldr r4, [vcpu, #VCPU_KVM] + ldr r2, [r4, #KVM_TIMER_ENABLED] + cmp r2, #0 + beq 1f + + mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL + str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] + bic r2, #1 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb + + mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + ldr r4, =VCPU_TIMER_CNTV_CVAL + add r5, vcpu, r4 + strd r2, r3, [r5] + + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + +1: +#endif + @ Allow physical timer/counter access for the host + mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL + orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) + mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL +.endm + +/* + * Load the timer state from the VCPU and deny physical timer/counter access + * for the host. + * + * Assumes vcpu pointer in vcpu reg + * Clobbers r2-r5 + */ +.macro restore_timer_state + @ Disallow physical timer access for the guest + @ Physical counter access is allowed + mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL + orr r2, r2, #CNTHCTL_PL1PCTEN + bic r2, r2, #CNTHCTL_PL1PCEN + mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL + +#ifdef CONFIG_KVM_ARM_TIMER + ldr r4, [vcpu, #VCPU_KVM] + ldr r2, [r4, #KVM_TIMER_ENABLED] + cmp r2, #0 + beq 1f + + ldr r2, [r4, #KVM_TIMER_CNTVOFF] + ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] + mcrr p15, 4, r2, r3, c14 @ CNTVOFF + + ldr r4, =VCPU_TIMER_CNTV_CVAL + add r5, vcpu, r4 + ldrd r2, r3, [r5] + mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + isb + + ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] + and r2, r2, #3 + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL +1: +#endif .endm .equ vmentry, 0 @@ -423,17 +605,14 @@ vcpu .req r0 @ vcpu pointer always in r0 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ .macro configure_hyp_role operation - mrc p15, 4, r2, c1, c1, 0 @ HCR - bic r2, r2, #HCR_VIRT_EXCP_MASK - ldr r3, =HCR_GUEST_MASK .if \operation == vmentry - orr r2, r2, r3 + ldr r2, [vcpu, #VCPU_HCR] ldr r3, [vcpu, #VCPU_IRQ_LINES] orr r2, r2, r3 .else - bic r2, r2, r3 + mov r2, #0 .endif - mcr p15, 4, r2, c1, c1, 0 + mcr p15, 4, r2, c1, c1, 0 @ HCR .endm .macro load_vcpu diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 0144baf8290..4cb5a93182e 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -23,6 +23,68 @@ #include "trace.h" +static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +static unsigned long mmio_read_buf(char *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = buf[0]; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + /** * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation * @vcpu: The VCPU pointer @@ -33,27 +95,27 @@ */ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { - __u32 *dest; + unsigned long data; unsigned int len; int mask; if (!run->mmio.is_write) { - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - memset(dest, 0, sizeof(int)); - len = run->mmio.len; - if (len > 4) + if (len > sizeof(unsigned long)) return -EINVAL; - memcpy(dest, run->mmio.data, len); - - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - *((u64 *)run->mmio.data)); + data = mmio_read_buf(run->mmio.data, len); - if (vcpu->arch.mmio_decode.sign_extend && len < 4) { + if (vcpu->arch.mmio_decode.sign_extend && + len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); - *dest = (*dest ^ mask) - mask; + data = (data ^ mask) - mask; } + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + data); + data = vcpu_data_host_to_guest(vcpu, data, len); + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; } return 0; @@ -62,45 +124,29 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_exit_mmio *mmio) { - unsigned long rt, len; + unsigned long rt; + int len; bool is_write, sign_extend; - if ((vcpu->arch.hsr >> 8) & 1) { + if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ - kvm_inject_dabt(vcpu, vcpu->arch.hxfar); + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; } - if ((vcpu->arch.hsr >> 7) & 1) { + if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ - kvm_inject_dabt(vcpu, vcpu->arch.hxfar); + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; } - switch ((vcpu->arch.hsr >> 22) & 0x3) { - case 0: - len = 1; - break; - case 1: - len = 2; - break; - case 2: - len = 4; - break; - default: - kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); - return -EFAULT; - } - - is_write = vcpu->arch.hsr & HSR_WNR; - sign_extend = vcpu->arch.hsr & HSR_SSE; - rt = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT; + len = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(len < 0)) + return len; - if (kvm_vcpu_reg_is_pc(vcpu, rt)) { - /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, vcpu->arch.hxfar); - return 1; - } + is_write = kvm_vcpu_dabt_iswrite(vcpu); + sign_extend = kvm_vcpu_dabt_issext(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); mmio->is_write = is_write; mmio->phys_addr = fault_ipa; @@ -112,7 +158,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 0; } @@ -120,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { struct kvm_exit_mmio mmio; + unsigned long data; unsigned long rt; int ret; @@ -130,7 +177,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, * space do its magic. */ - if (vcpu->arch.hsr & HSR_ISV) { + if (kvm_vcpu_dabt_isvalid(vcpu)) { ret = decode_hsr(vcpu, fault_ipa, &mmio); if (ret) return ret; @@ -140,13 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); + trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, fault_ipa, - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); + (mmio.is_write) ? data : 0); if (mmio.is_write) - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); + mmio_write_buf(mmio.data, mmio.len, data); + + if (vgic_handle_mmio(vcpu, run, &mmio)) + return 1; kvm_prepare_mmio(run, &mmio); return 0; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f30e13163a9..16f804938b8 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,8 +19,8 @@ #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> +#include <linux/hugetlb.h> #include <trace/events/kvm.h> -#include <asm/idmap.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> @@ -28,28 +28,34 @@ #include <asm/kvm_mmio.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> -#include <asm/mach/map.h> -#include <trace/events/kvm.h> #include "trace.h" extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); -static void kvm_tlb_flush_vmid(struct kvm *kvm) -{ - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); -} +static void *init_bounce_page; +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) -static void kvm_set_pte(pte_t *pte, pte_t new_pte) +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - pte_val(*pte) = new_pte; /* - * flush_pmd_entry just takes a void pointer and cleans the necessary - * cache entries, so we can reuse the function for ptes. + * This function also gets called when dealing with HYP page + * tables. As HYP doesn't have an associated struct kvm (and + * the HYP page tables are fairly static), we don't do + * anything there. */ - flush_pmd_entry(pte); + if (kvm) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, @@ -84,88 +90,282 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void free_ptes(pmd_t *pmd, unsigned long addr) +static bool page_empty(void *ptr) { - pte_t *pte; - unsigned int i; + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { - if (!pmd_none(*pmd) && pmd_table(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - pte_free_kernel(NULL, pte); - } - pmd++; +static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +{ + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); } + put_page(virt_to_page(pud)); } -/** - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables - * - * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * only mappings in the kernel memory area, which is above PAGE_OFFSET. - */ -void free_hyp_pmds(void) +static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +{ + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + } + put_page(virt_to_page(pmd)); +} + +static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +{ + if (pte_present(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } +} + +static void unmap_range(struct kvm *kvm, pgd_t *pgdp, + unsigned long long start, u64 size) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - unsigned long addr; + pte_t *pte; + unsigned long long addr = start, end = start + size; + u64 next; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) { - pgd = hyp_pgd + pgd_index(addr); + while (addr < end) { + pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); + pte = NULL; + if (pud_none(*pud)) { + addr = kvm_pud_addr_end(addr, end); + continue; + } - if (pud_none(*pud)) + if (pud_huge(*pud)) { + /* + * If we are dealing with a huge pud, just clear it and + * move on. + */ + clear_pud_entry(kvm, pud, addr); + addr = kvm_pud_addr_end(addr, end); continue; - BUG_ON(pud_bad(*pud)); + } pmd = pmd_offset(pud, addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); + if (pmd_none(*pmd)) { + addr = kvm_pmd_addr_end(addr, end); + continue; + } + + if (!kvm_pmd_huge(*pmd)) { + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(kvm, pte, addr); + next = addr + PAGE_SIZE; + } + + /* + * If the pmd entry is to be cleared, walk back up the ladder + */ + if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { + clear_pmd_entry(kvm, pmd, addr); + next = kvm_pmd_addr_end(addr, end); + if (page_empty(pmd) && !page_empty(pud)) { + clear_pud_entry(kvm, pud, addr); + next = kvm_pud_addr_end(addr, end); + } + } + + addr = next; } - mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end) +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) { pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); + } else { + stage2_flush_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pud)) { + hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); + kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); + } else { + stage2_flush_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + +/** + * free_boot_hyp_pgd - free HYP boot page tables + * + * Free the HYP boot page tables. The bounce page is also freed. + */ +void free_boot_hyp_pgd(void) +{ + mutex_lock(&kvm_hyp_pgd_mutex); + + if (boot_hyp_pgd) { + unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) + unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + + free_page((unsigned long)init_bounce_page); + init_bounce_page = NULL; + + mutex_unlock(&kvm_hyp_pgd_mutex); +} + +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. + */ +void free_hyp_pgds(void) +{ unsigned long addr; - struct page *page; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - pte = pte_offset_kernel(pmd, addr); - BUG_ON(!virt_addr_valid(addr)); - page = virt_to_page(addr); - kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); + free_boot_hyp_pgd(); + + mutex_lock(&kvm_hyp_pgd_mutex); + + if (hyp_pgd) { + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + + free_pages((unsigned long)hyp_pgd, pgd_order); + hyp_pgd = NULL; } + + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end, - unsigned long *pfn_base) +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; unsigned long addr; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { + addr = start; + do { pte = pte_offset_kernel(pmd, addr); - BUG_ON(pfn_valid(*pfn_base)); - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); - (*pfn_base)++; - } + kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, - unsigned long end, unsigned long *pfn_base) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pmd_t *pmd; pte_t *pte; unsigned long addr, next; - for (addr = start; addr < end; addr = next) { + addr = start; + do { pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); @@ -177,42 +377,34 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, return -ENOMEM; } pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); - /* - * If pfn_base is NULL, we map kernel pages into HYP with the - * virtual address. Otherwise, this is considered an I/O - * mapping and we map the physical region starting at - * *pfn_base to [start, end[. - */ - if (!pfn_base) - create_hyp_pte_mappings(pmd, addr, next); - else - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); - } + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); return 0; } -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) { - unsigned long start = (unsigned long)from; - unsigned long end = (unsigned long)to; pgd_t *pgd; pud_t *pud; pmd_t *pmd; unsigned long addr, next; int err = 0; - BUG_ON(start > end); - if (start < PAGE_OFFSET) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start; addr < end; addr = next) { - pgd = hyp_pgd + pgd_index(addr); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) { @@ -223,43 +415,86 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) goto out; } pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); if (err) goto out; - } + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); out: mutex_unlock(&kvm_hyp_pgd_mutex); return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** - * create_hyp_mappings - map a kernel virtual address range in Hyp mode + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range * @to: The virtual kernel end address of the range (exclusive) * - * The same virtual address as the kernel virtual address is also used in - * Hyp-mode mapping to the same underlying physical pages. - * - * Note: Wrapping around zero in the "to" address is not supported. + * The same virtual address as the kernel virtual address is also used + * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying + * physical pages. */ int create_hyp_mappings(void *from, void *to) { - return __create_hyp_mappings(from, to, NULL); + phys_addr_t phys_addr; + unsigned long virt_addr; + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); + + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** - * create_hyp_io_mappings - map a physical IO range in Hyp mode - * @from: The virtual HYP start address of the range - * @to: The virtual HYP end address of the range (exclusive) - * @addr: The physical start address which gets mapped + * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode + * @from: The kernel start VA of the range + * @to: The kernel end VA of the range (exclusive) + * @phys_addr: The physical start address which gets mapped + * + * The resulting HYP VA is the same as the kernel VA, modulo + * HYP_PAGE_OFFSET. */ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long pfn = __phys_to_pfn(addr); - return __create_hyp_mappings(from, to, &pfn); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } /** @@ -286,52 +521,13 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - /* stage-2 pgd must be aligned to its size */ - VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); - clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); + kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; } -static void clear_pud_entry(pud_t *pud) -{ - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - pmd_free(NULL, pmd_table); - put_page(virt_to_page(pud)); -} - -static void clear_pmd_entry(pmd_t *pmd) -{ - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - pte_free_kernel(NULL, pte_table); - put_page(virt_to_page(pmd)); -} - -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - -static void clear_pte_entry(pte_t *pte) -{ - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - } -} - -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range * @kvm: The VM pointer @@ -345,43 +541,7 @@ static bool pte_empty(pte_t *pte) */ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - phys_addr_t addr = start, end = start + size; - u64 range; - - while (addr < end) { - pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(pte); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(pmd); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(pud); - range = PUD_SIZE; - } - } - - addr += range; - } + unmap_range(kvm, kvm->arch.pgd, start, size); } /** @@ -405,39 +565,81 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); - pmd += pmd_index(addr); get_page(virt_to_page(pud)); - } else - pmd = pmd_offset(pud, addr); + } + + return pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; + + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } - /* Create 2nd stage page table mapping - Level 2 */ + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ pte = mmu_memory_cache_alloc(cache); - clean_pte_table(pte); + kvm_clean_pte(pte); pmd_populate_kernel(NULL, pmd, pte); - pte += pte_index(addr); get_page(virt_to_page(pmd)); - } else - pte = pte_offset_kernel(pmd, addr); + } + + pte = pte_offset_kernel(pmd, addr); if (iomap && pte_present(*pte)) return -EFAULT; @@ -446,7 +648,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, old_pte = *pte; kvm_set_pte(pte, *new_pte); if (pte_present(old_pte)) - kvm_tlb_flush_vmid(kvm); + kvm_tlb_flush_vmid_ipa(kvm, addr); else get_page(virt_to_page(pte)); @@ -473,7 +675,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pfn = __phys_to_pfn(pa); for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { - pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR); + pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); ret = mmu_topup_memory_cache(&cache, 2, 2); if (ret) @@ -492,46 +694,89 @@ out: return ret; } -static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) { - /* - * If we are going to insert an instruction page and the icache is - * either VIPT or PIPT, there is a potential problem where the host - * (or another VM) may have used the same page as this guest, and we - * read incorrect data from the icache. If we're using a PIPT cache, - * we can invalidate just that page, but if we are using a VIPT cache - * we need to invalidate the entire icache - damn shame - as written - * in the ARM ARM (DDI 0406C.b - Page B3-1393). - * - * VIVT caches are tagged using both the ASID and the VMID and doesn't - * need any kind of flushing (DDI 0406C.b - Page B3-1392). - */ - if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); - } else if (!icache_is_vivt_asid_tagged()) { - /* any kind of VIPT cache */ - __flush_icache_all(); + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; } + + return false; } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; - write_fault = kvm_is_write_fault(vcpu->arch.hsr); + write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. + */ + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) + force_pte = true; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -549,26 +794,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); - - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - pte_val(new_pte) |= L_PTE_S2_RDWR; - kvm_set_pfn_dirty(pfn); + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, PAGE_S2); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -585,7 +844,6 @@ out_unlock: */ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long hsr_ec; unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; @@ -593,18 +851,17 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; - hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT; - is_iabt = (hsr_ec == HSR_EC_IABT); - fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr, - vcpu->arch.hxfar, fault_ipa); + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE); + fault_status = kvm_vcpu_trap_get_fault(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n", - hsr_ec, fault_status); + kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), fault_status); return -EFAULT; } @@ -614,7 +871,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { if (is_iabt) { /* Prefetch Abort on I/O address */ - kvm_inject_pabt(vcpu, vcpu->arch.hxfar); + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); ret = 1; goto out_unlock; } @@ -626,20 +883,20 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - /* Adjust page offset */ - fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK; + /* + * The IPA is reported as [MAX:12], so we need to + * complement it with the bottom 12 bits from the + * faulting VA. This is always 12 bits, irrespective + * of the page size. + */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); ret = io_mem_abort(vcpu, run, fault_ipa); goto out_unlock; } memslot = gfn_to_memslot(vcpu->kvm, gfn); - if (!memslot->user_alloc) { - kvm_err("non user-alloc memslots not supported\n"); - ret = -EINVAL; - goto out_unlock; - } - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); if (ret == 0) ret = 1; out_unlock: @@ -687,7 +944,6 @@ static void handle_hva_to_gpa(struct kvm *kvm, static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); - kvm_tlb_flush_vmid(kvm); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -741,47 +997,106 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - VM_BUG_ON(!virt_addr_valid(hyp_pgd)); return virt_to_phys(hyp_pgd); } +phys_addr_t kvm_mmu_get_boot_httbr(void) +{ + return virt_to_phys(boot_hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + int kvm_mmu_init(void) { - if (!hyp_pgd) { + int err; + + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); + + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { + /* + * Our init code is crossing a page boundary. Allocate + * a bounce page, copy the code over and use that. + */ + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; + phys_addr_t phys_base; + + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); + if (!init_bounce_page) { + kvm_err("Couldn't allocate HYP init bounce page\n"); + err = -ENOMEM; + goto out; + } + + memcpy(init_bounce_page, __hyp_idmap_text_start, len); + /* + * Warning: the code we just copied to the bounce page + * must be flushed to the point of coherency. + * Otherwise, the data may be sitting in L2, and HYP + * mode won't be able to observe it as it runs with + * caches off at that point. + */ + kvm_flush_dcache_to_poc(init_bounce_page, len); + + phys_base = kvm_virt_to_phys(init_bounce_page); + hyp_idmap_vector += phys_base - hyp_idmap_start; + hyp_idmap_start = phys_base; + hyp_idmap_end = phys_base + len; + + kvm_info("Using HYP init bounce page @%lx\n", + (unsigned long)phys_base); + } + + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); - return -ENOMEM; + err = -ENOMEM; + goto out; } - return 0; -} + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(boot_hyp_pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); -/** - * kvm_clear_idmap - remove all idmaps from the hyp pgd - * - * Free the underlying pmds for all pgds in range and clear the pgds (but - * don't free them) afterwards. - */ -void kvm_clear_hyp_idmap(void) -{ - unsigned long addr, end; - unsigned long next; - pgd_t *pgd = hyp_pgd; - pud_t *pud; - pmd_t *pmd; + if (err) { + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + goto out; + } - addr = virt_to_phys(__hyp_idmap_text_start); - end = virt_to_phys(__hyp_idmap_text_end); + /* Map the very same page at the trampoline VA */ + err = __create_hyp_mappings(boot_hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pgd += pgd_index(addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); + /* Map the same page again into the runtime page tables */ + err = __create_hyp_mappings(hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pud_clear(pud); - clean_pmd_entry(pmd); - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); - } while (pgd++, addr = next, addr < end); + return 0; +out: + free_hyp_pgds(); + return err; } diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 00000000000..1a3849da0b4 --- /dev/null +++ b/arch/arm/kvm/perf.c @@ -0,0 +1,68 @@ +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/perf_event.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +static int kvm_is_in_guest(void) +{ + return kvm_arm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a366..09cf37737ee 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/wait.h> +#include <asm/cputype.h> #include <asm/kvm_emulate.h> #include <asm/kvm_psci.h> @@ -26,6 +27,36 @@ * as described in ARM document number ARM DEN 0022A. */ +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + + return PSCI_RET_SUCCESS; +} + static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.pause = true; @@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long context_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) - return KVM_PSCI_RET_INVAL; - - target_pc = *vcpu_reg(source_vcpu, 2); + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } - vcpu = kvm_get_vcpu(kvm, cpu_id); + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.pause) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; + target_pc = *vcpu_reg(source_vcpu, 2); + context_id = *vcpu_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu_set_thumb(vcpu); } + /* Propagate caller endianness */ + if (kvm_vcpu_is_be(source_vcpu)) + kvm_vcpu_set_be(vcpu); + *vcpu_pc(vcpu) = target_pc; + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ + *vcpu_reg(vcpu, 0) = context_id; vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); - return KVM_PSCI_RET_SUCCESS; + return PSCI_RET_SUCCESS; } -/** - * kvm_psci_call - handle PSCI call if r0 value is in range - * @vcpu: Pointer to the VCPU struct - * - * Handle PSCI calls from guests through traps from HVC or SMC instructions. - * The calling convention is similar to SMC calls to the secure world where - * the function number is placed in r0 and this function returns true if the - * function number specified in r0 is withing the PSCI range, and false - * otherwise. - */ -bool kvm_psci_call(struct kvm_vcpu *vcpu) +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = *vcpu_reg(vcpu, 1); + lowest_affinity_level = *vcpu_reg(vcpu, 2); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if (((mpidr & target_affinity_mask) == target_affinity) && + !tmp->arch.pause) { + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + +int kvm_psci_version(struct kvm_vcpu *vcpu) +{ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_0_2; + + return KVM_ARM_PSCI_0_1; +} + +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + int ret = 1; + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = 2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; + case PSCI_0_2_FN_MIGRATE: + case PSCI_0_2_FN64_MIGRATE: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + val = PSCI_RET_NOT_SUPPORTED; + break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We should'nt be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidently/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + default: + return -EINVAL; + } + + *vcpu_reg(vcpu, 0) = val; + return ret; +} + +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu) switch (psci_fn) { case KVM_PSCI_FN_CPU_OFF: kvm_psci_vcpu_off(vcpu); - val = KVM_PSCI_RET_SUCCESS; + val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; case KVM_PSCI_FN_CPU_SUSPEND: case KVM_PSCI_FN_MIGRATE: - val = KVM_PSCI_RET_NI; + val = PSCI_RET_NOT_SUPPORTED; break; - default: - return false; + return -EINVAL; } *vcpu_reg(vcpu, 0) = val; - return true; + return 1; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC instructions. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +int kvm_psci_call(struct kvm_vcpu *vcpu) +{ + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); + default: + return -EINVAL; + }; } diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554c..f558c073c02 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,16 +27,21 @@ #include <asm/kvm_arm.h> #include <asm/kvm_coproc.h> +#include <kvm/arm_arch_timer.h> + /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; - -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level cortexa_vtimer_irq = { + { .irq = 27 }, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -51,24 +56,28 @@ static struct kvm_regs a15_regs_reset = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) - return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index a8e73ed5ad5..b1d640f7862 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " - "ipa %#16llx, hsr %#08lx", - __entry->vcpu_pc, __entry->hxfar, - __entry->ipa, __entry->hsr) + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) ); TRACE_EVENT(kvm_irq_line, |
