diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-05-30 10:59:04 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-05-30 10:59:04 +0200 |
commit | 063e04776172f93b16a5eefd5661a340c1126513 (patch) | |
tree | 19cb1623631c8cc5dcf0d91f4731feec7cbefa04 /arch/powerpc | |
parent | 59cd358a7a5b2f6b61faa01dae6cfda3830ac62a (diff) | |
parent | 731a7378b81c2f5fa88ca1ae20b83d548d5613dc (diff) |
Merge branch 'linus' into perf/urgent
Merge back Linus's latest branch so that we pick up the uprobes changes.
( I tested this branch locally and while it's one from the middle of the
merge window it's a good one to base further work off. )
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/powerpc')
66 files changed, 4047 insertions, 1604 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0a947bd9c07..050cb371a69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -27,15 +27,6 @@ config MMU bool default y -config GENERIC_CMOS_UPDATE - def_bool y - -config GENERIC_TIME_VSYSCALL - def_bool y - -config GENERIC_CLOCKEVENTS - def_bool y - config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 @@ -141,6 +132,11 @@ config PPC select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD + select GENERIC_CMOS_UPDATE + select GENERIC_TIME_VSYSCALL + select GENERIC_CLOCKEVENTS + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER config EARLY_PRINTK bool @@ -281,7 +277,6 @@ config HIGHMEM bool "High memory support" depends on PPC32 -source kernel/time/Kconfig source kernel/Kconfig.hz source kernel/Kconfig.preempt source "fs/Kconfig.binfmt" diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/mpc8569mds.dts index 7e283c891b7..fe0d60935e9 100644 --- a/arch/powerpc/boot/dts/mpc8569mds.dts +++ b/arch/powerpc/boot/dts/mpc8569mds.dts @@ -119,6 +119,7 @@ sdhc@2e000 { status = "disabled"; sdhci,1-bit-only; + bus-width = <1>; }; par_io@e0100 { diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b9219e99bd2..50d82c8a037 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -168,6 +168,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000) #define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000) #define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000) +#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000) /* * Add the 64-bit processor unique features in the top half of the word; @@ -376,7 +377,8 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_47X (CPU_FTRS_440x6) #define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \ CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE) + CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \ + CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE) @@ -385,15 +387,15 @@ extern const char *powerpc_base_platform; CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL) + CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ @@ -486,8 +488,10 @@ enum { CPU_FTRS_E200 | #endif #ifdef CONFIG_E500 - CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC | - CPU_FTRS_E5500 | CPU_FTRS_E6500 | + CPU_FTRS_E500 | CPU_FTRS_E500_2 | +#endif +#ifdef CONFIG_PPC_E500MC + CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 | #endif 0, }; @@ -531,9 +535,12 @@ enum { CPU_FTRS_E200 & #endif #ifdef CONFIG_E500 - CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC & - CPU_FTRS_E5500 & CPU_FTRS_E6500 & + CPU_FTRS_E500 & CPU_FTRS_E500_2 & +#endif +#ifdef CONFIG_PPC_E500MC + CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 & #endif + ~CPU_FTR_EMB_HV & /* can be removed at runtime */ CPU_FTRS_POSSIBLE, }; #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index efa74ac44a3..154c067761b 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -19,6 +19,9 @@ #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) +#define PPC_DBELL_TYPE_MASK PPC_DBELL_TYPE(0xf) +#define PPC_DBELL_LPID(x) ((x) << (63 - 49)) +#define PPC_DBELL_PIR_MASK 0x3fff enum ppc_dbell { PPC_DBELL = 0, /* doorbell */ PPC_DBELL_CRIT = 1, /* critical doorbell */ diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h index 38762edb5e5..b3799d88ffc 100644 --- a/arch/powerpc/include/asm/gpio.h +++ b/arch/powerpc/include/asm/gpio.h @@ -1,53 +1,4 @@ -/* - * Generic GPIO API implementation for PowerPC. - * - * Copyright (c) 2007-2008 MontaVista Software, Inc. - * - * Author: Anton Vorontsov <avorontsov@ru.mvista.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __ASM_POWERPC_GPIO_H -#define __ASM_POWERPC_GPIO_H - -#include <linux/errno.h> -#include <asm-generic/gpio.h> - -#ifdef CONFIG_GPIOLIB - -/* - * We don't (yet) implement inlined/rapid versions for on-chip gpios. - * Just call gpiolib. - */ -static inline int gpio_get_value(unsigned int gpio) -{ - return __gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned int gpio, int value) -{ - __gpio_set_value(gpio, value); -} - -static inline int gpio_cansleep(unsigned int gpio) -{ - return __gpio_cansleep(gpio); -} - -static inline int gpio_to_irq(unsigned int gpio) -{ - return __gpio_to_irq(gpio); -} - -static inline int irq_to_gpio(unsigned int irq) -{ - return -EINVAL; -} - -#endif /* CONFIG_GPIOLIB */ - -#endif /* __ASM_POWERPC_GPIO_H */ +#ifndef __LINUX_GPIO_H +#warning Include linux/gpio.h instead of asm/gpio.h +#include <linux/gpio.h> +#endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 61225238819..423cf9eaf4a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -133,6 +133,16 @@ #define H_PP1 (1UL<<(63-62)) #define H_PP2 (1UL<<(63-63)) +/* Flags for H_REGISTER_VPA subfunction field */ +#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */ +#define H_VPA_FUNC_MASK 7UL +#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */ +#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */ +#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */ +#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */ +#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */ +#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */ + /* VASI States */ #define H_VASI_INVALID 0 #define H_VASI_ENABLED 1 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 51010bfc792..c9aac24b02e 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -33,6 +33,7 @@ extern void __replay_interrupt(unsigned int vector); extern void timer_interrupt(struct pt_regs *); +extern void performance_monitor_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include <asm/paca.h> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index b921c3f4892..1bea4d8ea6f 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -277,6 +277,7 @@ struct kvm_sync_regs { #define KVM_CPU_E500V2 2 #define KVM_CPU_3S_32 3 #define KVM_CPU_3S_64 4 +#define KVM_CPU_E500MC 5 /* for KVM_CAP_SPAPR_TCE */ struct kvm_create_spapr_tce { diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 7b1f0e0fc65..76fdcfef088 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -20,6 +20,16 @@ #ifndef __POWERPC_KVM_ASM_H__ #define __POWERPC_KVM_ASM_H__ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_64BIT +#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg) +#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg) +#else +#define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg) +#define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg) +#endif +#endif + /* IVPR must be 64KiB-aligned. */ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) @@ -48,6 +58,14 @@ #define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 +#define BOOKE_INTERRUPT_DOORBELL 36 +#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 + +/* booke_hv */ +#define BOOKE_INTERRUPT_GUEST_DBELL 38 +#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39 +#define BOOKE_INTERRUPT_HV_SYSCALL 40 +#define BOOKE_INTERRUPT_HV_PRIV 41 /* book3s */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index fd07f43d662..f0e0c6a66d9 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -453,4 +453,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define INS_DCBZ 0x7c0007ec +/* LPIDs we support with this build -- runtime limit may be lower */ +#define KVMPPC_NR_LPIDS (LPID_RSVD + 1) + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 1f2f5b6156b..88609b23b77 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -79,6 +79,9 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_64_HV + u8 hwthread_req; + u8 hwthread_state; + struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; @@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu { #endif /*__ASSEMBLY__ */ +/* Values for kvm_state */ +#define KVM_HWTHREAD_IN_KERNEL 0 +#define KVM_HWTHREAD_IN_NAP 1 +#define KVM_HWTHREAD_IN_KVM 2 + #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index a90e0918877..b7cd3356a53 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,6 +23,9 @@ #include <linux/types.h> #include <linux/kvm_host.h> +/* LPIDs we support with this build -- runtime limit may be lower */ +#define KVMPPC_NR_LPIDS 64 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h new file mode 100644 index 00000000000..30a600fa1b6 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -0,0 +1,49 @@ +/* + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef ASM_KVM_BOOKE_HV_ASM_H +#define ASM_KVM_BOOKE_HV_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * All exceptions from guest state must go through KVM + * (except for those which are delivered directly to the guest) -- + * there are no exceptions for which we fall through directly to + * the normal host handler. + * + * Expected inputs (normal exceptions): + * SCRATCH0 = saved r10 + * r10 = thread struct + * r11 = appropriate SRR1 variant (currently used as scratch) + * r13 = saved CR + * *(r10 + THREAD_NORMSAVE(0)) = saved r11 + * *(r10 + THREAD_NORMSAVE(2)) = saved r13 + * + * Expected inputs (crit/mcheck/debug exceptions): + * appropriate SCRATCH = saved r8 + * r8 = exception level stack frame + * r9 = *(r8 + _CCR) = saved CR + * r11 = appropriate SRR1 variant (currently used as scratch) + * *(r8 + GPR9) = saved r9 + * *(r8 + GPR10) = saved r10 (r10 not yet clobbered) + * *(r8 + GPR11) = saved r11 + */ +.macro DO_KVM intno srr1 +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, kvmppc_resume_\intno\()_\srr1 + b kvmppc_handler_\intno\()_\srr1 +kvmppc_resume_\intno\()_\srr1: +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif +.endm + +#endif /*__ASSEMBLY__ */ +#endif /* ASM_KVM_BOOKE_HV_ASM_H */ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h deleted file mode 100644 index 8cd50a51427..00000000000 --- a/arch/powerpc/include/asm/kvm_e500.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Yu Liu, <yu.liu@freescale.com> - * - * Description: - * This file is derived from arch/powerpc/include/asm/kvm_44x.h, - * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_KVM_E500_H__ -#define __ASM_KVM_E500_H__ - -#include <linux/kvm_host.h> - -#define BOOKE_INTERRUPT_SIZE 36 - -#define E500_PID_NUM 3 -#define E500_TLB_NUM 2 - -#define E500_TLB_VALID 1 -#define E500_TLB_DIRTY 2 - -struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ -}; - -struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ -}; - -struct vcpu_id_table; - -struct kvmppc_e500_tlb_params { - int entries, ways, sets; -}; - -struct kvmppc_vcpu_e500 { - /* Unmodified copy of the guest's TLB -- shared with host userspace. */ - struct kvm_book3e_206_tlb_entry *gtlb_arch; - - /* Starting entry number in gtlb_arch[] */ - int gtlb_offset[E500_TLB_NUM]; - - /* KVM internal information associated with each guest TLB entry */ - struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; - - struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; - - unsigned int gtlb_nv[E500_TLB_NUM]; - - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; - unsigned int host_tlb1_nv; - - u32 host_pid[E500_PID_NUM]; - u32 pid[E500_PID_NUM]; - u32 svr; - - /* vcpu id table */ - struct vcpu_id_table *idt; - - u32 l1csr0; - u32 l1csr1; - u32 hid0; - u32 hid1; - u32 tlb0cfg; - u32 tlb1cfg; - u64 mcar; - - struct page **shared_tlb_pages; - int num_shared_tlb_pages; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); -} - -#endif /* __ASM_KVM_E500_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 52eb9c1f4fe..d848cdc4971 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -82,7 +82,7 @@ struct kvm_vcpu; struct lppaca; struct slb_shadow; -struct dtl; +struct dtl_entry; struct kvm_vm_stat { u32 remote_tlb_flush; @@ -106,6 +106,8 @@ struct kvm_vcpu_stat { u32 dec_exits; u32 ext_intr_exits; u32 halt_wakeup; + u32 dbell_exits; + u32 gdbell_exits; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; @@ -140,6 +142,7 @@ enum kvm_exit_types { EMULATED_TLBSX_EXITS, EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, + EMULATED_RFCI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -147,6 +150,8 @@ enum kvm_exit_types { FP_UNAVAIL, DEBUG_EXITS, TIMEINGUEST, + DBELL_EXITS, + GDBELL_EXITS, __NUMBER_OF_KVM_EXIT_TYPES }; @@ -217,10 +222,10 @@ struct kvm_arch_memory_slot { }; struct kvm_arch { + unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -232,7 +237,6 @@ struct kvm_arch { unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; - struct list_head spapr_tce_tables; spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; @@ -240,6 +244,9 @@ struct kvm_arch { struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + struct list_head spapr_tce_tables; +#endif }; /* @@ -263,6 +270,9 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + u64 stolen_tb; + u64 preempt_tb; + struct kvm_vcpu *runner; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -274,6 +284,19 @@ struct kvmppc_vcore { #define VCORE_EXITING 2 #define VCORE_SLEEPING 3 +/* + * Struct used to manage memory for a virtual processor area + * registered by a PAPR guest. There are three types of area + * that a guest can register. + */ +struct kvmppc_vpa { + void *pinned_addr; /* Address in kernel linear mapping */ + void *pinned_end; /* End of region */ + unsigned long next_gpa; /* Guest phys addr for update */ + unsigned long len; /* Number of bytes required */ + u8 update_pending; /* 1 => update pinned_addr from next_gpa */ +}; + struct kvmppc_pte { ulong eaddr; u64 vpage; @@ -345,6 +368,17 @@ struct kvm_vcpu_arch { u64 vsr[64]; #endif +#ifdef CONFIG_KVM_BOOKE_HV + u32 host_mas4; + u32 host_mas6; + u32 shadow_epcr; + u32 epcr; + u32 shadow_msrp; + u32 eplc; + u32 epsc; + u32 oldpir; +#endif + #ifdef CONFIG_PPC_BOOK3S /* For Gekko paired singles */ u32 qpr[32]; @@ -370,6 +404,7 @@ struct kvm_vcpu_arch { #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; + /* shadow_msr is unused for BookE HV */ ulong shadow_msr; ulong csrr0; ulong csrr1; @@ -426,8 +461,12 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + u32 tlbcfg[4]; + u32 mmucfg; + u32 epr; #endif gpa_t paddr_accessed; + gva_t vaddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; @@ -453,11 +492,6 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - struct lppaca *vpa; - struct slb_shadow *slb_shadow; - struct dtl *dtl; - struct dtl *dtl_end; - wait_queue_head_t *wqp; struct kvmppc_vcore *vcore; int ret; @@ -482,6 +516,14 @@ struct kvm_vcpu_arch { struct task_struct *run_task; struct kvm_run *kvm_run; pgd_t *pgdir; + + spinlock_t vpa_update_lock; + struct kvmppc_vpa vpa; + struct kvmppc_vpa dtl; + struct dtl_entry *dtl_ptr; + unsigned long dtl_index; + u64 stolen_logged; + struct kvmppc_vpa slb_shadow; #endif }; @@ -498,4 +540,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_QPR 0x0040 #define KVM_MMIO_REG_FQPR 0x0060 +#define __KVM_HAVE_ARCH_WQP + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 7b754e74300..c18916bff68 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -206,6 +206,11 @@ static inline unsigned int kvm_arch_para_features(void) return r; } +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + #endif /* __KERNEL__ */ #endif /* __POWERPC_KVM_PARA_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9d6dee0f7d4..f68c22fa2fc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -95,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); @@ -107,8 +107,10 @@ extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); -extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); -extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); +extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, + ulong val); +extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, + ulong *val); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); @@ -126,6 +128,8 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); +extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); extern struct kvmppc_linear_info *kvm_alloc_rma(void); @@ -138,6 +142,11 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, + struct kvm_ppc_smmu_info *info); + +extern int kvmppc_bookehv_init(void); +extern void kvmppc_bookehv_exit(void); /* * Cuts out inst bits with ordering according to spec. @@ -204,4 +213,9 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, struct kvm_dirty_tlb *cfg); +long kvmppc_alloc_lpid(void); +void kvmppc_claim_lpid(long lpid); +void kvmppc_free_lpid(long lpid); +void kvmppc_init_lpid(unsigned long nr_lpids); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index cdb5421877e..eeabcdbc30f 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -104,6 +104,8 @@ #define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */ #define MAS4_TSIZED_SHIFT 7 +#define MAS5_SGS 0x80000000 + #define MAS6_SPID0 0x3FFF0000 #define MAS6_SPID1 0x00007FFE #define MAS6_ISIZE(x) MAS1_TSIZE(x) @@ -118,6 +120,10 @@ #define MAS7_RPN 0xFFFFFFFF +#define MAS8_TGS 0x80000000 /* Guest space */ +#define MAS8_VF 0x40000000 /* Virtualization Fault */ +#define MAS8_TLPID 0x000000ff + /* Bit definitions for MMUCFG */ #define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ #define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 55e85631c42..413a5eaef56 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -240,6 +240,9 @@ struct thread_struct { #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *kvm_vcpu; +#endif #ifdef CONFIG_PPC64 unsigned long dscr; int dscr_inherit; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9d7f0fb6902..f0cb7f461b9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -257,7 +257,9 @@ #define LPCR_LPES_SH 2 #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ +#ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 8a97aa7289d..2d916c4982c 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -56,18 +56,30 @@ #define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ +#define SPRN_MSRP 0x137 /* MSR Protect Register */ #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ #define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */ #define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */ +#define SPRN_LPID 0x152 /* Logical Partition ID */ #define SPRN_MAS8 0x155 /* MMU Assist Register 8 */ #define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */ #define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */ #define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */ #define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */ #define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */ +#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */ +#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */ +#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */ +#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */ #define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */ #define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */ +#define SPRN_GSRR0 0x17A /* Guest SRR0 */ +#define SPRN_GSRR1 0x17B /* Guest SRR1 */ +#define SPRN_GEPR 0x17C /* Guest EPR */ +#define SPRN_GDEAR 0x17D /* Guest DEAR */ +#define SPRN_GPIR 0x17E /* Guest PIR */ +#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */ #define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */ #define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */ #define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */ @@ -88,6 +100,13 @@ #define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ #define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ #define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ +#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */ +#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */ +#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */ +#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */ +#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */ +#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */ +#define SPRN_GIVPR 0x1BF /* Guest IVPR */ #define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ #define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ #define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ @@ -240,6 +259,10 @@ #define MCSR_LDG 0x00002000UL /* Guarded Load */ #define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */ #define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */ + +#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */ +#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */ +#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif #ifdef CONFIG_E200 @@ -594,6 +617,17 @@ #define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates * for hypervisor */ +/* Bit definitions for EPLC/EPSC */ +#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */ +#define EPC_EPR_SHIFT 31 +#define EPC_EAS 0x40000000 /* Address Space */ +#define EPC_EAS_SHIFT 30 +#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */ +#define EPC_EGS_SHIFT 29 +#define EPC_ELPID 0x00ff0000 +#define EPC_ELPID_SHIFT 16 +#define EPC_EPID 0x00003fff +#define EPC_EPID_SHIFT 0 /* * The IBM-403 is an even more odd special case, as it is much diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 1a6320290d2..200d763a0a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -17,6 +17,7 @@ extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); extern void giveup_fpu(struct task_struct *); +extern void load_up_fpu(void); extern void disable_kernel_fp(void); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2136f58a54e..3b4b4a8da92 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -23,6 +23,7 @@ extern unsigned long tb_ticks_per_jiffy; extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; +extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index bd0fb849515..17bb40cad5b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -40,6 +40,8 @@ #define segment_eq(a, b) ((a).seg == (b).seg) +#define user_addr_max() (get_fs().seg) + #ifdef __powerpc64__ /* * This check is sufficient because there is a large enough @@ -453,42 +455,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) return size; } -extern int __strncpy_from_user(char *dst, const char __user *src, long count); - -static inline long strncpy_from_user(char *dst, const char __user *src, - long count) -{ - might_sleep(); - if (likely(access_ok(VERIFY_READ, src, 1))) - return __strncpy_from_user(dst, src, count); - return -EFAULT; -} - -/* - * Return the size of a string (including the ending 0) - * - * Return 0 for error - */ -extern int __strnlen_user(const char __user *str, long len, unsigned long top); - -/* - * Returns the length of the string at str (including the null byte), - * or 0 if we hit a page we can't access, - * or something > len if we didn't find a null byte. - * - * The `top' parameter to __strnlen_user is to make sure that - * we can never overflow from the user area into kernel space. - */ -static inline int strnlen_user(const char __user *str, long len) -{ - unsigned long top = current->thread.fs.seg; - - if ((unsigned long)str > top) - return 0; - return __strnlen_user(str, len, top); -} - -#define strlen_user(str) strnlen_user((str), 0x7ffffffe) +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h new file mode 100644 index 00000000000..d0b6d4ac6dd --- /dev/null +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -0,0 +1,41 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * Word-at-a-time interfaces for PowerPC. + */ + +#include <linux/kernel.h> +#include <asm/asm-compat.h> + +struct word_at_a_time { + const unsigned long high_bits, low_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) } + +/* Bit set in the bytes that have a zero */ +static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c) +{ + unsigned long mask = (val & c->low_bits) + c->low_bits; + return ~(mask | rhs); +} + +#define create_zero_mask(mask) (mask) + +static inline long find_zero(unsigned long mask) +{ + long leading_zero_bits; + + asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask)); + return leading_zero_bits >> 3; +} + +static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +{ + unsigned long rhs = val | c->low_bits; + *data = rhs; + return (val + c->high_bits) & ~rhs; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4554dc2fe85..52c7ad78242 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -116,6 +116,9 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); +#endif DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); @@ -383,6 +386,7 @@ int main(void) #ifdef CONFIG_KVM DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); @@ -425,9 +429,11 @@ int main(void) DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + /* book3s */ #ifdef CONFIG_KVM_BOOK3S_64_HV - DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -440,9 +446,9 @@ int main(void) DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); @@ -457,7 +463,6 @@ int main(void) DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); @@ -533,6 +538,8 @@ int main(void) HSTATE_FIELD(HSTATE_NAPPING, napping); #ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); + HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); @@ -593,6 +600,12 @@ int main(void) DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); + DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); + DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 8053db02b85..69fdd2322a6 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2) mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) + mr r5, r4 mflr r4 bl __e500_icache_setup bl __e500_dcache_setup diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7bed44ee16..1c06d297154 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -63,11 +63,13 @@ BEGIN_FTR_SECTION GET_PACA(r13) #ifdef CONFIG_KVM_BOOK3S_64_HV - lbz r0,PACAPROCSTART(r13) - cmpwi r0,0x80 - bne 1f - li r0,1 - stb r0,PACAPROCSTART(r13) + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 1f b kvm_start_guest 1: #endif diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 22d608e8bb7..7a2e5e421ab 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -248,10 +248,11 @@ _ENTRY(_start); interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) /* Data Storage Interrupt */ @@ -261,7 +262,8 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ + do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -273,29 +275,32 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ + FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 0e4175388f4..5f051eeb93a 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -2,6 +2,9 @@ #define __HEAD_BOOKE_H__ #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ +#include <asm/kvm_asm.h> +#include <asm/kvm_booke_hv_asm.h> + /* * Macros used for common Book-e exception handling */ @@ -28,14 +31,15 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) -#define NORMAL_EXCEPTION_PROLOG \ +#define NORMAL_EXCEPTION_PROLOG(intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ stw r13, THREAD_NORMSAVE(2)(r10); \ mfcr r13; /* save CR in r13 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ + mfspr r11, SPRN_SRR1; \ + DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ + andi. r11, r11, MSR_PR; /* check whether user or kernel */\ mr r11, r1; \ beq 1f; \ /* if from user, start at top of this thread's kernel stack */ \ @@ -113,7 +117,7 @@ * registers as the normal prolog above. Instead we use a portion of the * critical/machine check exception stack at low physical addresses. */ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -121,8 +125,9 @@ stw r10,GPR10(r8); \ stw r11,GPR11(r8); \ stw r9,_CCR(r8); /* save CR on stack */\ - mfspr r10,exc_level_srr1; /* check whether user or kernel */\ - andi. r10,r10,MSR_PR; \ + mfspr r11,exc_level_srr1; /* check whether user or kernel */\ + DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ + andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -162,12 +167,30 @@ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -#define CRITICAL_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define CRITICAL_EXCEPTION_PROLOG(intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ + SPRN_MCSRR0, SPRN_MCSRR1) + +/* + * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite + * being delivered to the host. This exception can only happen + * inside a KVM guest -- so we just handle up to the DO_KVM rather + * than try to fit this into one of the existing prolog macros. + */ +#define GUEST_DOORBELL_EXCEPTION \ + START_EXCEPTION(GuestDoorbell); \ + mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ + mfspr r10, SPRN_SPRG_THREAD; \ + stw r11, THREAD_NORMSAVE(0)(r10); \ + mfspr r11, SPRN_SRR1; \ + stw r13, THREAD_NORMSAVE(2)(r10); \ + mfcr r13; /* save CR in r13 for now */\ + DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \ + trap /* * Exception vectors. @@ -181,16 +204,16 @@ label: .long func; \ .long ret_from_except_full -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ +#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ + START_EXCEPTION(label); \ + CRITICAL_EXCEPTION_PROLOG(intno); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ NOCOPY, crit_transfer_to_handler, \ ret_from_crit_exc) @@ -302,7 +325,7 @@ label: #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG; \ + CRITICAL_EXCEPTION_PROLOG(DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -355,7 +378,7 @@ label: #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ @@ -363,7 +386,7 @@ label: #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ @@ -372,7 +395,7 @@ label: #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -380,7 +403,7 @@ label: #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -388,7 +411,7 @@ label: #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -396,7 +419,7 @@ label: #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index de80e0f9a2b..1f4434a3860 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -301,19 +301,20 @@ _ENTRY(__early_start) interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) #else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -328,7 +329,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -342,32 +343,36 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ + unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -375,10 +380,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -463,10 +474,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -538,36 +555,54 @@ interrupt_base: #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) bne load_up_spe addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \ + SPEFloatingPointException, EXC_XFER_EE); /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + SPEFloatingPointRoundException, EXC_XFER_EE) #else - EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \ + unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) + EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ + performance_monitor_exception, EXC_XFER_STD) - EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) - CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception) + CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ + CriticalDoorbell, unknown_exception) /* Debug Interrupt */ DEBUG_DEBUG_EXCEPTION DEBUG_CRIT_EXCEPTION + GUEST_DOORBELL_EXCEPTION + + CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \ + unknown_exception) + + /* Hypercall */ + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + + /* Embedded Hypervisor Privilege */ + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + /* * Local functions */ @@ -871,8 +906,31 @@ _GLOBAL(__setup_e500mc_ivors) mtspr SPRN_IVOR36,r3 li r3,CriticalDoorbell@l mtspr SPRN_IVOR37,r3 + + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r3, SPRN_MMUCFG + andis. r3, r3, MMUCFG_LPIDSIZE@h + beq no_hv + li r3,GuestDoorbell@l + mtspr SPRN_IVOR38,r3 + li r3,CriticalGuestDoorbell@l + mtspr SPRN_IVOR39,r3 + li r3,Hypercall@l + mtspr SPRN_IVOR40,r3 + li r3,Ehvpriv@l + mtspr SPRN_IVOR41,r3 +skip_hv_ivors: sync blr +no_hv: + lwz r3, CPU_SPEC_FEATURES(r5) + rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV + stw r3, CPU_SPEC_FEATURES(r5) + b skip_hv_ivors #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 0cdc9a39283..7140d838339 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -16,6 +16,7 @@ #include <asm/asm-offsets.h> #include <asm/ppc-opcode.h> #include <asm/hw_irq.h> +#include <asm/kvm_book3s_asm.h> #undef DEBUG @@ -81,6 +82,12 @@ _GLOBAL(power7_idle) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* Tell KVM we're napping */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* Magic NAP mode enter sequence */ std r0,0(r1) ptesync diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 786a2700ec2..3e4031581c6 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -85,8 +85,6 @@ EXPORT_SYMBOL(csum_tcpudp_magic); EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(copy_page); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) @@ -190,3 +188,7 @@ EXPORT_SYMBOL(__arch_hweight16); EXPORT_SYMBOL(__arch_hweight32); EXPORT_SYMBOL(__arch_hweight64); #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +EXPORT_SYMBOL_GPL(mmu_psize_defs); +#endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2c42cd72d0f..99a995c2a3f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt, static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); -static struct clock_event_device decrementer_clockevent = { +struct clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, .irq = 0, @@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = { .set_mode = decrementer_set_mode, .features = CLOCK_EVT_FEAT_ONESHOT, }; +EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 7b612a76c70..50e7dbc7356 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -29,15 +29,18 @@ #include <asm/kvm_ppc.h> #include "44x_tlb.h" +#include "booke.h" void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + kvmppc_booke_vcpu_load(vcpu, cpu); kvmppc_44x_tlb_load(vcpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_44x_tlb_put(vcpu); + kvmppc_booke_vcpu_put(vcpu); } int kvmppc_core_check_processor_compat(void) @@ -160,6 +163,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + static int __init kvmppc_44x_init(void) { int r; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 549bb2c9a47..c8c61578fdf 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -37,22 +37,19 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int dcrn; - int ra; - int rb; - int rc; - int rs; - int rt; - int ws; + int dcrn = get_dcrn(inst); + int ra = get_ra(inst); + int rb = get_rb(inst); + int rc = get_rc(inst); + int rs = get_rs(inst); + int rt = get_rt(inst); + int ws = get_ws(inst); switch (get_op(inst)) { case 31: switch (get_xop(inst)) { case XOP_MFDCR: - dcrn = get_dcrn(inst); - rt = get_rt(inst); - /* The guest may access CPR0 registers to determine the timebase * frequency, and it must know the real host frequency because it * can directly access the timebase registers. @@ -88,9 +85,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_MTDCR: - dcrn = get_dcrn(inst); - rs = get_rs(inst); - /* emulate some access in kernel */ switch (dcrn) { case DCRN_CPR0_CONFIG_ADDR: @@ -108,17 +102,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_TLBWE: - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); break; case XOP_TLBSX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); break; @@ -141,41 +128,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_PID: - kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_MMUCR: - vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.mmucr = spr_val; break; case SPRN_CCR0: - vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.ccr0 = spr_val; break; case SPRN_CCR1: - vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.ccr1 = spr_val; break; default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); } return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_PID: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; + *spr_val = vcpu->arch.pid; break; case SPRN_MMUCR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; + *spr_val = vcpu->arch.mmucr; break; case SPRN_CCR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; + *spr_val = vcpu->arch.ccr0; break; case SPRN_CCR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; + *spr_val = vcpu->arch.ccr1; break; default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); } return emulated; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f64709ae33..f4dacb9c57f 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV select KVM_BOOK3S_PR +config KVM_BOOKE_HV + bool + config KVM_440 bool "KVM support for PowerPC 440 processors" depends on EXPERIMENTAL && 44x @@ -106,7 +109,7 @@ config KVM_440 config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500 + depends on KVM_440 || KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -115,14 +118,29 @@ config KVM_EXIT_TIMING If unsure, say N. -config KVM_E500 - bool "KVM support for PowerPC E500 processors" - depends on EXPERIMENTAL && E500 +config KVM_E500V2 + bool "KVM support for PowerPC E500v2 processors" + depends on EXPERIMENTAL && E500 && !PPC_E500MC select KVM select KVM_MMIO ---help--- Support running unmodified E500 guest kernels in virtual machines on - E500 host processors. + E500v2 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_E500MC + bool "KVM support for PowerPC E500MC/E5500 processors" + depends on EXPERIMENTAL && PPC_E500MC + select KVM + select KVM_MMIO + select KVM_BOOKE_HV + ---help--- + Support running unmodified E500MC/E5500 (32-bit) guest kernels in + virtual machines on E500MC/E5500 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3688aeecc4b..c2a08636e6d 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -36,7 +36,17 @@ kvm-e500-objs := \ e500.o \ e500_tlb.o \ e500_emulate.o -kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) +kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) + +kvm-e500mc-objs := \ + $(common-objs-y) \ + booke.o \ + booke_emulate.o \ + bookehv_interrupts.o \ + e500mc.o \ + e500_tlb.o \ + e500_emulate.o +kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ ../../../virt/kvm/coalesced_mmio.o \ @@ -44,6 +54,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ + book3s_64_vio_hv.o \ book3s_emulate.o \ book3s_interrupts.o \ book3s_mmu_hpte.o \ @@ -68,6 +79,7 @@ kvm-book3s_64-module-objs := \ powerpc.o \ emulate.o \ book3s.o \ + book3s_64_vio.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) @@ -88,7 +100,8 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o -obj-$(CONFIG_KVM_E500) += kvm.o +obj-$(CONFIG_KVM_E500V2) += kvm.o +obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7d54f4ed6d9..3f2a8360c85 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) return true; } -void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -283,12 +283,17 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) /* Tell the guest about our interrupt status */ kvmppc_update_int_pending(vcpu, *pending, old_pending); + + return 0; } pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) { ulong mp_pa = vcpu->arch.magic_page_pa; + if (!(vcpu->arch.shared->msr & MSR_SF)) + mp_pa = (uint32_t)mp_pa; + /* Magic page override */ if (unlikely(mp_pa) && unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c3beaeef3f6..80a57751758 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -36,13 +36,11 @@ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 -#define NR_LPIDS (LPID_RSVD + 1) -unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; long kvmppc_alloc_hpt(struct kvm *kvm) { unsigned long hpt; - unsigned long lpid; + long lpid; struct revmap_entry *rev; struct kvmppc_linear_info *li; @@ -72,14 +70,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm) } kvm->arch.revmap = rev; - /* Allocate the guest's logical partition ID */ - do { - lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); - if (lpid >= NR_LPIDS) { - pr_err("kvm_alloc_hpt: No LPIDs free\n"); - goto out_freeboth; - } - } while (test_and_set_bit(lpid, lpid_inuse)); + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + goto out_freeboth; kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); kvm->arch.lpid = lpid; @@ -96,7 +89,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm) void kvmppc_free_hpt(struct kvm *kvm) { - clear_bit(kvm->arch.lpid, lpid_inuse); + kvmppc_free_lpid(kvm->arch.lpid); vfree(kvm->arch.revmap); if (kvm->arch.hpt_li) kvm_release_hpt(kvm->arch.hpt_li); @@ -171,8 +164,7 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - memset(lpid_inuse, 0, sizeof(lpid_inuse)); - + /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ if (cpu_has_feature(CPU_FTR_ARCH_206)) { host_lpid = mfspr(SPRN_LPID); /* POWER7 */ rsvd_lpid = LPID_RSVD; @@ -181,9 +173,11 @@ int kvmppc_mmu_hv_init(void) rsvd_lpid = MAX_LPID_970; } - set_bit(host_lpid, lpid_inuse); + kvmppc_init_lpid(rsvd_lpid + 1); + + kvmppc_claim_lpid(host_lpid); /* rsvd_lpid is reserved for use in partition switching */ - set_bit(rsvd_lpid, lpid_inuse); + kvmppc_claim_lpid(rsvd_lpid); return 0; } @@ -452,7 +446,7 @@ static int instruction_is_store(unsigned int instr) } static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned long gpa, int is_store) + unsigned long gpa, gva_t ea, int is_store) { int ret; u32 last_inst; @@ -499,6 +493,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, */ vcpu->arch.paddr_accessed = gpa; + vcpu->arch.vaddr_accessed = ea; return kvmppc_emulate_mmio(run, vcpu); } @@ -552,7 +547,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); } diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index f2e6e48ea46..56b983e7b73 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -90,8 +90,6 @@ slb_exit_skip_ ## num: or r10, r10, r12 slbie r10 - isync - /* Fill SLB with our shadow */ lbz r12, SVCPU_SLB_MAX(r3) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c new file mode 100644 index 00000000000..72ffc899c08 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -0,0 +1,150 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> +#include <linux/list.h> +#include <linux/anon_inodes.h> + +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> +#include <asm/hvcall.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> +#include <asm/kvm_host.h> +#include <asm/udbg.h> + +#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) + +static long kvmppc_stt_npages(unsigned long window_size) +{ + return ALIGN((window_size >> SPAPR_TCE_SHIFT) + * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; +} + +static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) +{ + struct kvm *kvm = stt->kvm; + int i; + + mutex_lock(&kvm->lock); + list_del(&stt->list); + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + __free_page(stt->pages[i]); + kfree(stt); + mutex_unlock(&kvm->lock); + + kvm_put_kvm(kvm); +} + +static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) + return VM_FAULT_SIGBUS; + + page = stt->pages[vmf->pgoff]; + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { + .fault = kvm_spapr_tce_fault, +}; + +static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_spapr_tce_vm_ops; + return 0; +} + +static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_spapr_tce_table *stt = filp->private_data; + + release_spapr_tce_table(stt); + return 0; +} + +static struct file_operations kvm_spapr_tce_fops = { + .mmap = kvm_spapr_tce_mmap, + .release = kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + long npages; + int ret = -ENOMEM; + int i; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == args->liobn) + return -EBUSY; + } + + npages = kvmppc_stt_npages(args->window_size); + + stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!stt) + goto fail; + + stt->liobn = args->liobn; + stt->window_size = args->window_size; + stt->kvm = kvm; + + for (i = 0; i < npages; i++) { + stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!stt->pages[i]) + goto fail; + } + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + list_add(&stt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR); + +fail: + if (stt) { + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + } + return ret; +} diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ea0f8c537c2..30c2f3b134c 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -38,6 +38,9 @@ #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) +/* WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 135663a3e4f..b9a989dc76c 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -87,6 +87,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; + int rt = get_rt(inst); + int rs = get_rs(inst); + int ra = get_ra(inst); + int rb = get_rb(inst); switch (get_op(inst)) { case 19: @@ -106,21 +110,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - kvmppc_set_gpr(vcpu, get_rt(inst), - vcpu->arch.shared->msr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); break; case OP_31_XOP_MTMSRD: { - ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); + ulong rs_val = kvmppc_get_gpr(vcpu, rs); if (inst & 0x10000) { - vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); - vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); + ulong new_msr = vcpu->arch.shared->msr; + new_msr &= ~(MSR_RI | MSR_EE); + new_msr |= rs_val & (MSR_RI | MSR_EE); + vcpu->arch.shared->msr = new_msr; } else - kvmppc_set_msr(vcpu, rs); + kvmppc_set_msr(vcpu, rs_val); break; } case OP_31_XOP_MTMSR: - kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_MFSR: { @@ -130,7 +135,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, if (vcpu->arch.mmu.mfsrin) { u32 sr; sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); - kvmppc_set_gpr(vcpu, get_rt(inst), sr); + kvmppc_set_gpr(vcpu, rt, sr); } break; } @@ -138,29 +143,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, { int srnum; - srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; + srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf; if (vcpu->arch.mmu.mfsrin) { u32 sr; sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); - kvmppc_set_gpr(vcpu, get_rt(inst), sr); + kvmppc_set_gpr(vcpu, rt, sr); } break; } case OP_31_XOP_MTSR: vcpu->arch.mmu.mtsrin(vcpu, (inst >> 16) & 0xf, - kvmppc_get_gpr(vcpu, get_rs(inst))); + kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_MTSRIN: vcpu->arch.mmu.mtsrin(vcpu, - (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, - kvmppc_get_gpr(vcpu, get_rs(inst))); + (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf, + kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_TLBIE: case OP_31_XOP_TLBIEL: { bool large = (inst & 0x00200000) ? true : false; - ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); + ulong addr = kvmppc_get_gpr(vcpu, rb); vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } @@ -171,15 +176,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; vcpu->arch.mmu.slbmte(vcpu, - kvmppc_get_gpr(vcpu, get_rs(inst)), - kvmppc_get_gpr(vcpu, get_rb(inst))); + kvmppc_get_gpr(vcpu, rs), + kvmppc_get_gpr(vcpu, rb)); break; case OP_31_XOP_SLBIE: if (!vcpu->arch.mmu.slbie) return EMULATE_FAIL; vcpu->arch.mmu.slbie(vcpu, - kvmppc_get_gpr(vcpu, get_rb(inst))); + kvmppc_get_gpr(vcpu, rb)); break; case OP_31_XOP_SLBIA: if (!vcpu->arch.mmu.slbia) @@ -191,22 +196,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, if (!vcpu->arch.mmu.slbmfee) { emulated = EMULATE_FAIL; } else { - ulong t, rb; + ulong t, rb_val; - rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - t = vcpu->arch.mmu.slbmfee(vcpu, rb); - kvmppc_set_gpr(vcpu, get_rt(inst), t); + rb_val = kvmppc_get_gpr(vcpu, rb); + t = vcpu->arch.mmu.slbmfee(vcpu, rb_val); + kvmppc_set_gpr(vcpu, rt, t); } break; case OP_31_XOP_SLBMFEV: if (!vcpu->arch.mmu.slbmfev) { emulated = EMULATE_FAIL; } else { - ulong t, rb; + ulong t, rb_val; - rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - t = vcpu->arch.mmu.slbmfev(vcpu, rb); - kvmppc_set_gpr(vcpu, get_rt(inst), t); + rb_val = kvmppc_get_gpr(vcpu, rb); + t = vcpu->arch.mmu.slbmfev(vcpu, rb_val); + kvmppc_set_gpr(vcpu, rt, t); } break; case OP_31_XOP_DCBA: @@ -214,17 +219,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case OP_31_XOP_DCBZ: { - ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - ulong ra = 0; + ulong rb_val = kvmppc_get_gpr(vcpu, rb); + ulong ra_val = 0; ulong addr, vaddr; u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; u32 dsisr; int r; - if (get_ra(inst)) - ra = kvmppc_get_gpr(vcpu, get_ra(inst)); + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); - addr = (ra + rb) & ~31ULL; + addr = (ra_val + rb_val) & ~31ULL; if (!(vcpu->arch.shared->msr & MSR_SF)) addr &= 0xffffffff; vaddr = addr; @@ -313,10 +318,9 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) return bat; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_SDR1: @@ -428,7 +432,7 @@ unprivileged: return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; @@ -441,46 +445,46 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); if (sprn % 2) - kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + *spr_val = bat->raw >> 32; else - kvmppc_set_gpr(vcpu, rt, bat->raw); + *spr_val = bat->raw; break; } case SPRN_SDR1: if (!spr_allowed(vcpu, PRIV_HYPER)) goto unprivileged; - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); + *spr_val = to_book3s(vcpu)->sdr1; break; case SPRN_DSISR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); + *spr_val = vcpu->arch.shared->dsisr; break; case SPRN_DAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); + *spr_val = vcpu->arch.shared->dar; break; case SPRN_HIOR: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); + *spr_val = to_book3s(vcpu)->hior; break; case SPRN_HID0: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); + *spr_val = to_book3s(vcpu)->hid[0]; break; case SPRN_HID1: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); + *spr_val = to_book3s(vcpu)->hid[1]; break; case SPRN_HID2: case SPRN_HID2_GEKKO: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); + *spr_val = to_book3s(vcpu)->hid[2]; break; case SPRN_HID4: case SPRN_HID4_GEKKO: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); + *spr_val = to_book3s(vcpu)->hid[4]; break; case SPRN_HID5: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); + *spr_val = to_book3s(vcpu)->hid[5]; break; case SPRN_CFAR: case SPRN_PURR: - kvmppc_set_gpr(vcpu, rt, 0); + *spr_val = 0; break; case SPRN_GQR0: case SPRN_GQR1: @@ -490,8 +494,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_GQR5: case SPRN_GQR6: case SPRN_GQR7: - kvmppc_set_gpr(vcpu, rt, - to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); + *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; case SPRN_THRM1: case SPRN_THRM2: @@ -506,7 +509,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: - kvmppc_set_gpr(vcpu, rt, 0); + *spr_val = 0; break; default: unprivileged: @@ -565,23 +568,22 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) { ulong dar = 0; - ulong ra; + ulong ra = get_ra(inst); + ulong rb = get_rb(inst); switch (get_op(inst)) { case OP_LFS: case OP_LFD: case OP_STFD: case OP_STFS: - ra = get_ra(inst); if (ra) dar = kvmppc_get_gpr(vcpu, ra); dar += (s32)((s16)inst); break; case 31: - ra = get_ra(inst); if (ra) dar = kvmppc_get_gpr(vcpu, ra); - dar += kvmppc_get_gpr(vcpu, get_rb(inst)); + dar += kvmppc_get_gpr(vcpu, rb); break; default: printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 108d1f58017..c6af1d62383 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -60,12 +60,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; + local_paca->kvm_hstate.kvm_vcore = vc; + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + vc->stolen_tb += mftb() - vc->preempt_tb; } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + vc->preempt_tb = mftb(); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) @@ -134,6 +142,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +/* Length for a per-processor buffer is passed in at offset 4 in the buffer */ +struct reg_vpa { + u32 dummy; + union { + u16 hword; + u32 word; + } length; +}; + +static int vpa_is_registered(struct kvmppc_vpa *vpap) +{ + if (vpap->update_pending) + return vpap->next_gpa != 0; + return vpap->pinned_addr != NULL; +} + static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long vcpuid, unsigned long vpa) @@ -142,88 +166,182 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long len, nb; void *va; struct kvm_vcpu *tvcpu; - int err = H_PARAMETER; + int err; + int subfunc; + struct kvmppc_vpa *vpap; tvcpu = kvmppc_find_vcpu(kvm, vcpuid); if (!tvcpu) return H_PARAMETER; - flags >>= 63 - 18; - flags &= 7; - if (flags == 0 || flags == 4) - return H_PARAMETER; - if (flags < 4) { - if (vpa & 0x7f) + subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK; + if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL || + subfunc == H_VPA_REG_SLB) { + /* Registering new area - address must be cache-line aligned */ + if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa) return H_PARAMETER; - if (flags >= 2 && !tvcpu->arch.vpa) - return H_RESOURCE; - /* registering new area; convert logical addr to real */ + + /* convert logical addr to kernel addr and read length */ va = kvmppc_pin_guest_page(kvm, vpa, &nb); if (va == NULL) return H_PARAMETER; - if (flags <= 1) - len = *(unsigned short *)(va + 4); + if (subfunc == H_VPA_REG_VPA) + len = ((struct reg_vpa *)va)->length.hword; else - len = *(unsigned int *)(va + 4); - if (len > nb) - goto out_unpin; - switch (flags) { - case 1: /* register VPA */ - if (len < 640) - goto out_unpin; - if (tvcpu->arch.vpa) - kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); - tvcpu->arch.vpa = va; - init_vpa(vcpu, va); - break; - case 2: /* register DTL */ - if (len < 48) - goto out_unpin; - len -= len % 48; - if (tvcpu->arch.dtl) - kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); - tvcpu->arch.dtl = va; - tvcpu->arch.dtl_end = va + len; + len = ((struct reg_vpa *)va)->length.word; + kvmppc_unpin_guest_page(kvm, va); + + /* Check length */ + if (len > nb || len < sizeof(struct reg_vpa)) + return H_PARAMETER; + } else { + vpa = 0; + len = 0; + } + + err = H_PARAMETER; + vpap = NULL; + spin_lock(&tvcpu->arch.vpa_update_lock); + + switch (subfunc) { + case H_VPA_REG_VPA: /* register VPA */ + if (len < sizeof(struct lppaca)) break; - case 3: /* register SLB shadow buffer */ - if (len < 16) - goto out_unpin; - if (tvcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = va; + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case H_VPA_REG_DTL: /* register DTL */ + if (len < sizeof(struct dtl_entry)) break; - } - } else { - switch (flags) { - case 5: /* unregister VPA */ - if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) - return H_RESOURCE; - if (!tvcpu->arch.vpa) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa); - tvcpu->arch.vpa = NULL; + len -= len % sizeof(struct dtl_entry); + + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 6: /* unregister DTL */ - if (!tvcpu->arch.dtl) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); - tvcpu->arch.dtl = NULL; + + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_REG_SLB: /* register SLB shadow buffer */ + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 7: /* unregister SLB shadow buffer */ - if (!tvcpu->arch.slb_shadow) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = NULL; + + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; + + case H_VPA_DEREG_VPA: /* deregister VPA */ + /* Check they don't still have a DTL or SLB buf registered */ + err = H_RESOURCE; + if (vpa_is_registered(&tvcpu->arch.dtl) || + vpa_is_registered(&tvcpu->arch.slb_shadow)) break; - } + + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case H_VPA_DEREG_DTL: /* deregister DTL */ + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */ + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; + } + + if (vpap) { + vpap->next_gpa = vpa; + vpap->len = len; + vpap->update_pending = 1; } - return H_SUCCESS; - out_unpin: - kvmppc_unpin_guest_page(kvm, va); + spin_unlock(&tvcpu->arch.vpa_update_lock); + return err; } +static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap) +{ + void *va; + unsigned long nb; + + vpap->update_pending = 0; + va = NULL; + if (vpap->next_gpa) { + va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + if (nb < vpap->len) { + /* + * If it's now too short, it must be that userspace + * has changed the mappings underlying guest memory, + * so unregister the region. + */ + kvmppc_unpin_guest_page(kvm, va); + va = NULL; + } + } + if (vpap->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + vpap->pinned_addr = va; + if (va) + vpap->pinned_end = va + vpap->len; +} + +static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.vpa.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.vpa); + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + } + if (vcpu->arch.dtl.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.dtl); + vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; + vcpu->arch.dtl_index = 0; + } + if (vcpu->arch.slb_shadow.update_pending) + kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow); + spin_unlock(&vcpu->arch.vpa_update_lock); +} + +static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) +{ + struct dtl_entry *dt; + struct lppaca *vpa; + unsigned long old_stolen; + + dt = vcpu->arch.dtl_ptr; + vpa = vcpu->arch.vpa.pinned_addr; + old_stolen = vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = vc->stolen_tb; + if (!dt || !vpa) + return; + memset(dt, 0, sizeof(struct dtl_entry)); + dt->dispatch_reason = 7; + dt->processor_id = vc->pcpu + vcpu->arch.ptid; + dt->timebase = mftb(); + dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen; + dt->srr0 = kvmppc_get_pc(vcpu); + dt->srr1 = vcpu->arch.shregs.msr; + ++dt; + if (dt == vcpu->arch.dtl.pinned_end) + dt = vcpu->arch.dtl.pinned_addr; + vcpu->arch.dtl_ptr = dt; + /* order writing *dt vs. writing vpa->dtl_idx */ + smp_wmb(); + vpa->dtl_idx = ++vcpu->arch.dtl_index; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -468,6 +586,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) /* default to host PVR, since we can't spoof it */ vcpu->arch.pvr = mfspr(SPRN_PVR); kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + spin_lock_init(&vcpu->arch.vpa_update_lock); kvmppc_mmu_book3s_hv_init(vcpu); @@ -486,6 +605,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = mftb(); } kvm->arch.vcores[core] = vcore; } @@ -498,6 +618,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ++vcore->num_threads; spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; + vcpu->arch.stolen_logged = vcore->stolen_tb; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -512,12 +633,14 @@ out: void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - if (vcpu->arch.dtl) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); - if (vcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); - if (vcpu->arch.vpa) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.dtl.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); + if (vcpu->arch.slb_shadow.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -569,6 +692,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, list_del(&vcpu->arch.run_list); } +static int kvmppc_grab_hwthread(int cpu) +{ + struct paca_struct *tpaca; + long timeout = 1000; + + tpaca = &paca[cpu]; + + /* Ensure the thread won't go into the kernel if it wakes */ + tpaca->kvm_hstate.hwthread_req = 1; + + /* + * If the thread is already executing in the kernel (e.g. handling + * a stray interrupt), wait for it to get back to nap mode. + * The smp_mb() is to ensure that our setting of hwthread_req + * is visible before we look at hwthread_state, so if this + * races with the code at system_reset_pSeries and the thread + * misses our setting of hwthread_req, we are sure to see its + * setting of hwthread_state, and vice versa. + */ + smp_mb(); + while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) { + if (--timeout <= 0) { + pr_err("KVM: couldn't grab cpu %d\n", cpu); + return -EBUSY; + } + udelay(1); + } + return 0; +} + +static void kvmppc_release_hwthread(int cpu) +{ + struct paca_struct *tpaca; + + tpaca = &paca[cpu]; + tpaca->kvm_hstate.hwthread_req = 0; + tpaca->kvm_hstate.kvm_vcpu = NULL; +} + static void kvmppc_start_thread(struct kvm_vcpu *vcpu) { int cpu; @@ -588,8 +750,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { - tpaca->cpu_start = 0x80; - wmb(); + kvmppc_grab_hwthread(cpu); xics_wake_cpu(cpu); ++vc->n_woken; } @@ -639,7 +800,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) struct kvm_vcpu *vcpu, *vcpu0, *vnext; long ret; u64 now; - int ptid; + int ptid, i; /* don't start if any threads have a signal pending */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) @@ -681,17 +842,29 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->nap_count = 0; vc->entry_exit_count = 0; vc->vcore_state = VCORE_RUNNING; + vc->stolen_tb += mftb() - vc->preempt_tb; vc->in_guest = 0; vc->pcpu = smp_processor_id(); vc->napping_threads = 0; - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); + if (vcpu->arch.vpa.update_pending || + vcpu->arch.slb_shadow.update_pending || + vcpu->arch.dtl.update_pending) + kvmppc_update_vpas(vcpu); + kvmppc_create_dtl_entry(vcpu, vc); + } + /* Grab any remaining hw threads so they can't go into the kernel */ + for (i = ptid; i < threads_per_core; ++i) + kvmppc_grab_hwthread(vc->pcpu + i); preempt_disable(); spin_unlock(&vc->lock); kvm_guest_enter(); __kvmppc_vcore_entry(NULL, vcpu0); + for (i = 0; i < threads_per_core; ++i) + kvmppc_release_hwthread(vc->pcpu + i); spin_lock(&vc->lock); /* disable sending of IPIs on virtual external irqs */ @@ -737,6 +910,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_lock(&vc->lock); out: vc->vcore_state = VCORE_INACTIVE; + vc->preempt_tb = mftb(); list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, arch.run_list) { if (vcpu->arch.ret != RESUME_GUEST) { @@ -835,6 +1009,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) spin_lock(&vc->lock); continue; } + vc->runner = vcpu; n_ceded = 0; list_for_each_entry(v, &vc->runnable_threads, arch.run_list) n_ceded += v->arch.ceded; @@ -854,6 +1029,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } } + vc->runner = NULL; } if (signal_pending(current)) { @@ -917,115 +1093,6 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -static long kvmppc_stt_npages(unsigned long window_size) -{ - return ALIGN((window_size >> SPAPR_TCE_SHIFT) - * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; -} - -static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) -{ - struct kvm *kvm = stt->kvm; - int i; - - mutex_lock(&kvm->lock); - list_del(&stt->list); - for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) - __free_page(stt->pages[i]); - kfree(stt); - mutex_unlock(&kvm->lock); - - kvm_put_kvm(kvm); -} - -static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; - struct page *page; - - if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) - return VM_FAULT_SIGBUS; - - page = stt->pages[vmf->pgoff]; - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { - .fault = kvm_spapr_tce_fault, -}; - -static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_spapr_tce_vm_ops; - return 0; -} - -static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) -{ - struct kvmppc_spapr_tce_table *stt = filp->private_data; - - release_spapr_tce_table(stt); - return 0; -} - -static struct file_operations kvm_spapr_tce_fops = { - .mmap = kvm_spapr_tce_mmap, - .release = kvm_spapr_tce_release, -}; - -long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, - struct kvm_create_spapr_tce *args) -{ - struct kvmppc_spapr_tce_table *stt = NULL; - long npages; - int ret = -ENOMEM; - int i; - - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - - npages = kvmppc_stt_npages(args->window_size); - - stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *), - GFP_KERNEL); - if (!stt) - goto fail; - - stt->liobn = args->liobn; - stt->window_size = args->window_size; - stt->kvm = kvm; - - for (i = 0; i < npages; i++) { - stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!stt->pages[i]) - goto fail; - } - - kvm_get_kvm(kvm); - - mutex_lock(&kvm->lock); - list_add(&stt->list, &kvm->arch.spapr_tce_tables); - - mutex_unlock(&kvm->lock); - - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR); - -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); - - kfree(stt); - } - return ret; -} /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ @@ -1108,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, + int linux_psize) +{ + struct mmu_psize_def *def = &mmu_psize_defs[linux_psize]; + + if (!def->shift) + return; + (*sps)->page_shift = def->shift; + (*sps)->slb_enc = def->sllp; + (*sps)->enc[0].page_shift = def->shift; + (*sps)->enc[0].pte_enc = def->penc; + (*sps)++; +} + +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + struct kvm_ppc_one_seg_page_size *sps; + + info->flags = KVM_PPC_PAGE_SIZES_REAL; + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + info->flags |= KVM_PPC_1T_SEGMENTS; + info->slb_size = mmu_slb_size; + + /* We only support these sizes for now, and no muti-size segments */ + sps = &info->sps[0]; + kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M); + + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -1404,12 +1503,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { return EMULATE_FAIL; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { return EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index d3fb4df02c4..84035a528c8 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) rotldi r10,r10,16 mtmsrd r10,1 - /* Save host PMU registers and load guest PMU registers */ + /* Save host PMU registers */ /* R4 is live here (vcpu pointer) but not r3 or r5 */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r5, 0 + mtspr SPRN_MMCRA, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r7, HSTATE_MMCR(r13) std r5, HSTATE_MMCR + 8(r13) std r6, HSTATE_MMCR + 16(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b70bf22a3ff..a84aafce2a1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -26,6 +26,7 @@ #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/exception-64s.h> +#include <asm/kvm_book3s_asm.h> /***************************************************************************** * * @@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) #define XICS_XIRR 4 #define XICS_QIRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ /* * We come in here when wakened from nap mode on a secondary hw thread. @@ -94,26 +96,54 @@ kvm_start_guest: subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + li r0,KVM_HWTHREAD_IN_KVM + stb r0,HSTATE_HWTHREAD_STATE(r13) - /* get vcpu pointer */ - ld r4, HSTATE_KVM_VCPU(r13) + /* NV GPR values from power7_idle() will no longer be valid */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) - /* We got here with an IPI; clear it */ - ld r5, HSTATE_XICS_PHYS(r13) - li r0, 0xff - li r6, XICS_QIRR - li r7, XICS_XIRR - lwzcix r8, r5, r7 /* ack the interrupt */ + /* get vcpu pointer, NULL if we have no vcpu to run */ + ld r4,HSTATE_KVM_VCPU(r13) + cmpdi cr1,r4,0 + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3,SPRN_SRR1 + rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ + cmpwi r3,4 /* was it an external interrupt? */ + bne 27f + + /* + * External interrupt - for now assume it is an IPI, since we + * should never get any other interrupts sent to offline threads. + * Only do this for secondary threads. + */ + beq cr1,25f + lwz r3,VCPU_PTID(r4) + cmpwi r3,0 + beq 27f +25: ld r5,HSTATE_XICS_PHYS(r13) + li r0,0xff + li r6,XICS_QIRR + li r7,XICS_XIRR + lwzcix r8,r5,r7 /* get and ack the interrupt */ sync - stbcix r0, r5, r6 /* clear it */ - stwcix r8, r5, r7 /* EOI it */ + clrldi. r9,r8,40 /* get interrupt source ID. */ + beq 27f /* none there? */ + cmpwi r9,XICS_IPI + bne 26f + stbcix r0,r5,r6 /* clear IPI */ +26: stwcix r8,r5,r7 /* EOI the interrupt */ - /* NV GPR values from power7_idle() will no longer be valid */ - stb r0, PACA_NAPSTATELOST(r13) +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + + /* if we have no vcpu to run, go back to sleep */ + beq cr1,kvm_no_guest + + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede .global kvmppc_hv_entry kvmppc_hv_entry: @@ -129,24 +159,15 @@ kvmppc_hv_entry: mflr r0 std r0, HSTATE_VMHANDLER(r13) - ld r14, VCPU_GPR(r14)(r4) - ld r15, VCPU_GPR(r15)(r4) - ld r16, VCPU_GPR(r16)(r4) - ld r17, VCPU_GPR(r17)(r4) - ld r18, VCPU_GPR(r18)(r4) - ld r19, VCPU_GPR(r19)(r4) - ld r20, VCPU_GPR(r20)(r4) - ld r21, VCPU_GPR(r21)(r4) - ld r22, VCPU_GPR(r22)(r4) - ld r23, VCPU_GPR(r23)(r4) - ld r24, VCPU_GPR(r24)(r4) - ld r25, VCPU_GPR(r25)(r4) - ld r26, VCPU_GPR(r26)(r4) - ld r27, VCPU_GPR(r27)(r4) - ld r28, VCPU_GPR(r28)(r4) - ld r29, VCPU_GPR(r29)(r4) - ld r30, VCPU_GPR(r30)(r4) - ld r31, VCPU_GPR(r31)(r4) + /* Set partition DABR */ + /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 +BEGIN_FTR_SECTION + isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Load guest PMU registers */ /* R4 is live here (vcpu pointer) */ @@ -185,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* Load up FP, VMX and VSX registers */ bl kvmppc_load_fp + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) @@ -226,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 - /* Set partition DABR */ - li r5,3 - ld r6,VCPU_DABR(r4) - mtspr SPRN_DABRX,r5 - mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) @@ -925,12 +959,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r6 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Restore host DABR and DABRX */ - ld r5,HSTATE_DABR(r13) - li r6,7 - mtspr SPRN_DABR,r5 - mtspr SPRN_DABRX,r6 - /* Switch DSCR back to host value */ BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR @@ -969,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r5, VCPU_SPRG2(r9) std r6, VCPU_SPRG3(r9) + /* save FP state */ + mr r3, r9 + bl .kvmppc_save_fp + /* Increment yield count if they have a VPA */ ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 @@ -983,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r7, 0 + mtspr SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -991,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ b 22f 21: mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) std r6, VCPU_MMCR + 16(r9) @@ -1016,17 +1053,20 @@ BEGIN_FTR_SECTION stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: - /* save FP state */ - mr r3, r9 - bl .kvmppc_save_fp /* Secondary threads go off to take a nap on POWER7 */ BEGIN_FTR_SECTION - lwz r0,VCPU_PTID(r3) + lwz r0,VCPU_PTID(r9) cmpwi r0,0 bne secondary_nap END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + /* * Reload DEC. HDEC interrupts were disabled when * we reloaded the host's LPCR value. @@ -1363,7 +1403,12 @@ bounce_ext_interrupt: _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) - mtspr SPRN_DABR,r4 + /* Work around P7 bug where DABR can get corrupted on mtspr */ +1: mtspr SPRN_DABR,r4 + mfspr r5, SPRN_DABR + cmpd r4, r5 + bne 1b + isync li r3,0 blr @@ -1445,8 +1490,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * Take a nap until a decrementer or external interrupt occurs, * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR */ - li r0,0x80 - stb r0,PACAPROCSTART(r13) + li r0,1 + stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR,r5 @@ -1463,26 +1508,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) kvm_end_cede: /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - /* If we're a secondary thread and we got here by an IPI, ack it */ - ld r4,HSTATE_KVM_VCPU(r13) - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f - mfspr r3,SPRN_SRR1 - rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ - cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - ld r5, HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR - lwzcix r8,r5,r7 /* ack the interrupt */ - sync - stbcix r0,r5,r6 /* clear it */ - stwcix r8,r5,r7 /* EOI it */ -27: /* load up FP state */ bl kvmppc_load_fp @@ -1580,12 +1606,17 @@ secondary_nap: stwcx. r3, 0, r4 bne 51b +kvm_no_guest: + li r0, KVM_HWTHREAD_IN_NAP + stb r0, HSTATE_HWTHREAD_STATE(r13) + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 isync - li r0, 0 std r0, HSTATE_SCRATCH0(r13) ptesync ld r0, HSTATE_SCRATCH0(r13) @@ -1599,8 +1630,8 @@ secondary_nap: * r3 = vcpu pointer */ _GLOBAL(kvmppc_save_fp) - mfmsr r9 - ori r8,r9,MSR_FP + mfmsr r5 + ori r8,r5,MSR_FP #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION oris r8,r8,MSR_VEC@h @@ -1649,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE stw r6,VCPU_VRSAVE(r3) - mtmsrd r9 + mtmsrd r5 isync blr diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7759053d391..a1baec340f7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -120,6 +120,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; /* Unset POW bit after we woke up */ @@ -144,6 +145,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } } + /* + * When switching from 32 to 64-bit, we may have a stale 32-bit + * magic page around, we need to flush it. Typically 32-bit magic + * page will be instanciated when calling into RTAS. Note: We + * assume that such transition only happens while in kernel mode, + * ie, we never transition from user 32-bit to kernel 64-bit with + * a 32-bit magic page around. + */ + if (vcpu->arch.magic_page_pa && + !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) { + /* going from RTAS to normal kernel code */ + kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa, + ~0xFFFUL); + } + /* Preload FPU if it's enabled */ if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); @@ -251,6 +267,9 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { ulong mp_pa = vcpu->arch.magic_page_pa; + if (!(vcpu->arch.shared->msr & MSR_SF)) + mp_pa = (uint32_t)mp_pa; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { return 1; @@ -351,6 +370,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* MMIO */ vcpu->stat.mmio_exits++; vcpu->arch.paddr_accessed = pte.raddr; + vcpu->arch.vaddr_accessed = pte.eaddr; r = kvmppc_emulate_mmio(run, vcpu); if ( r == RESUME_HOST_NV ) r = RESUME_HOST; @@ -528,6 +548,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + /* We get here with MSR.EE=0, so enable it to be a nice citizen */ + __hard_irq_enable(); + trace_kvm_book3s_exit(exit_nr, vcpu); preempt_enable(); kvm_resched(vcpu); @@ -617,10 +640,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* We're good on these - the host merely wanted to get our attention */ case BOOK3S_INTERRUPT_DECREMENTER: + case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_EXTERNAL: + case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: + case BOOK3S_INTERRUPT_EXTERNAL_HV: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -628,6 +654,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_PROGRAM: + case BOOK3S_INTERRUPT_H_EMUL_ASSIST: { enum emulation_result er; struct kvmppc_book3s_shadow_vcpu *svcpu; @@ -1131,6 +1158,31 @@ out: return r; } +#ifdef CONFIG_PPC64 +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + /* No flags */ + info->flags = 0; + + /* SLB is always 64 entries */ + info->slb_size = 64; + + /* Standard 4k base page size segment */ + info->sps[0].page_shift = 12; + info->sps[0].slb_enc = 0; + info->sps[0].enc[0].page_shift = 12; + info->sps[0].enc[0].pte_enc = 0; + + /* Standard 16M large page size segment */ + info->sps[1].page_shift = 24; + info->sps[1].slb_enc = SLB_VSID_L; + info->sps[1].enc[0].page_shift = 24; + info->sps[1].enc[0].pte_enc = 0; + + return 0; +} +#endif /* CONFIG_PPC64 */ + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { @@ -1144,11 +1196,18 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, int kvmppc_core_init_vm(struct kvm *kvm) { +#ifdef CONFIG_PPC64 + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); +#endif + return 0; } void kvmppc_core_destroy_vm(struct kvm *kvm) { +#ifdef CONFIG_PPC64 + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); +#endif } static int kvmppc_book3s_init(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index b9589324797..3ff9013d6e7 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -15,6 +15,8 @@ * published by the Free Software Foundation. */ +#include <linux/anon_inodes.h> + #include <asm/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -98,6 +100,83 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +/* Request defs for kvmppc_h_pr_bulk_remove() */ +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL +#define H_BULK_REMOVE_MAX_BATCH 4 + +static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) +{ + int i; + int paramnr = 4; + int ret = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); + unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); + unsigned long pteg, rb, flags; + unsigned long pte[2]; + unsigned long v = 0; + + if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; /* Exit success */ + } else if ((tsh & H_BULK_REMOVE_TYPE) != + H_BULK_REMOVE_REQUEST) { + ret = H_PARAMETER; + break; /* Exit fail */ + } + + tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + tsh |= H_BULK_REMOVE_RESPONSE; + + if ((tsh & H_BULK_REMOVE_ANDCOND) && + (tsh & H_BULK_REMOVE_AVPN)) { + tsh |= H_BULK_REMOVE_PARM; + kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); + ret = H_PARAMETER; + break; /* Exit fail */ + } + + pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); + copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + + /* tsl = AVPN */ + flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; + + if ((pte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) || + ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) { + tsh |= H_BULK_REMOVE_NOT_FOUND; + } else { + /* Splat the pteg in (userland) hpt */ + copy_to_user((void __user *)pteg, &v, sizeof(v)); + + rb = compute_tlbie_rb(pte[0], pte[1], + tsh & H_BULK_REMOVE_PTEX); + vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); + tsh |= H_BULK_REMOVE_SUCCESS; + tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43; + } + kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); + } + kvmppc_set_gpr(vcpu, 3, ret); + + return EMULATE_DONE; +} + static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) { unsigned long flags = kvmppc_get_gpr(vcpu, 4); @@ -134,6 +213,20 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) +{ + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); + unsigned long tce = kvmppc_get_gpr(vcpu, 6); + long rc; + + rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -144,12 +237,12 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_PROTECT: return kvmppc_h_pr_protect(vcpu); case H_BULK_REMOVE: - /* We just flush all PTEs, so user space can - handle the HPT modifications */ - kvmppc_mmu_pte_flush(vcpu, 0, 0); - break; + return kvmppc_h_pr_bulk_remove(vcpu); + case H_PUT_TCE: + return kvmppc_h_pr_put_tce(vcpu); case H_CEDE: kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 6e6e9cef34a..798491a268b 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -128,24 +128,25 @@ no_dcbz32_on: /* First clear RI in our current MSR value */ li r0, MSR_RI andc r6, r6, r0 - MTMSR_EERI(r6) - mtsrr0 r9 - mtsrr1 r4 PPC_LL r0, SVCPU_R0(r3) PPC_LL r1, SVCPU_R1(r3) PPC_LL r2, SVCPU_R2(r3) - PPC_LL r4, SVCPU_R4(r3) PPC_LL r5, SVCPU_R5(r3) - PPC_LL r6, SVCPU_R6(r3) PPC_LL r7, SVCPU_R7(r3) PPC_LL r8, SVCPU_R8(r3) - PPC_LL r9, SVCPU_R9(r3) PPC_LL r10, SVCPU_R10(r3) PPC_LL r11, SVCPU_R11(r3) PPC_LL r12, SVCPU_R12(r3) PPC_LL r13, SVCPU_R13(r3) + MTMSR_EERI(r6) + mtsrr0 r9 + mtsrr1 r4 + + PPC_LL r4, SVCPU_R4(r3) + PPC_LL r6, SVCPU_R6(r3) + PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) RFI diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ee9e1ee9c85..72f13f4a06e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -17,6 +17,8 @@ * * Authors: Hollis Blanchard <hollisb@us.ibm.com> * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + * Scott Wood <scottwood@freescale.com> + * Varun Sethi <varun.sethi@freescale.com> */ #include <linux/errno.h> @@ -30,9 +32,12 @@ #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> -#include "timing.h" #include <asm/cacheflush.h> +#include <asm/dbell.h> +#include <asm/hw_irq.h> +#include <asm/irq.h> +#include "timing.h" #include "booke.h" unsigned long kvmppc_booke_handlers; @@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "doorbell", VCPU_STAT(dbell_exits) }, + { "guest doorbell", VCPU_STAT(gdbell_exits) }, { NULL } }; @@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { u32 old_msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOKE_HV + new_msr |= MSR_GS; +#endif + vcpu->arch.shared->msr = new_msr; kvmppc_mmu_msr_notify(vcpu, old_msr); @@ -195,17 +206,87 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GSRR0, srr0); + mtspr(SPRN_GSRR1, srr1); +#else + vcpu->arch.shared->srr0 = srr0; + vcpu->arch.shared->srr1 = srr1; +#endif +} + +static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.csrr0 = srr0; + vcpu->arch.csrr1 = srr1; +} + +static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) { + vcpu->arch.dsrr0 = srr0; + vcpu->arch.dsrr1 = srr1; + } else { + set_guest_csrr(vcpu, srr0, srr1); + } +} + +static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.mcsrr0 = srr0; + vcpu->arch.mcsrr1 = srr1; +} + +static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GDEAR); +#else + return vcpu->arch.shared->dar; +#endif +} + +static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GDEAR, dear); +#else + vcpu->arch.shared->dar = dear; +#endif +} + +static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GESR); +#else + return vcpu->arch.shared->esr; +#endif +} + +static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GESR, esr); +#else + vcpu->arch.shared->esr = esr; +#endif +} + /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int allowed = 0; - ulong uninitialized_var(msr_mask); + ulong msr_mask = 0; bool update_esr = false, update_dear = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; bool keep_irq = false; + enum int_class int_class; /* Truncate crit indicators in 32 bit mode */ if (!(vcpu->arch.shared->msr & MSR_SF)) { @@ -241,46 +322,85 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_CRITICAL: - case BOOKE_IRQPRIO_WATCHDOG: + case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; + allowed = allowed && !crit; msr_mask = MSR_ME; + int_class = INT_CLASS_CRIT; break; case BOOKE_IRQPRIO_MACHINE_CHECK: allowed = vcpu->arch.shared->msr & MSR_ME; - msr_mask = 0; + allowed = allowed && !crit; + int_class = INT_CLASS_MC; break; case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: keep_irq = true; /* fall through */ case BOOKE_IRQPRIO_EXTERNAL: + case BOOKE_IRQPRIO_DBELL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_DEBUG: allowed = vcpu->arch.shared->msr & MSR_DE; + allowed = allowed && !crit; msr_mask = MSR_ME; + int_class = INT_CLASS_CRIT; break; } if (allowed) { - vcpu->arch.shared->srr0 = vcpu->arch.pc; - vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; + switch (int_class) { + case INT_CLASS_NONCRIT: + set_guest_srr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_CRIT: + set_guest_csrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_DBG: + set_guest_dsrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_MC: + set_guest_mcsrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + } + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - vcpu->arch.shared->esr = vcpu->arch.queued_esr; + set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - vcpu->arch.shared->dar = vcpu->arch.queued_dear; + set_guest_dear(vcpu, vcpu->arch.queued_dear); kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); if (!keep_irq) clear_bit(priority, &vcpu->arch.pending_exceptions); } +#ifdef CONFIG_KVM_BOOKE_HV + /* + * If an interrupt is pending but masked, raise a guest doorbell + * so that we are notified when the guest enables the relevant + * MSR bit. + */ + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC); +#endif + return allowed; } @@ -305,7 +425,7 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) } priority = __ffs(*pending); - while (priority <= BOOKE_IRQPRIO_MAX) { + while (priority < BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; @@ -319,8 +439,9 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { + int r = 0; WARN_ON_ONCE(!irqs_disabled()); kvmppc_core_check_exceptions(vcpu); @@ -328,16 +449,60 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); local_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); - kvmppc_core_check_exceptions(vcpu); + r = 1; }; + + return r; +} + +/* + * Common checks before entering the guest world. Call with interrupts + * disabled. + * + * returns !0 if a signal is pending and check_signal is true + */ +static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + int r = 0; + + WARN_ON_ONCE(!irqs_disabled()); + while (true) { + if (need_resched()) { + local_irq_enable(); + cond_resched(); + local_irq_disable(); + continue; + } + + if (signal_pending(current)) { + r = 1; + break; + } + + if (kvmppc_core_prepare_to_enter(vcpu)) { + /* interrupts got enabled in between, so we + are back at square 1 */ + continue; + } + + break; + } + + return r; } int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; +#ifdef CONFIG_PPC_FPU + unsigned int fpscr; + int fpexc_mode; + u64 fpr[32]; +#endif if (!vcpu->arch.sane) { kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -345,17 +510,53 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - - kvmppc_core_prepare_to_enter(vcpu); - - if (signal_pending(current)) { + if (kvmppc_prepare_to_enter(vcpu)) { kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; } kvm_guest_enter(); + +#ifdef CONFIG_PPC_FPU + /* Save userspace FPU state in stack */ + enable_kernel_fp(); + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); + fpscr = current->thread.fpscr.val; + fpexc_mode = current->thread.fpexc_mode; + + /* Restore guest FPU state to thread */ + memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr)); + current->thread.fpscr.val = vcpu->arch.fpscr; + + /* + * Since we can't trap on MSR_FP in GS-mode, we consider the guest + * as always using the FPU. Kernel usage of FP (via + * enable_kernel_fp()) in this thread must not occur while + * vcpu->fpu_active is set. + */ + vcpu->fpu_active = 1; + + kvmppc_load_guest_fp(vcpu); +#endif + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + +#ifdef CONFIG_PPC_FPU + kvmppc_save_guest_fp(vcpu); + + vcpu->fpu_active = 0; + + /* Save guest FPU state from thread */ + memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr)); + vcpu->arch.fpscr = current->thread.fpscr.val; + + /* Restore userspace FPU state from stack */ + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); + current->thread.fpscr.val = fpscr; + current->thread.fpexc_mode = fpexc_mode; +#endif + kvm_guest_exit(); out: @@ -363,6 +564,84 @@ out: return ret; } +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + return RESUME_GUEST_NV; + + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + return RESUME_HOST; + + case EMULATE_FAIL: + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + +static void kvmppc_fill_pt_regs(struct pt_regs *regs) +{ + ulong r1, ip, msr, lr; + + asm("mr %0, 1" : "=r"(r1)); + asm("mflr %0" : "=r"(lr)); + asm("mfmsr %0" : "=r"(msr)); + asm("bl 1f; 1: mflr %0" : "=r"(ip)); + + memset(regs, 0, sizeof(*regs)); + regs->gpr[1] = r1; + regs->nip = ip; + regs->msr = msr; + regs->link = lr; +} + +static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + struct pt_regs regs; + + switch (exit_nr) { + case BOOKE_INTERRUPT_EXTERNAL: + kvmppc_fill_pt_regs(®s); + do_IRQ(®s); + break; + case BOOKE_INTERRUPT_DECREMENTER: + kvmppc_fill_pt_regs(®s); + timer_interrupt(®s); + break; +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) + case BOOKE_INTERRUPT_DOORBELL: + kvmppc_fill_pt_regs(®s); + doorbell_exception(®s); + break; +#endif + case BOOKE_INTERRUPT_MACHINE_CHECK: + /* FIXME */ + break; + case BOOKE_INTERRUPT_PERFORMANCE_MONITOR: + kvmppc_fill_pt_regs(®s); + performance_monitor_exception(®s); + break; + } +} + /** * kvmppc_handle_exit * @@ -371,12 +650,14 @@ out: int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { - enum emulation_result er; int r = RESUME_HOST; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); + /* restart interrupts if they were meant for the host */ + kvmppc_restart_interrupt(vcpu, exit_nr); + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -386,62 +667,74 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); kvmppc_dump_vcpu(vcpu); + /* For debugging, send invalid exit reason to user space */ + run->hw.hardware_exit_reason = ~1ULL << 32; + run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR); r = RESUME_HOST; break; case BOOKE_INTERRUPT_EXTERNAL: kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - if (need_resched()) - cond_resched(); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DECREMENTER: - /* Since we switched IVPR back to the host's value, the host - * handled this interrupt the moment we enabled interrupts. - * Now we just offer it a chance to reschedule the guest. */ kvmppc_account_exit(vcpu, DEC_EXITS); - if (need_resched()) - cond_resched(); r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_DOORBELL: + kvmppc_account_exit(vcpu, DBELL_EXITS); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL_CRIT: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_CE or MSR_ME was not + * set. Once we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_EE was not set. Once + * we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_PERFORMANCE_MONITOR: + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_HV_PRIV: + r = emulation_exit(run, vcpu); + break; + case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.shared->msr & MSR_PR) { - /* Program traps generated by user-level software must be handled - * by the guest kernel. */ + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { + /* + * Program traps generated by user-level software must + * be handled by the guest kernel. + * + * In GS mode, hypervisor privileged instructions trap + * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are + * actual program interrupts, handled by the guest. + */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); r = RESUME_GUEST; kvmppc_account_exit(vcpu, USR_PR_INST); break; } - er = kvmppc_emulate_instruction(run, vcpu); - switch (er) { - case EMULATE_DONE: - /* don't overwrite subtypes, just account kvm_stats */ - kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); - /* Future optimization: only reload non-volatiles if - * they were actually modified by emulation. */ - r = RESUME_GUEST_NV; - break; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - r = RESUME_HOST; - break; - case EMULATE_FAIL: - /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); - /* For debugging, encode the failing instruction and - * report it to userspace. */ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= vcpu->arch.last_inst; - r = RESUME_HOST; - break; - default: - BUG(); - } + r = emulation_exit(run, vcpu); break; case BOOKE_INTERRUPT_FP_UNAVAIL: @@ -506,6 +799,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; +#ifdef CONFIG_KVM_BOOKE_HV + case BOOKE_INTERRUPT_HV_SYSCALL: + if (!(vcpu->arch.shared->msr & MSR_PR)) { + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + } else { + /* + * hcall from guest userspace -- send privileged + * instruction program check. + */ + kvmppc_core_queue_program(vcpu, ESR_PPR); + } + + r = RESUME_GUEST; + break; +#else case BOOKE_INTERRUPT_SYSCALL: if (!(vcpu->arch.shared->msr & MSR_PR) && (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { @@ -519,6 +827,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; +#endif case BOOKE_INTERRUPT_DTLB_MISS: { unsigned long eaddr = vcpu->arch.fault_dear; @@ -526,7 +835,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa_t gpaddr; gfn_t gfn; -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 if (!(vcpu->arch.shared->msr & MSR_PR) && (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { kvmppc_map_magic(vcpu); @@ -567,6 +876,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Guest has mapped and accessed a page which is not * actually RAM. */ vcpu->arch.paddr_accessed = gpaddr; + vcpu->arch.vaddr_accessed = eaddr; r = kvmppc_emulate_mmio(run, vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } @@ -634,15 +944,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } - local_irq_disable(); - - kvmppc_core_prepare_to_enter(vcpu); - + /* + * To avoid clobbering exit_reason, only check for signals if we + * aren't already exiting to userspace for some other reason. + */ if (!(r & RESUME_HOST)) { - /* To avoid clobbering exit_reason, only check for signals if - * we aren't already exiting to userspace for some other - * reason. */ - if (signal_pending(current)) { + local_irq_disable(); + if (kvmppc_prepare_to_enter(vcpu)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); @@ -659,12 +967,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int r; vcpu->arch.pc = 0; - vcpu->arch.shared->msr = 0; - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + kvmppc_set_msr(vcpu, 0); +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shadow_pid = 1; + vcpu->arch.shared->msr = 0; +#endif /* Eye-catching numbers so we know if the guest takes an interrupt * before it's programmed its own IVPR/IVORs. */ @@ -745,8 +1056,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = vcpu->arch.shared->esr; - sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.dear = get_guest_dear(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -763,8 +1074,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - vcpu->arch.shared->esr = sregs->u.e.esr; - vcpu->arch.shared->dar = sregs->u.e.dear; + set_guest_esr(vcpu, sregs->u.e.esr); + set_guest_dear(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); @@ -932,15 +1243,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, { } -int kvmppc_core_init_vm(struct kvm *kvm) -{ - return 0; -} - -void kvmppc_core_destroy_vm(struct kvm *kvm) -{ -} - void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; @@ -968,8 +1270,19 @@ void kvmppc_decrementer_func(unsigned long data) kvmppc_set_tsr_bits(vcpu, TSR_DIS); } +void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + current->thread.kvm_vcpu = vcpu; +} + +void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) +{ + current->thread.kvm_vcpu = NULL; +} + int __init kvmppc_booke_init(void) { +#ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; unsigned long max_ivor = 0; int i; @@ -1012,7 +1325,7 @@ int __init kvmppc_booke_init(void) } flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - +#endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 2fe202705a3..ba61974c1e2 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/kvm_host.h> #include <asm/kvm_ppc.h> +#include <asm/switch_to.h> #include "timing.h" /* interrupt priortity ordering */ @@ -48,7 +49,20 @@ #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 /* Internal pseudo-irqprio for level triggered externals */ #define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 -#define BOOKE_IRQPRIO_MAX 20 +#define BOOKE_IRQPRIO_DBELL 21 +#define BOOKE_IRQPRIO_DBELL_CRIT 22 +#define BOOKE_IRQPRIO_MAX 23 + +#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \ + (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \ + (1 << BOOKE_IRQPRIO_DBELL) | \ + (1 << BOOKE_IRQPRIO_DECREMENTER) | \ + (1 << BOOKE_IRQPRIO_FIT) | \ + (1 << BOOKE_IRQPRIO_EXTERNAL)) + +#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \ + (1 << BOOKE_IRQPRIO_WATCHDOG) | \ + (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; @@ -61,8 +75,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); -int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); -int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); /* low-level asm code to transfer guest state */ void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); @@ -71,4 +85,46 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu); /* high-level function, manages flags, host state */ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); +void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu); + +enum int_class { + INT_CLASS_NONCRIT, + INT_CLASS_CRIT, + INT_CLASS_MC, + INT_CLASS_DBG, +}; + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); + +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { + load_up_fpu(); + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); +#endif +} #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 3e652da3653..6c76397f2af 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -40,8 +40,8 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int rs; - int rt; + int rs = get_rs(inst); + int rt = get_rt(inst); switch (get_op(inst)) { case 19: @@ -62,19 +62,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - rt = get_rt(inst); kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); break; case OP_31_XOP_MTMSR: - rs = get_rs(inst); kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_WRTEE: - rs = get_rs(inst); vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); @@ -99,22 +96,32 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +/* + * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode). + * Their backing store is in real registers, and these functions + * will return the wrong result if called for them in another context + * (such as debugging). + */ +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_DEAR: - vcpu->arch.shared->dar = spr_val; break; + vcpu->arch.shared->dar = spr_val; + break; case SPRN_ESR: - vcpu->arch.shared->esr = spr_val; break; + vcpu->arch.shared->esr = spr_val; + break; case SPRN_DBCR0: - vcpu->arch.dbcr0 = spr_val; break; + vcpu->arch.dbcr0 = spr_val; + break; case SPRN_DBCR1: - vcpu->arch.dbcr1 = spr_val; break; + vcpu->arch.dbcr1 = spr_val; + break; case SPRN_DBSR: - vcpu->arch.dbsr &= ~spr_val; break; + vcpu->arch.dbsr &= ~spr_val; + break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); break; @@ -122,20 +129,29 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) kvmppc_set_tcr(vcpu, spr_val); break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ + /* + * Note: SPRG4-7 are user-readable. + * These values are loaded into the real SPRGs when resuming the + * guest (PR-mode only). + */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; break; + vcpu->arch.shared->sprg4 = spr_val; + break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; break; + vcpu->arch.shared->sprg5 = spr_val; + break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; break; + vcpu->arch.shared->sprg6 = spr_val; + break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; break; + vcpu->arch.shared->sprg7 = spr_val; + break; case SPRN_IVPR: vcpu->arch.ivpr = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVPR, spr_val); +#endif break; case SPRN_IVOR0: vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; @@ -145,6 +161,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR2: vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR2, spr_val); +#endif break; case SPRN_IVOR3: vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; @@ -163,6 +182,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR8: vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR8, spr_val); +#endif break; case SPRN_IVOR9: vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; @@ -193,75 +215,83 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) return emulated; } -int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_IVPR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; + *spr_val = vcpu->arch.ivpr; + break; case SPRN_DEAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; + *spr_val = vcpu->arch.shared->dar; + break; case SPRN_ESR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; + *spr_val = vcpu->arch.shared->esr; + break; case SPRN_DBCR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; + *spr_val = vcpu->arch.dbcr0; + break; case SPRN_DBCR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; + *spr_val = vcpu->arch.dbcr1; + break; case SPRN_DBSR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; + *spr_val = vcpu->arch.dbsr; + break; case SPRN_TSR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; + *spr_val = vcpu->arch.tsr; + break; case SPRN_TCR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; + *spr_val = vcpu->arch.tcr; + break; case SPRN_IVOR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; break; case SPRN_IVOR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; break; case SPRN_IVOR2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; break; case SPRN_IVOR3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; break; case SPRN_IVOR4: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; break; case SPRN_IVOR5: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; break; case SPRN_IVOR6: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; break; case SPRN_IVOR7: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; break; case SPRN_IVOR8: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; break; case SPRN_IVOR9: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; break; case SPRN_IVOR10: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; break; case SPRN_IVOR11: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; break; case SPRN_IVOR12: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; break; case SPRN_IVOR13: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; break; case SPRN_IVOR14: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; break; case SPRN_IVOR15: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; break; default: diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index c8c4b878795..8feec2ff392 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -419,13 +419,13 @@ lightweight_exit: * written directly to the shared area, so we * need to reload them here with the guest's values. */ - lwz r3, VCPU_SHARED_SPRG4(r5) + PPC_LD(r3, VCPU_SHARED_SPRG4, r5) mtspr SPRN_SPRG4W, r3 - lwz r3, VCPU_SHARED_SPRG5(r5) + PPC_LD(r3, VCPU_SHARED_SPRG5, r5) mtspr SPRN_SPRG5W, r3 - lwz r3, VCPU_SHARED_SPRG6(r5) + PPC_LD(r3, VCPU_SHARED_SPRG6, r5) mtspr SPRN_SPRG6W, r3 - lwz r3, VCPU_SHARED_SPRG7(r5) + PPC_LD(r3, VCPU_SHARED_SPRG7, r5) mtspr SPRN_SPRG7W, r3 #ifdef CONFIG_KVM_EXIT_TIMING diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S new file mode 100644 index 00000000000..6048a00515d --- /dev/null +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -0,0 +1,597 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. + * + * Author: Varun Sethi <varun.sethi@freescale.com> + * Author: Scott Wood <scotwood@freescale.com> + * + * This file is derived from arch/powerpc/kvm/booke_interrupts.S + */ + +#include <asm/ppc_asm.h> +#include <asm/kvm_asm.h> +#include <asm/reg.h> +#include <asm/mmu-44x.h> +#include <asm/page.h> +#include <asm/asm-compat.h> +#include <asm/asm-offsets.h> +#include <asm/bitsperlong.h> +#include <asm/thread_info.h> + +#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ + +#define GET_VCPU(vcpu, thread) \ + PPC_LL vcpu, THREAD_KVM_VCPU(thread) + +#define LONGBYTES (BITS_PER_LONG / 8) + +#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES)) +#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) + +/* The host stack layout: */ +#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ +#define HOST_CALLEE_LR (1 * LONGBYTES) +#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ +/* + * r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. + */ +#define HOST_R2 (3 * LONGBYTES) +#define HOST_CR (4 * LONGBYTES) +#define HOST_NV_GPRS (5 * LONGBYTES) +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES) +#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ + +#define NEED_EMU 0x00000001 /* emulation -- save nv regs */ +#define NEED_DEAR 0x00000002 /* save faulting DEAR */ +#define NEED_ESR 0x00000004 /* save faulting ESR */ + +/* + * On entry: + * r4 = vcpu, r5 = srr0, r6 = srr1 + * saved in vcpu: cr, ctr, r3-r13 + */ +.macro kvm_handler_common intno, srr0, flags + /* Restore host stack pointer */ + PPC_STL r1, VCPU_GPR(r1)(r4) + PPC_STL r2, VCPU_GPR(r2)(r4) + PPC_LL r1, VCPU_HOST_STACK(r4) + PPC_LL r2, HOST_R2(r1) + + mfspr r10, SPRN_PID + lwz r8, VCPU_HOST_PID(r4) + PPC_LL r11, VCPU_SHARED(r4) + PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. */ + li r14, \intno + + stw r10, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r8 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + stw r8, VCPU_TIMING_EXIT_TBL(r4) + bne- 1b + stw r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + + oris r8, r6, MSR_CE@h + PPC_STD(r6, VCPU_SHARED_MSR, r11) + ori r8, r8, MSR_ME | MSR_RI + PPC_STL r5, VCPU_PC(r4) + + /* + * Make sure CE/ME/RI are set (if appropriate for exception type) + * whether or not the guest had it set. Since mfmsr/mtmsr are + * somewhat expensive, skip in the common case where the guest + * had all these bits set (and thus they're still set if + * appropriate for the exception type). + */ + cmpw r6, r8 + beq 1f + mfmsr r7 + .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0 + oris r7, r7, MSR_CE@h + .endif + .if \srr0 != SPRN_MCSRR0 + ori r7, r7, MSR_ME | MSR_RI + .endif + mtmsr r7 +1: + + .if \flags & NEED_EMU + /* + * This assumes you have external PID support. + * To support a bookehv CPU without external PID, you'll + * need to look up the TLB entry and create a temporary mapping. + * + * FIXME: we don't currently handle if the lwepx faults. PR-mode + * booke doesn't handle it either. Since Linux doesn't use + * broadcast tlbivax anymore, the only way this should happen is + * if the guest maps its memory execute-but-not-read, or if we + * somehow take a TLB miss in the middle of this entry code and + * evict the relevant entry. On e500mc, all kernel lowmem is + * bolted into TLB1 large page mappings, and we don't use + * broadcast invalidates, so we should not take a TLB miss here. + * + * Later we'll need to deal with faults here. Disallowing guest + * mappings that are execute-but-not-read could be an option on + * e500mc, but not on chips with an LRAT if it is used. + */ + + mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + mr r8, r3 + PPC_STL r20, VCPU_GPR(r20)(r4) + rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS + PPC_STL r21, VCPU_GPR(r21)(r4) + rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR + PPC_STL r22, VCPU_GPR(r22)(r4) + rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + mtspr SPRN_EPLC, r8 + + /* disable preemption, so we are sure we hit the fixup handler */ +#ifdef CONFIG_PPC64 + clrrdi r8,r1,THREAD_SHIFT +#else + rlwinm r8,r1,0,0,31-THREAD_SHIFT /* current thread_info */ +#endif + li r7, 1 + stw r7, TI_PREEMPT(r8) + + isync + + /* + * In case the read goes wrong, we catch it and write an invalid value + * in LAST_INST instead. + */ +1: lwepx r9, 0, r5 +2: +.section .fixup, "ax" +3: li r9, KVM_INST_FETCH_FAILED + b 2b +.previous +.section __ex_table,"a" + PPC_LONG_ALIGN + PPC_LONG 1b,3b +.previous + + mtspr SPRN_EPLC, r3 + li r7, 0 + stw r7, TI_PREEMPT(r8) + stw r9, VCPU_LAST_INST(r4) + .endif + + .if \flags & NEED_ESR + mfspr r8, SPRN_ESR + PPC_STL r8, VCPU_FAULT_ESR(r4) + .endif + + .if \flags & NEED_DEAR + mfspr r9, SPRN_DEAR + PPC_STL r9, VCPU_FAULT_DEAR(r4) + .endif + + b kvmppc_resume_host +.endm + +/* + * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h + */ +.macro kvm_handler intno srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, SPRN_SPRG_RSCRATCH0 + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, THREAD_NORMSAVE(0)(r10) + PPC_STL r5, VCPU_GPR(r5)(r11) + stw r13, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r10)(r11) + PPC_LL r3, THREAD_NORMSAVE(2)(r10) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + mfspr r6, \srr1 + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r8, VCPU_GPR(r8)(r11) + PPC_STL r9, VCPU_GPR(r9)(r11) + PPC_STL r3, VCPU_GPR(r13)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +.macro kvm_lvl_handler intno scratch srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + mfspr r10, SPRN_SPRG_THREAD + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, \scratch + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, GPR9(r8) + PPC_STL r5, VCPU_GPR(r5)(r11) + stw r9, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r8)(r11) + PPC_LL r3, GPR10(r8) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r9)(r11) + mfspr r6, \srr1 + PPC_LL r4, GPR11(r8) + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r3, VCPU_GPR(r10)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r13, VCPU_GPR(r13)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + PPC_STL r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \ + SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR) +kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ + SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU +kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 + + +/* Registers: + * SPRG_SCRATCH0: guest r10 + * r4: vcpu pointer + * r11: vcpu->arch.shared + * r14: KVM exit number + */ +_GLOBAL(kvmppc_resume_host) + /* Save remaining volatile guest register state to vcpu. */ + mfspr r3, SPRN_VRSAVE + PPC_STL r0, VCPU_GPR(r0)(r4) + mflr r5 + mfspr r6, SPRN_SPRG4 + PPC_STL r5, VCPU_LR(r4) + mfspr r7, SPRN_SPRG5 + stw r3, VCPU_VRSAVE(r4) + PPC_STD(r6, VCPU_SHARED_SPRG4, r11) + mfspr r8, SPRN_SPRG6 + PPC_STD(r7, VCPU_SHARED_SPRG5, r11) + mfspr r9, SPRN_SPRG7 + PPC_STD(r8, VCPU_SHARED_SPRG6, r11) + mfxer r3 + PPC_STD(r9, VCPU_SHARED_SPRG7, r11) + + /* save guest MAS registers and restore host mas4 & mas6 */ + mfspr r5, SPRN_MAS0 + PPC_STL r3, VCPU_XER(r4) + mfspr r6, SPRN_MAS1 + stw r5, VCPU_SHARED_MAS0(r11) + mfspr r7, SPRN_MAS2 + stw r6, VCPU_SHARED_MAS1(r11) + PPC_STD(r7, VCPU_SHARED_MAS2, r11) + mfspr r5, SPRN_MAS3 + mfspr r6, SPRN_MAS4 + stw r5, VCPU_SHARED_MAS7_3+4(r11) + mfspr r7, SPRN_MAS6 + stw r6, VCPU_SHARED_MAS4(r11) + mfspr r5, SPRN_MAS7 + lwz r6, VCPU_HOST_MAS4(r4) + stw r7, VCPU_SHARED_MAS6(r11) + lwz r8, VCPU_HOST_MAS6(r4) + mtspr SPRN_MAS4, r6 + stw r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r8 + /* Enable MAS register updates via exception */ + mfspr r3, SPRN_EPCR + rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH + mtspr SPRN_EPCR, r3 + isync + + /* Switch to kernel stack and jump to handler. */ + PPC_LL r3, HOST_RUN(r1) + mr r5, r14 /* intno */ + mr r14, r4 /* Save vcpu pointer. */ + bl kvmppc_handle_exit + + /* Restore vcpu pointer and the nonvolatiles we used. */ + mr r4, r14 + PPC_LL r14, VCPU_GPR(r14)(r4) + + andi. r5, r3, RESUME_FLAG_NV + beq skip_nv_load + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) +skip_nv_load: + /* Should we return to the guest? */ + andi. r5, r3, RESUME_FLAG_HOST + beq lightweight_exit + + srawi r3, r3, 2 /* Shift -ERR back down. */ + +heavyweight_exit: + /* Not returning to guest. */ + PPC_LL r5, HOST_STACK_LR(r1) + lwz r6, HOST_CR(r1) + + /* + * We already saved guest volatile register state; now save the + * non-volatiles. + */ + + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + PPC_STL r20, VCPU_GPR(r20)(r4) + PPC_STL r21, VCPU_GPR(r21)(r4) + PPC_STL r22, VCPU_GPR(r22)(r4) + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + + /* Load host non-volatile register state from host stack. */ + PPC_LL r14, HOST_NV_GPR(r14)(r1) + PPC_LL r15, HOST_NV_GPR(r15)(r1) + PPC_LL r16, HOST_NV_GPR(r16)(r1) + PPC_LL r17, HOST_NV_GPR(r17)(r1) + PPC_LL r18, HOST_NV_GPR(r18)(r1) + PPC_LL r19, HOST_NV_GPR(r19)(r1) + PPC_LL r20, HOST_NV_GPR(r20)(r1) + PPC_LL r21, HOST_NV_GPR(r21)(r1) + PPC_LL r22, HOST_NV_GPR(r22)(r1) + PPC_LL r23, HOST_NV_GPR(r23)(r1) + PPC_LL r24, HOST_NV_GPR(r24)(r1) + PPC_LL r25, HOST_NV_GPR(r25)(r1) + PPC_LL r26, HOST_NV_GPR(r26)(r1) + PPC_LL r27, HOST_NV_GPR(r27)(r1) + PPC_LL r28, HOST_NV_GPR(r28)(r1) + PPC_LL r29, HOST_NV_GPR(r29)(r1) + PPC_LL r30, HOST_NV_GPR(r30)(r1) + PPC_LL r31, HOST_NV_GPR(r31)(r1) + + /* Return to kvm_vcpu_run(). */ + mtlr r5 + mtcr r6 + addi r1, r1, HOST_STACK_SIZE + /* r3 still contains the return code from kvmppc_handle_exit(). */ + blr + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + stwu r1, -HOST_STACK_SIZE(r1) + PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + + /* Save host state to stack. */ + PPC_STL r3, HOST_RUN(r1) + mflr r3 + mfcr r5 + PPC_STL r3, HOST_STACK_LR(r1) + + stw r5, HOST_CR(r1) + + /* Save host non-volatile register state to stack. */ + PPC_STL r14, HOST_NV_GPR(r14)(r1) + PPC_STL r15, HOST_NV_GPR(r15)(r1) + PPC_STL r16, HOST_NV_GPR(r16)(r1) + PPC_STL r17, HOST_NV_GPR(r17)(r1) + PPC_STL r18, HOST_NV_GPR(r18)(r1) + PPC_STL r19, HOST_NV_GPR(r19)(r1) + PPC_STL r20, HOST_NV_GPR(r20)(r1) + PPC_STL r21, HOST_NV_GPR(r21)(r1) + PPC_STL r22, HOST_NV_GPR(r22)(r1) + PPC_STL r23, HOST_NV_GPR(r23)(r1) + PPC_STL r24, HOST_NV_GPR(r24)(r1) + PPC_STL r25, HOST_NV_GPR(r25)(r1) + PPC_STL r26, HOST_NV_GPR(r26)(r1) + PPC_STL r27, HOST_NV_GPR(r27)(r1) + PPC_STL r28, HOST_NV_GPR(r28)(r1) + PPC_STL r29, HOST_NV_GPR(r29)(r1) + PPC_STL r30, HOST_NV_GPR(r30)(r1) + PPC_STL r31, HOST_NV_GPR(r31)(r1) + + /* Load guest non-volatiles. */ + PPC_LL r14, VCPU_GPR(r14)(r4) + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) + + +lightweight_exit: + PPC_STL r2, HOST_R2(r1) + + mfspr r3, SPRN_PID + stw r3, VCPU_HOST_PID(r4) + lwz r3, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r3 + + PPC_LL r11, VCPU_SHARED(r4) + /* Disable MAS register updates via exception */ + mfspr r3, SPRN_EPCR + oris r3, r3, SPRN_EPCR_DMIUH@h + mtspr SPRN_EPCR, r3 + isync + /* Save host mas4 and mas6 and load guest MAS registers */ + mfspr r3, SPRN_MAS4 + stw r3, VCPU_HOST_MAS4(r4) + mfspr r3, SPRN_MAS6 + stw r3, VCPU_HOST_MAS6(r4) + lwz r3, VCPU_SHARED_MAS0(r11) + lwz r5, VCPU_SHARED_MAS1(r11) + PPC_LD(r6, VCPU_SHARED_MAS2, r11) + lwz r7, VCPU_SHARED_MAS7_3+4(r11) + lwz r8, VCPU_SHARED_MAS4(r11) + mtspr SPRN_MAS0, r3 + mtspr SPRN_MAS1, r5 + mtspr SPRN_MAS2, r6 + mtspr SPRN_MAS3, r7 + mtspr SPRN_MAS4, r8 + lwz r3, VCPU_SHARED_MAS6(r11) + lwz r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r3 + mtspr SPRN_MAS7, r5 + + /* + * Host interrupt handlers may have clobbered these guest-readable + * SPRGs, so we need to reload them here with the guest's values. + */ + lwz r3, VCPU_VRSAVE(r4) + PPC_LD(r5, VCPU_SHARED_SPRG4, r11) + mtspr SPRN_VRSAVE, r3 + PPC_LD(r6, VCPU_SHARED_SPRG5, r11) + mtspr SPRN_SPRG4W, r5 + PPC_LD(r7, VCPU_SHARED_SPRG6, r11) + mtspr SPRN_SPRG5W, r6 + PPC_LD(r8, VCPU_SHARED_SPRG7, r11) + mtspr SPRN_SPRG6W, r7 + mtspr SPRN_SPRG7W, r8 + + /* Load some guest volatiles. */ + PPC_LL r3, VCPU_LR(r4) + PPC_LL r5, VCPU_XER(r4) + PPC_LL r6, VCPU_CTR(r4) + lwz r7, VCPU_CR(r4) + PPC_LL r8, VCPU_PC(r4) + PPC_LD(r9, VCPU_SHARED_MSR, r11) + PPC_LL r0, VCPU_GPR(r0)(r4) + PPC_LL r1, VCPU_GPR(r1)(r4) + PPC_LL r2, VCPU_GPR(r2)(r4) + PPC_LL r10, VCPU_GPR(r10)(r4) + PPC_LL r11, VCPU_GPR(r11)(r4) + PPC_LL r12, VCPU_GPR(r12)(r4) + PPC_LL r13, VCPU_GPR(r13)(r4) + mtlr r3 + mtxer r5 + mtctr r6 + mtsrr0 r8 + mtsrr1 r9 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r9, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4) + bne 1b + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + + /* + * Don't execute any instruction which can change CR after + * below instruction. + */ + mtcr r7 + + /* Finish loading guest volatiles and jump to guest. */ + PPC_LL r5, VCPU_GPR(r5)(r4) + PPC_LL r6, VCPU_GPR(r6)(r4) + PPC_LL r7, VCPU_GPR(r7)(r4) + PPC_LL r8, VCPU_GPR(r8)(r4) + PPC_LL r9, VCPU_GPR(r9)(r4) + + PPC_LL r3, VCPU_GPR(r3)(r4) + PPC_LL r4, VCPU_GPR(r4)(r4) + rfi diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ddcd896fa2f..b479ed77c51 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -20,11 +20,282 @@ #include <asm/reg.h> #include <asm/cputable.h> #include <asm/tlbflush.h> -#include <asm/kvm_e500.h> #include <asm/kvm_ppc.h> +#include "../mm/mmu_decl.h" #include "booke.h" -#include "e500_tlb.h" +#include "e500.h" + +struct id { + unsigned long val; + struct id **pentry; +}; + +#define NUM_TIDS 256 + +/* + * This table provide mappings from: + * (guestAS,guestTID,guestPR) --> ID of physical cpu + * guestAS [0..1] + * guestTID [0..255] + * guestPR [0..1] + * ID [1..255] + * Each vcpu keeps one vcpu_id_table. + */ +struct vcpu_id_table { + struct id id[2][NUM_TIDS][2]; +}; + +/* + * This table provide reversed mappings of vcpu_id_table: + * ID --> address of vcpu_id_table item. + * Each physical core has one pcpu_id_table. + */ +struct pcpu_id_table { + struct id *entry[NUM_TIDS]; +}; + +static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); + +/* This variable keeps last used shadow ID on local core. + * The valid range of shadow ID is [1..255] */ +static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); + +/* + * Allocate a free shadow id and setup a valid sid mapping in given entry. + * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_setup_one(struct id *entry) +{ + unsigned long sid; + int ret = -1; + + sid = ++(__get_cpu_var(pcpu_last_used_sid)); + if (sid < NUM_TIDS) { + __get_cpu_var(pcpu_sids).entry[sid] = entry; + entry->val = sid; + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + ret = sid; + } + + /* + * If sid == NUM_TIDS, we've run out of sids. We return -1, and + * the caller will invalidate everything and start over. + * + * sid > NUM_TIDS indicates a race, which we disable preemption to + * avoid. + */ + WARN_ON(sid > NUM_TIDS); + + return ret; +} + +/* + * Check if given entry contain a valid shadow id mapping. + * An ID mapping is considered valid only if + * both vcpu and pcpu know this mapping. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_lookup(struct id *entry) +{ + if (entry && entry->val != 0 && + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + return entry->val; + return -1; +} + +/* Invalidate all id mappings on local core -- call with preempt disabled */ +static inline void local_sid_destroy_all(void) +{ + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); +} + +static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); + return vcpu_e500->idt; +} + +static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->idt); + vcpu_e500->idt = NULL; +} + +/* Map guest pid to shadow. + * We use PID to keep shadow of current guest non-zero PID, + * and use PID1 to keep shadow of guest zero PID. + * So that guest tlbe with TID=0 can be accessed at any time */ +static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + preempt_disable(); + vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), + get_cur_pid(&vcpu_e500->vcpu), + get_cur_pr(&vcpu_e500->vcpu), 1); + vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), 0, + get_cur_pr(&vcpu_e500->vcpu), 1); + preempt_enable(); +} + +/* Invalidate all mappings on vcpu */ +static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* Invalidate one ID mapping on vcpu */ +static inline void kvmppc_e500_id_table_reset_one( + struct kvmppc_vcpu_e500 *vcpu_e500, + int as, int pid, int pr) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + + BUG_ON(as >= 2); + BUG_ON(pid >= NUM_TIDS); + BUG_ON(pr >= 2); + + idt->id[as][pid][pr].val = 0; + idt->id[as][pid][pr].pentry = NULL; + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* + * Map guest (vcpu,AS,ID,PR) to physical core shadow id. + * This function first lookup if a valid mapping exists, + * if not, then creates a new one. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + int sid; + + BUG_ON(as >= 2); + BUG_ON(gid >= NUM_TIDS); + BUG_ON(pr >= 2); + + sid = local_sid_lookup(&idt->id[as][gid][pr]); + + while (sid <= 0) { + /* No mapping yet */ + sid = local_sid_setup_one(&idt->id[as][gid][pr]); + if (sid <= 0) { + _tlbil_all(); + local_sid_destroy_all(); + } + + /* Update shadow pid when mappings are changed */ + if (!avoid_recursion) + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } + + return sid; +} + +unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe), + get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (vcpu->arch.pid != pid) { + vcpu_e500->pid[0] = vcpu->arch.pid = pid; + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } +} + +/* gtlbe must not be mapped by more than one host tlbe */ +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + unsigned int pr, tid, ts, pid; + u32 val, eaddr; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + + preempt_disable(); + + /* One guest ID may be mapped to two shadow IDs */ + for (pr = 0; pr < 2; pr++) { + /* + * The shadow PID can have a valid mapping on at most one + * host CPU. In the common case, it will be valid on this + * CPU, in which case we do a local invalidation of the + * specific address. + * + * If the shadow PID is not valid on the current host CPU, + * we invalidate the entire shadow PID. + */ + pid = local_sid_lookup(&idt->id[ts][tid][pr]); + if (pid <= 0) { + kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); + continue; + } + + /* + * The guest is invalidating a 4K entry which is in a PID + * that has a valid shadow mapping on this host CPU. We + * search host TLB to invalidate it's shadow TLB entry, + * similar to __tlbil_va except that we need to look in AS1. + */ + val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + + local_irq_restore(flags); + } + + preempt_enable(); +} + +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kvmppc_e500_id_table_reset_all(vcpu_e500); +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ + /* Recalc shadow pid since MSR changes */ + kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); +} void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) { @@ -36,17 +307,20 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvmppc_e500_tlb_load(vcpu, cpu); + kvmppc_booke_vcpu_load(vcpu, cpu); + + /* Shadow PID may be expired on local core */ + kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_e500_tlb_put(vcpu); - #ifdef CONFIG_SPE if (vcpu->arch.shadow_msr & MSR_SPE) kvmppc_vcpu_disable_spe(vcpu); #endif + + kvmppc_booke_vcpu_put(vcpu); } int kvmppc_core_check_processor_compat(void) @@ -61,6 +335,23 @@ int kvmppc_core_check_processor_compat(void) return r; } +static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + + /* Insert large initial mapping for guest. */ + tlbe = get_entry(vcpu_e500, 1, 0); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); + tlbe->mas2 = 0; + tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; + + /* 4K map for serial output. Used by kernel wrapper. */ + tlbe = get_entry(vcpu_e500, 1, 1); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); + tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; + tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; +} + int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -76,32 +367,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); - if (index < 0) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -115,19 +380,6 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; - sregs->u.e.mas0 = vcpu->arch.shared->mas0; - sregs->u.e.mas1 = vcpu->arch.shared->mas1; - sregs->u.e.mas2 = vcpu->arch.shared->mas2; - sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; - sregs->u.e.mas4 = vcpu->arch.shared->mas4; - sregs->u.e.mas6 = vcpu->arch.shared->mas6; - - sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); - sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; - sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; - sregs->u.e.tlbcfg[2] = 0; - sregs->u.e.tlbcfg[3] = 0; - sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; @@ -135,11 +387,13 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; kvmppc_get_sregs_ivor(vcpu, sregs); + kvmppc_get_sregs_e500_tlb(vcpu, sregs); } int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int ret; if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { vcpu_e500->svr = sregs->u.e.impl.fsl.svr; @@ -147,14 +401,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; } - if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { - vcpu->arch.shared->mas0 = sregs->u.e.mas0; - vcpu->arch.shared->mas1 = sregs->u.e.mas1; - vcpu->arch.shared->mas2 = sregs->u.e.mas2; - vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; - vcpu->arch.shared->mas4 = sregs->u.e.mas4; - vcpu->arch.shared->mas6 = sregs->u.e.mas6; - } + ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); + if (ret < 0) + return ret; if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) return 0; @@ -193,9 +442,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + goto uninit_vcpu; + err = kvmppc_e500_tlb_init(vcpu_e500); if (err) - goto uninit_vcpu; + goto uninit_id; vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (!vcpu->arch.shared) @@ -205,6 +457,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_id: + kvmppc_e500_id_table_free(vcpu_e500); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_vcpu: @@ -218,11 +472,21 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); kvmppc_e500_tlb_uninit(vcpu_e500); + kvmppc_e500_id_table_free(vcpu_e500); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + static int __init kvmppc_e500_init(void) { int r, i; diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h new file mode 100644 index 00000000000..aa8b81428bf --- /dev/null +++ b/arch/powerpc/kvm/e500.h @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu <yu.liu@freescale.com> + * Scott Wood <scottwood@freescale.com> + * Ashish Kalra <ashish.kalra@freescale.com> + * Varun Sethi <varun.sethi@freescale.com> + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.h and + * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>, + * Copyright IBM Corp. 2007-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef KVM_E500_H +#define KVM_E500_H + +#include <linux/kvm_host.h> +#include <asm/mmu-book3e.h> +#include <asm/tlb.h> + +#define E500_PID_NUM 3 +#define E500_TLB_NUM 2 + +#define E500_TLB_VALID 1 +#define E500_TLB_DIRTY 2 +#define E500_TLB_BITMAP 4 + +struct tlbe_ref { + pfn_t pfn; + unsigned int flags; /* E500_TLB_* */ +}; + +struct tlbe_priv { + struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ +}; + +#ifdef CONFIG_KVM_E500V2 +struct vcpu_id_table; +#endif + +struct kvmppc_e500_tlb_params { + int entries, ways, sets; +}; + +struct kvmppc_vcpu_e500 { + struct kvm_vcpu vcpu; + + /* Unmodified copy of the guest's TLB -- shared with host userspace. */ + struct kvm_book3e_206_tlb_entry *gtlb_arch; + + /* Starting entry number in gtlb_arch[] */ + int gtlb_offset[E500_TLB_NUM]; + + /* KVM internal information associated with each guest TLB entry */ + struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; + + struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; + + unsigned int gtlb_nv[E500_TLB_NUM]; + + /* + * information associated with each host TLB entry -- + * TLB1 only for now. If/when guest TLB1 entries can be + * mapped with host TLB0, this will be used for that too. + * + * We don't want to use this for guest TLB0 because then we'd + * have the overhead of doing the translation again even if + * the entry is still in the guest TLB (e.g. we swapped out + * and back, and our host TLB entries got evicted). + */ + struct tlbe_ref *tlb_refs[E500_TLB_NUM]; + unsigned int host_tlb1_nv; + + u32 svr; + u32 l1csr0; + u32 l1csr1; + u32 hid0; + u32 hid1; + u64 mcar; + + struct page **shared_tlb_pages; + int num_shared_tlb_pages; + + u64 *g2h_tlb1_map; + unsigned int *h2g_tlb1_rmap; + + /* Minimum and maximum address mapped my TLB1 */ + unsigned long tlb1_min_eaddr; + unsigned long tlb1_max_eaddr; + +#ifdef CONFIG_KVM_E500V2 + u32 pid[E500_PID_NUM]; + + /* vcpu id table */ + struct vcpu_id_table *idt; +#endif +}; + +static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); +} + + +/* This geometry is the legacy default -- can be overridden by userspace */ +#define KVM_E500_TLB0_WAY_SIZE 128 +#define KVM_E500_TLB0_WAY_NUM 2 + +#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) +#define KVM_E500_TLB1_SIZE 16 + +#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) +#define tlbsel_of(index) ((index) >> 16) +#define esel_of(index) ((index) & 0xFFFF) + +#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) +#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) +#define MAS2_ATTRIB_MASK \ + (MAS2_X0 | MAS2_X1) +#define MAS3_ATTRIB_MASK \ + (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ + | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) + +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, + ulong value); +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb); +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); + +void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + + +#ifdef CONFIG_KVM_E500V2 +unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion); +#endif + +/* TLB helper functions */ +static inline unsigned int +get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 7) & 0x1f; +} + +static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas2 & 0xfffff000; +} + +static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1ULL << 10 << pgsize; +} + +static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + u64 bytes = get_tlb_bytes(tlbe); + return get_tlb_eaddr(tlbe) + bytes - 1; +} + +static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & ~0xfffULL; +} + +static inline unsigned int +get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 16) & 0xff; +} + +static inline unsigned int +get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 12) & 0x1; +} + +static inline unsigned int +get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 31) & 0x1; +} + +static inline unsigned int +get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 30) & 0x1; +} + +static inline unsigned int +get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; +} + +static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid & 0xff; +} + +static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); +} + +static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & MSR_PR); +} + +static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas6 >> 16) & 0xff; +} + +static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas6 & 0x1; +} + +static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) +{ + /* + * Manual says that tlbsel has 2 bits wide. + * Since we only have two TLBs, only lower bit is used. + */ + return (vcpu->arch.shared->mas0 >> 28) & 0x1; +} + +static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas0 & 0xfff; +} + +static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas0 >> 16) & 0xfff; +} + +static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvm_book3e_206_tlb_entry *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + +#ifndef CONFIG_KVM_BOOKE_HV + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) + return 0; +#endif + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +static inline struct kvm_book3e_206_tlb_entry *get_entry( + struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) +{ + int offset = vcpu_e500->gtlb_offset[tlbsel]; + return &vcpu_e500->gtlb_arch[offset + entry]; +} + +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe); +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); + +#ifdef CONFIG_KVM_BOOKE_HV +#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) +#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) +#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) +#else +unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe); + +static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf; + + return vcpu_e500->pid[tidseld]; +} + +/* Force TS=1 for all guest mappings. */ +#define get_tlb_sts(gtlbe) (MAS1_TS) +#endif /* !BOOKE_HV */ + +#endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 6d0b2bd54fb..8b99e076dc8 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -14,27 +14,96 @@ #include <asm/kvm_ppc.h> #include <asm/disassemble.h> -#include <asm/kvm_e500.h> +#include <asm/dbell.h> #include "booke.h" -#include "e500_tlb.h" +#include "e500.h" +#define XOP_MSGSND 206 +#define XOP_MSGCLR 238 #define XOP_TLBIVAX 786 #define XOP_TLBSX 914 #define XOP_TLBRE 946 #define XOP_TLBWE 978 +#define XOP_TLBILX 18 + +#ifdef CONFIG_KVM_E500MC +static int dbell2prio(ulong param) +{ + int msg = param & PPC_DBELL_TYPE_MASK; + int prio = -1; + + switch (msg) { + case PPC_DBELL_TYPE(PPC_DBELL): + prio = BOOKE_IRQPRIO_DBELL; + break; + case PPC_DBELL_TYPE(PPC_DBELL_CRIT): + prio = BOOKE_IRQPRIO_DBELL_CRIT; + break; + default: + break; + } + + return prio; +} + +static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) +{ + ulong param = vcpu->arch.gpr[rb]; + int prio = dbell2prio(param); + + if (prio < 0) + return EMULATE_FAIL; + + clear_bit(prio, &vcpu->arch.pending_exceptions); + return EMULATE_DONE; +} + +static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) +{ + ulong param = vcpu->arch.gpr[rb]; + int prio = dbell2prio(rb); + int pir = param & PPC_DBELL_PIR_MASK; + int i; + struct kvm_vcpu *cvcpu; + + if (prio < 0) + return EMULATE_FAIL; + + kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) { + int cpir = cvcpu->arch.shared->pir; + if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) { + set_bit(prio, &cvcpu->arch.pending_exceptions); + kvm_vcpu_kick(cvcpu); + } + } + + return EMULATE_DONE; +} +#endif int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int ra; - int rb; + int ra = get_ra(inst); + int rb = get_rb(inst); + int rt = get_rt(inst); switch (get_op(inst)) { case 31: switch (get_xop(inst)) { +#ifdef CONFIG_KVM_E500MC + case XOP_MSGSND: + emulated = kvmppc_e500_emul_msgsnd(vcpu, rb); + break; + + case XOP_MSGCLR: + emulated = kvmppc_e500_emul_msgclr(vcpu, rb); + break; +#endif + case XOP_TLBRE: emulated = kvmppc_e500_emul_tlbre(vcpu); break; @@ -44,13 +113,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_TLBSX: - rb = get_rb(inst); emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); break; + case XOP_TLBILX: + emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); + break; + case XOP_TLBIVAX: - ra = get_ra(inst); - rb = get_rb(inst); emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); break; @@ -70,52 +140,63 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV case SPRN_PID: kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: if (spr_val != 0) return EMULATE_FAIL; - vcpu_e500->pid[1] = spr_val; break; + vcpu_e500->pid[1] = spr_val; + break; case SPRN_PID2: if (spr_val != 0) return EMULATE_FAIL; - vcpu_e500->pid[2] = spr_val; break; + vcpu_e500->pid[2] = spr_val; + break; case SPRN_MAS0: - vcpu->arch.shared->mas0 = spr_val; break; + vcpu->arch.shared->mas0 = spr_val; + break; case SPRN_MAS1: - vcpu->arch.shared->mas1 = spr_val; break; + vcpu->arch.shared->mas1 = spr_val; + break; case SPRN_MAS2: - vcpu->arch.shared->mas2 = spr_val; break; + vcpu->arch.shared->mas2 = spr_val; + break; case SPRN_MAS3: vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; vcpu->arch.shared->mas7_3 |= spr_val; break; case SPRN_MAS4: - vcpu->arch.shared->mas4 = spr_val; break; + vcpu->arch.shared->mas4 = spr_val; + break; case SPRN_MAS6: - vcpu->arch.shared->mas6 = spr_val; break; + vcpu->arch.shared->mas6 = spr_val; + break; case SPRN_MAS7: vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; break; +#endif case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); break; case SPRN_L1CSR1: - vcpu_e500->l1csr1 = spr_val; break; + vcpu_e500->l1csr1 = spr_val; + break; case SPRN_HID0: - vcpu_e500->hid0 = spr_val; break; + vcpu_e500->hid0 = spr_val; + break; case SPRN_HID1: - vcpu_e500->hid1 = spr_val; break; + vcpu_e500->hid1 = spr_val; + break; case SPRN_MMUCSR0: emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, @@ -135,81 +216,112 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; - +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val; + break; + case SPRN_IVOR37: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val; + break; +#endif default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); } return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; - unsigned long val; switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV case SPRN_PID: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; + *spr_val = vcpu_e500->pid[0]; + break; case SPRN_PID1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; + *spr_val = vcpu_e500->pid[1]; + break; case SPRN_PID2: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; + *spr_val = vcpu_e500->pid[2]; + break; case SPRN_MAS0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; + *spr_val = vcpu->arch.shared->mas0; + break; case SPRN_MAS1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; + *spr_val = vcpu->arch.shared->mas1; + break; case SPRN_MAS2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; + *spr_val = vcpu->arch.shared->mas2; + break; case SPRN_MAS3: - val = (u32)vcpu->arch.shared->mas7_3; - kvmppc_set_gpr(vcpu, rt, val); + *spr_val = (u32)vcpu->arch.shared->mas7_3; break; case SPRN_MAS4: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; + *spr_val = vcpu->arch.shared->mas4; + break; case SPRN_MAS6: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; + *spr_val = vcpu->arch.shared->mas6; + break; case SPRN_MAS7: - val = vcpu->arch.shared->mas7_3 >> 32; - kvmppc_set_gpr(vcpu, rt, val); + *spr_val = vcpu->arch.shared->mas7_3 >> 32; break; +#endif case SPRN_TLB0CFG: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; + *spr_val = vcpu->arch.tlbcfg[0]; + break; case SPRN_TLB1CFG: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; + *spr_val = vcpu->arch.tlbcfg[1]; + break; case SPRN_L1CSR0: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; + *spr_val = vcpu_e500->l1csr0; + break; case SPRN_L1CSR1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; + *spr_val = vcpu_e500->l1csr1; + break; case SPRN_HID0: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; + *spr_val = vcpu_e500->hid0; + break; case SPRN_HID1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + *spr_val = vcpu_e500->hid1; + break; case SPRN_SVR: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; + *spr_val = vcpu_e500->svr; + break; case SPRN_MMUCSR0: - kvmppc_set_gpr(vcpu, rt, 0); break; + *spr_val = 0; + break; case SPRN_MMUCFG: - kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; + *spr_val = vcpu->arch.mmucfg; + break; /* extra exceptions */ case SPRN_IVOR32: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; case SPRN_IVOR33: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; break; case SPRN_IVOR34: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; case SPRN_IVOR35: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + break; +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; + break; + case SPRN_IVOR37: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; break; +#endif default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); } return emulated; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6e53e4164de..c510fc96130 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -2,6 +2,9 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com + * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, @@ -26,210 +29,15 @@ #include <linux/vmalloc.h> #include <linux/hugetlb.h> #include <asm/kvm_ppc.h> -#include <asm/kvm_e500.h> -#include "../mm/mmu_decl.h" -#include "e500_tlb.h" +#include "e500.h" #include "trace.h" #include "timing.h" #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) -struct id { - unsigned long val; - struct id **pentry; -}; - -#define NUM_TIDS 256 - -/* - * This table provide mappings from: - * (guestAS,guestTID,guestPR) --> ID of physical cpu - * guestAS [0..1] - * guestTID [0..255] - * guestPR [0..1] - * ID [1..255] - * Each vcpu keeps one vcpu_id_table. - */ -struct vcpu_id_table { - struct id id[2][NUM_TIDS][2]; -}; - -/* - * This table provide reversed mappings of vcpu_id_table: - * ID --> address of vcpu_id_table item. - * Each physical core has one pcpu_id_table. - */ -struct pcpu_id_table { - struct id *entry[NUM_TIDS]; -}; - -static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); - -/* This variable keeps last used shadow ID on local core. - * The valid range of shadow ID is [1..255] */ -static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); - static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; -static struct kvm_book3e_206_tlb_entry *get_entry( - struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) -{ - int offset = vcpu_e500->gtlb_offset[tlbsel]; - return &vcpu_e500->gtlb_arch[offset + entry]; -} - -/* - * Allocate a free shadow id and setup a valid sid mapping in given entry. - * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static inline int local_sid_setup_one(struct id *entry) -{ - unsigned long sid; - int ret = -1; - - sid = ++(__get_cpu_var(pcpu_last_used_sid)); - if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; - entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; - ret = sid; - } - - /* - * If sid == NUM_TIDS, we've run out of sids. We return -1, and - * the caller will invalidate everything and start over. - * - * sid > NUM_TIDS indicates a race, which we disable preemption to - * avoid. - */ - WARN_ON(sid > NUM_TIDS); - - return ret; -} - -/* - * Check if given entry contain a valid shadow id mapping. - * An ID mapping is considered valid only if - * both vcpu and pcpu know this mapping. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static inline int local_sid_lookup(struct id *entry) -{ - if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) - return entry->val; - return -1; -} - -/* Invalidate all id mappings on local core -- call with preempt disabled */ -static inline void local_sid_destroy_all(void) -{ - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); -} - -static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); - return vcpu_e500->idt; -} - -static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - kfree(vcpu_e500->idt); -} - -/* Invalidate all mappings on vcpu */ -static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); - - /* Update shadow pid when mappings are changed */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -/* Invalidate one ID mapping on vcpu */ -static inline void kvmppc_e500_id_table_reset_one( - struct kvmppc_vcpu_e500 *vcpu_e500, - int as, int pid, int pr) -{ - struct vcpu_id_table *idt = vcpu_e500->idt; - - BUG_ON(as >= 2); - BUG_ON(pid >= NUM_TIDS); - BUG_ON(pr >= 2); - - idt->id[as][pid][pr].val = 0; - idt->id[as][pid][pr].pentry = NULL; - - /* Update shadow pid when mappings are changed */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -/* - * Map guest (vcpu,AS,ID,PR) to physical core shadow id. - * This function first lookup if a valid mapping exists, - * if not, then creates a new one. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, - unsigned int as, unsigned int gid, - unsigned int pr, int avoid_recursion) -{ - struct vcpu_id_table *idt = vcpu_e500->idt; - int sid; - - BUG_ON(as >= 2); - BUG_ON(gid >= NUM_TIDS); - BUG_ON(pr >= 2); - - sid = local_sid_lookup(&idt->id[as][gid][pr]); - - while (sid <= 0) { - /* No mapping yet */ - sid = local_sid_setup_one(&idt->id[as][gid][pr]); - if (sid <= 0) { - _tlbil_all(); - local_sid_destroy_all(); - } - - /* Update shadow pid when mappings are changed */ - if (!avoid_recursion) - kvmppc_e500_recalc_shadow_pid(vcpu_e500); - } - - return sid; -} - -/* Map guest pid to shadow. - * We use PID to keep shadow of current guest non-zero PID, - * and use PID1 to keep shadow of guest zero PID. - * So that guest tlbe with TID=0 can be accessed at any time */ -void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - preempt_disable(); - vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, - get_cur_as(&vcpu_e500->vcpu), - get_cur_pid(&vcpu_e500->vcpu), - get_cur_pr(&vcpu_e500->vcpu), 1); - vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, - get_cur_as(&vcpu_e500->vcpu), 0, - get_cur_pr(&vcpu_e500->vcpu), 1); - preempt_enable(); -} - static inline unsigned int gtlb0_get_next_victim( struct kvmppc_vcpu_e500 *vcpu_e500) { @@ -258,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) /* Mask off reserved bits. */ mas3 &= MAS3_ATTRIB_MASK; +#ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, * so we need to translate guest @@ -265,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) mas3 &= ~E500_TLB_USER_PERM_MASK; mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; } - - return mas3 | E500_TLB_SUPER_PERM_MASK; + mas3 |= E500_TLB_SUPER_PERM_MASK; +#endif + return mas3; } static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) @@ -292,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_MAS8, stlbe->mas8); +#endif asm volatile("isync; tlbwe" : : : "memory"); + +#ifdef CONFIG_KVM_BOOKE_HV + /* Must clear mas8 for other host tlbwe's */ + mtspr(SPRN_MAS8, 0); + isync(); +#endif local_irq_restore(flags); trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, @@ -337,6 +156,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } +#ifdef CONFIG_KVM_E500V2 void kvmppc_map_magic(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -361,75 +181,41 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); preempt_enable(); } - -void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - /* Shadow PID may be expired on local core */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) -{ -} +#endif static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); - struct vcpu_id_table *idt = vcpu_e500->idt; - unsigned int pr, tid, ts, pid; - u32 val, eaddr; - unsigned long flags; - - ts = get_tlb_ts(gtlbe); - tid = get_tlb_tid(gtlbe); - - preempt_disable(); - - /* One guest ID may be mapped to two shadow IDs */ - for (pr = 0; pr < 2; pr++) { - /* - * The shadow PID can have a valid mapping on at most one - * host CPU. In the common case, it will be valid on this - * CPU, in which case (for TLB0) we do a local invalidation - * of the specific address. - * - * If the shadow PID is not valid on the current host CPU, or - * if we're invalidating a TLB1 entry, we invalidate the - * entire shadow PID. - */ - if (tlbsel == 1 || - (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) { - kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); - continue; - } - /* - * The guest is invalidating a TLB0 entry which is in a PID - * that has a valid shadow mapping on this host CPU. We - * search host TLB0 to invalidate it's shadow TLB entry, - * similar to __tlbil_va except that we need to look in AS1. - */ - val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; - eaddr = get_tlb_eaddr(gtlbe); + if (tlbsel == 1 && + vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { + u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; + int hw_tlb_indx; + unsigned long flags; local_irq_save(flags); - - mtspr(SPRN_MAS6, val); - asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); - val = mfspr(SPRN_MAS1); - if (val & MAS1_VALID) { - mtspr(SPRN_MAS1, val & ~MAS1_VALID); + while (tmp) { + hw_tlb_indx = __ilog2_u64(tmp & -tmp); + mtspr(SPRN_MAS0, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); + mtspr(SPRN_MAS1, 0); asm volatile("tlbwe"); + vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; + tmp &= tmp - 1; } - + mb(); + vcpu_e500->g2h_tlb1_map[esel] = 0; + vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; local_irq_restore(flags); + + return; } - preempt_enable(); + /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ + kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); } static int tlb0_set_base(gva_t addr, int sets, int ways) @@ -475,6 +261,9 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, set_base = gtlb0_set_base(vcpu_e500, eaddr); size = vcpu_e500->gtlb_params[0].ways; } else { + if (eaddr < vcpu_e500->tlb1_min_eaddr || + eaddr > vcpu_e500->tlb1_max_eaddr) + return -1; set_base = 0; } @@ -530,6 +319,16 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) } } +static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + if (vcpu_e500->g2h_tlb1_map) + memset(vcpu_e500->g2h_tlb1_map, + sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0); + if (vcpu_e500->h2g_tlb1_rmap) + memset(vcpu_e500->h2g_tlb1_rmap, + sizeof(unsigned int) * host_tlb_params[1].entries, 0); +} + static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { int tlbsel = 0; @@ -547,7 +346,7 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) int stlbsel = 1; int i; - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { struct tlbe_ref *ref = @@ -562,19 +361,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - unsigned int victim, pidsel, tsized; + unsigned int victim, tsized; int tlbsel; /* since we only have two TLBs, only lower bit is used. */ tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf; tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) - | MAS1_TID(vcpu_e500->pid[pidsel]) + | MAS1_TID(get_tlbmiss_tid(vcpu)) | MAS1_TSIZE(tsized); vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); @@ -586,23 +384,26 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, /* TID must be supplied by the caller */ static inline void kvmppc_e500_setup_stlbe( - struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe, int tsize, struct tlbe_ref *ref, u64 gvaddr, struct kvm_book3e_206_tlb_entry *stlbe) { pfn_t pfn = ref->pfn; + u32 pr = vcpu->arch.shared->msr & MSR_PR; BUG_ON(!(ref->flags & E500_TLB_VALID)); - /* Force TS=1 IPROT=0 for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) - | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) - | e500_shadow_mas3_attrib(gtlbe->mas7_3, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); + /* Force IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) | + e500_shadow_mas2_attrib(gtlbe->mas2, pr); + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | + e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + +#ifdef CONFIG_KVM_BOOKE_HV + stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; +#endif } static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -736,7 +537,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); - kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe); + kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, + ref, gvaddr, stlbe); } /* XXX only map the one-one case, for now use TLB0 */ @@ -760,7 +562,7 @@ static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* XXX for both one-one and one-to-many , for now use TLB1 */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe) + struct kvm_book3e_206_tlb_entry *stlbe, int esel) { struct tlbe_ref *ref; unsigned int victim; @@ -773,15 +575,74 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, ref = &vcpu_e500->tlb_refs[1][victim]; kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[victim]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); + } + vcpu_e500->h2g_tlb1_rmap[victim] = esel; + return victim; } -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int size = vcpu_e500->gtlb_params[1].entries; + unsigned int offset; + gva_t eaddr; + int i; + + vcpu_e500->tlb1_min_eaddr = ~0UL; + vcpu_e500->tlb1_max_eaddr = 0; + offset = vcpu_e500->gtlb_offset[1]; + + for (i = 0; i < size; i++) { + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + i]; + + if (!get_tlb_v(tlbe)) + continue; + + eaddr = get_tlb_eaddr(tlbe); + vcpu_e500->tlb1_min_eaddr = + min(vcpu_e500->tlb1_min_eaddr, eaddr); + + eaddr = get_tlb_end(tlbe); + vcpu_e500->tlb1_max_eaddr = + max(vcpu_e500->tlb1_max_eaddr, eaddr); + } +} + +static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) { + unsigned long start, end, size; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + return vcpu_e500->tlb1_min_eaddr == start || + vcpu_e500->tlb1_max_eaddr == end; +} + +/* This function is supposed to be called for a adding a new valid tlb entry */ +static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned long start, end, size; struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - /* Recalc shadow pid since MSR changes */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); + if (!get_tlb_v(gtlbe)) + return; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start); + vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end); } static inline int kvmppc_e500_gtlbe_invalidate( @@ -794,6 +655,9 @@ static inline int kvmppc_e500_gtlbe_invalidate( if (unlikely(get_tlb_iprot(gtlbe))) return -1; + if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + kvmppc_recalc_tlb1map_range(vcpu_e500); + gtlbe->mas1 = 0; return 0; @@ -811,7 +675,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); /* Invalidate all vcpu id mappings */ - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); return EMULATE_DONE; } @@ -844,7 +708,59 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) } /* Invalidate all vcpu id mappings */ - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); + + return EMULATE_DONE; +} + +static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int pid, int rt) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + int tid, esel; + + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { + tlbe = get_entry(vcpu_e500, tlbsel, esel); + tid = get_tlb_tid(tlbe); + if (rt == 0 || tid == pid) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + } +} + +static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, + int ra, int rb) +{ + int tlbsel, esel; + gva_t ea; + + ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra); + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); + if (esel >= 0) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + break; + } + } +} + +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int pid = get_cur_spid(vcpu); + + if (rt == 0 || rt == 1) { + tlbilx_all(vcpu_e500, 0, pid, rt); + tlbilx_all(vcpu_e500, 1, pid, rt); + } else if (rt == 3) { + tlbilx_one(vcpu_e500, pid, ra, rb); + } return EMULATE_DONE; } @@ -929,9 +845,7 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int stid; preempt_disable(); - stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), - get_tlb_tid(gtlbe), - get_cur_pr(&vcpu_e500->vcpu), 0); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); stlbe->mas1 |= MAS1_TID(stid); write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); @@ -941,16 +855,21 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe; - int tlbsel, esel; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; + int tlbsel, esel, stlbsel, sesel; + int recal = 0; tlbsel = get_tlb_tlbsel(vcpu); esel = get_tlb_esel(vcpu, tlbsel); gtlbe = get_entry(vcpu_e500, tlbsel, esel); - if (get_tlb_v(gtlbe)) + if (get_tlb_v(gtlbe)) { inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + if ((tlbsel == 1) && + kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + recal = 1; + } gtlbe->mas1 = vcpu->arch.shared->mas1; gtlbe->mas2 = vcpu->arch.shared->mas2; @@ -959,10 +878,20 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, gtlbe->mas2, gtlbe->mas7_3); + if (tlbsel == 1) { + /* + * If a valid tlb1 entry is overwritten then recalculate the + * min/max TLB1 map address range otherwise no need to look + * in tlb1 array. + */ + if (recal) + kvmppc_recalc_tlb1map_range(vcpu_e500); + else + kvmppc_set_tlb1map_range(vcpu, gtlbe); + } + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - struct kvm_book3e_206_tlb_entry stlbe; - int stlbsel, sesel; u64 eaddr; u64 raddr; @@ -989,7 +918,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) * are mapped on the fly. */ stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe); + raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); break; default: @@ -1003,6 +932,48 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + + int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); @@ -1066,7 +1037,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, &priv->ref, eaddr, &stlbe); break; @@ -1075,7 +1046,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, - gtlbe, &stlbe); + gtlbe, &stlbe, esel); break; } @@ -1087,52 +1058,13 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } -int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, - gva_t eaddr, unsigned int pid, int as) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int esel, tlbsel; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); - if (esel >= 0) - return index_of(tlbsel, esel); - } - - return -1; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - if (vcpu->arch.pid != pid) { - vcpu_e500->pid[0] = vcpu->arch.pid = pid; - kvmppc_e500_recalc_shadow_pid(vcpu_e500); - } -} - -void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - struct kvm_book3e_206_tlb_entry *tlbe; - - /* Insert large initial mapping for guest. */ - tlbe = get_entry(vcpu_e500, 1, 0); - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); - tlbe->mas2 = 0; - tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; - - /* 4K map for serial output. Used by kernel wrapper. */ - tlbe = get_entry(vcpu_e500, 1, 1); - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); - tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; - tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; -} - static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; + clear_tlb1_bitmap(vcpu_e500); + kfree(vcpu_e500->g2h_tlb1_map); + clear_tlb_refs(vcpu_e500); kfree(vcpu_e500->gtlb_priv[0]); kfree(vcpu_e500->gtlb_priv[1]); @@ -1155,6 +1087,36 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_arch = NULL; } +void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.mas0 = vcpu->arch.shared->mas0; + sregs->u.e.mas1 = vcpu->arch.shared->mas1; + sregs->u.e.mas2 = vcpu->arch.shared->mas2; + sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; + sregs->u.e.mas4 = vcpu->arch.shared->mas4; + sregs->u.e.mas6 = vcpu->arch.shared->mas6; + + sregs->u.e.mmucfg = vcpu->arch.mmucfg; + sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0]; + sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1]; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; +} + +int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu->arch.shared->mas0 = sregs->u.e.mas0; + vcpu->arch.shared->mas1 = sregs->u.e.mas1; + vcpu->arch.shared->mas2 = sregs->u.e.mas2; + vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; + vcpu->arch.shared->mas4 = sregs->u.e.mas4; + vcpu->arch.shared->mas6 = sregs->u.e.mas6; + } + + return 0; +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg) { @@ -1163,6 +1125,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, char *virt; struct page **pages; struct tlbe_priv *privs[2] = {}; + u64 *g2h_bitmap = NULL; size_t array_len; u32 sets; int num_pages, ret, i; @@ -1224,10 +1187,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, if (!privs[0] || !privs[1]) goto err_put_page; + g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], + GFP_KERNEL); + if (!g2h_bitmap) + goto err_put_page; + free_gtlb(vcpu_e500); vcpu_e500->gtlb_priv[0] = privs[0]; vcpu_e500->gtlb_priv[1] = privs[1]; + vcpu_e500->g2h_tlb1_map = g2h_bitmap; vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) (virt + (cfg->array & (PAGE_SIZE - 1))); @@ -1238,14 +1207,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; - vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; + + vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); if (params.tlb_sizes[0] <= 2048) - vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; - vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; + vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; - vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; - vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; + vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; vcpu_e500->shared_tlb_pages = pages; vcpu_e500->num_shared_tlb_pages = num_pages; @@ -1256,6 +1227,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; vcpu_e500->gtlb_params[1].sets = 1; + kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; err_put_page: @@ -1274,13 +1246,14 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, struct kvm_dirty_tlb *dirty) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - + kvmppc_recalc_tlb1map_range(vcpu_e500); clear_tlb_refs(vcpu_e500); return 0; } int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { + struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; @@ -1357,22 +1330,32 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->g2h_tlb1_map) + goto err; + + vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * + host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->h2g_tlb1_rmap) goto err; /* Init TLB configuration register */ - vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & + vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; - vcpu_e500->tlb0cfg |= + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; - vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & + vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries; - vcpu_e500->tlb0cfg |= + vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; + vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; err: @@ -1385,8 +1368,7 @@ err: void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { free_gtlb(vcpu_e500); - kvmppc_e500_id_table_free(vcpu_e500); - + kfree(vcpu_e500->h2g_tlb1_rmap); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h deleted file mode 100644 index 5c6d2d7bf05..00000000000 --- a/arch/powerpc/kvm/e500_tlb.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Yu Liu, yu.liu@freescale.com - * - * Description: - * This file is based on arch/powerpc/kvm/44x_tlb.h, - * by Hollis Blanchard <hollisb@us.ibm.com>. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - */ - -#ifndef __KVM_E500_TLB_H__ -#define __KVM_E500_TLB_H__ - -#include <linux/kvm_host.h> -#include <asm/mmu-book3e.h> -#include <asm/tlb.h> -#include <asm/kvm_e500.h> - -/* This geometry is the legacy default -- can be overridden by userspace */ -#define KVM_E500_TLB0_WAY_SIZE 128 -#define KVM_E500_TLB0_WAY_NUM 2 - -#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) -#define KVM_E500_TLB1_SIZE 16 - -#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) -#define tlbsel_of(index) ((index) >> 16) -#define esel_of(index) ((index) & 0xFFFF) - -#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) -#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) -#define MAS2_ATTRIB_MASK \ - (MAS2_X0 | MAS2_X1) -#define MAS3_ATTRIB_MASK \ - (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ - | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) - -extern void kvmppc_dump_tlbs(struct kvm_vcpu *); -extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); -extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); -extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); -extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); -extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); - -/* TLB helper functions */ -static inline unsigned int -get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 7) & 0x1f; -} - -static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas2 & 0xfffff000; -} - -static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1ULL << 10 << pgsize; -} - -static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - u64 bytes = get_tlb_bytes(tlbe); - return get_tlb_eaddr(tlbe) + bytes - 1; -} - -static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas7_3 & ~0xfffULL; -} - -static inline unsigned int -get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 16) & 0xff; -} - -static inline unsigned int -get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 12) & 0x1; -} - -static inline unsigned int -get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 31) & 0x1; -} - -static inline unsigned int -get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 30) & 0x1; -} - -static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.pid & 0xff; -} - -static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) -{ - return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); -} - -static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) -{ - return !!(vcpu->arch.shared->msr & MSR_PR); -} - -static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.shared->mas6 >> 16) & 0xff; -} - -static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.shared->mas6 & 0x1; -} - -static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) -{ - /* - * Manual says that tlbsel has 2 bits wide. - * Since we only have two TLBs, only lower bit is used. - */ - return (vcpu->arch.shared->mas0 >> 28) & 0x1; -} - -static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.shared->mas0 & 0xfff; -} - -static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.shared->mas0 >> 16) & 0xfff; -} - -static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvm_book3e_206_tlb_entry *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -#endif /* __KVM_E500_TLB_H__ */ diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c new file mode 100644 index 00000000000..fe6c1de6b70 --- /dev/null +++ b/arch/powerpc/kvm/e500mc.c @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Varun Sethi, <varun.sethi@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/kvm/e500.c, + * by Yu Liu <yu.liu@freescale.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_ppc.h> +#include <asm/dbell.h> + +#include "booke.h" +#include "e500.h" + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) +{ + enum ppc_dbell dbell_type; + unsigned long tag; + + switch (type) { + case INT_CLASS_NONCRIT: + dbell_type = PPC_G_DBELL; + break; + case INT_CLASS_CRIT: + dbell_type = PPC_G_DBELL_CRIT; + break; + case INT_CLASS_MC: + dbell_type = PPC_G_DBELL_MC; + break; + default: + WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type); + return; + } + + + tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + mb(); + ppc_msgsnd(dbell_type, 0, tag); +} + +/* gtlbe must not be mapped by more than one host tlb entry */ +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned int tid, ts; + u32 val, eaddr, lpid; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + lpid = vcpu_e500->vcpu.kvm->arch.lpid; + + /* We search the host TLB to invalidate its shadow TLB entry */ + val = (tid << 16) | ts; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + mtspr(SPRN_MAS5, MAS5_SGS | lpid); + + asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + mtspr(SPRN_MAS5, 0); + /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */ + mtspr(SPRN_MAS8, 0); + isync(); + + local_irq_restore(flags); +} + +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + asm volatile("tlbilxlpid"); + mtspr(SPRN_MAS5, 0); + local_irq_restore(flags); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + vcpu->arch.pid = pid; +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_booke_vcpu_load(vcpu, cpu); + + mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); + mtspr(SPRN_GPIR, vcpu->vcpu_id); + mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + mtspr(SPRN_EPLC, vcpu->arch.eplc); + mtspr(SPRN_EPSC, vcpu->arch.epsc); + + mtspr(SPRN_GIVPR, vcpu->arch.ivpr); + mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0); + mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1); + mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2); + mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3); + + mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0); + mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1); + + mtspr(SPRN_GEPR, vcpu->arch.epr); + mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); + mtspr(SPRN_GESR, vcpu->arch.shared->esr); + + if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + kvmppc_e500_tlbil_all(vcpu_e500); + + kvmppc_load_guest_fp(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + vcpu->arch.eplc = mfspr(SPRN_EPLC); + vcpu->arch.epsc = mfspr(SPRN_EPSC); + + vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0); + vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1); + vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2); + vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3); + + vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0); + vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1); + + vcpu->arch.epr = mfspr(SPRN_GEPR); + vcpu->arch.shared->dar = mfspr(SPRN_GDEAR); + vcpu->arch.shared->esr = mfspr(SPRN_GESR); + + vcpu->arch.oldpir = mfspr(SPRN_PIR); + + kvmppc_booke_vcpu_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0) + r = 0; + else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \ + SPRN_EPCR_DUVD; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; + vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; + + vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); + + vcpu->arch.cpu_type = KVM_CPU_E500MC; + + return 0; +} + +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM | + KVM_SREGS_E_PC; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + kvmppc_get_sregs_e500_tlb(vcpu, sregs); + + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; + sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int ret; + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); + if (ret < 0) + return ret; + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PC) { + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = + sregs->u.e.ivor_high[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = + sregs->u.e.ivor_high[5]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + vcpu = &vcpu_e500->vcpu; + + /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */ + vcpu->arch.oldpir = 0xffffffff; + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + + return vcpu; + +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_vcpu: + kvm_vcpu_uninit(vcpu); + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + free_page((unsigned long)vcpu->arch.shared); + kvmppc_e500_tlb_uninit(vcpu_e500); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + int lpid; + + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + return lpid; + + kvm->arch.lpid = lpid; + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_lpid(kvm->arch.lpid); +} + +static int __init kvmppc_e500mc_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + kvmppc_init_lpid(64); + kvmppc_claim_lpid(0); /* host */ + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +} + +static void __exit kvmppc_e500mc_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500mc_init); +module_exit(kvmppc_e500mc_exit); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 968f4010188..f90e86dea7a 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/kvm_host.h> +#include <linux/clockchips.h> #include <asm/reg.h> #include <asm/time.h> @@ -35,7 +36,9 @@ #define OP_TRAP 3 #define OP_TRAP_64 2 +#define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_LBZX 87 #define OP_31_XOP_STWX 151 #define OP_31_XOP_STBX 215 @@ -102,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) */ dec_time = vcpu->arch.dec; - dec_time *= 1000; - do_div(dec_time, tb_ticks_per_usec); + /* + * Guest timebase ticks at the same frequency as host decrementer. + * So use the host decrementer calculations for decrementer emulation. + */ + dec_time = dec_time << decrementer_clockevent.shift; + do_div(dec_time, decrementer_clockevent.mult); dec_nsec = do_div(dec_time, NSEC_PER_SEC); hrtimer_start(&vcpu->arch.dec_timer, ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); @@ -141,14 +148,13 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = kvmppc_get_last_inst(vcpu); - u32 ea; - int ra; - int rb; - int rs; - int rt; - int sprn; + int ra = get_ra(inst); + int rs = get_rs(inst); + int rt = get_rt(inst); + int sprn = get_sprn(inst); enum emulation_result emulated = EMULATE_DONE; int advance = 1; + ulong spr_val = 0; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); @@ -170,173 +176,143 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case 31: switch (get_xop(inst)) { + case OP_31_XOP_TRAP: +#ifdef CONFIG_64BIT + case OP_31_XOP_TRAP_64: +#endif +#ifdef CONFIG_PPC_BOOK3S + kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); +#else + kvmppc_core_queue_program(vcpu, + vcpu->arch.shared->esr | ESR_PTR); +#endif + advance = 0; + break; case OP_31_XOP_LWZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; case OP_31_XOP_LBZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; case OP_31_XOP_LBZUX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_STWX: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); break; case OP_31_XOP_STBX: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_31_XOP_STBUX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, rs, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_LHAX: - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); break; case OP_31_XOP_LHZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; case OP_31_XOP_LHZUX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_MFSPR: - sprn = get_sprn(inst); - rt = get_rt(inst); - switch (sprn) { case SPRN_SRR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + spr_val = vcpu->arch.shared->srr0; break; case SPRN_SRR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + spr_val = vcpu->arch.shared->srr1; break; case SPRN_PVR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; + spr_val = vcpu->arch.pvr; + break; case SPRN_PIR: - kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; + spr_val = vcpu->vcpu_id; + break; case SPRN_MSSSR0: - kvmppc_set_gpr(vcpu, rt, 0); break; + spr_val = 0; + break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. * In fact, we probably will never see these traps. */ case SPRN_TBWL: - kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; + spr_val = get_tb() >> 32; + break; case SPRN_TBWU: - kvmppc_set_gpr(vcpu, rt, get_tb()); break; + spr_val = get_tb(); + break; case SPRN_SPRG0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + spr_val = vcpu->arch.shared->sprg0; break; case SPRN_SPRG1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + spr_val = vcpu->arch.shared->sprg1; break; case SPRN_SPRG2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + spr_val = vcpu->arch.shared->sprg2; break; case SPRN_SPRG3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + spr_val = vcpu->arch.shared->sprg3; break; /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ case SPRN_DEC: - { - kvmppc_set_gpr(vcpu, rt, - kvmppc_get_dec(vcpu, get_tb())); + spr_val = kvmppc_get_dec(vcpu, get_tb()); break; - } default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); - if (emulated == EMULATE_FAIL) { - printk("mfspr: unknown spr %x\n", sprn); - kvmppc_set_gpr(vcpu, rt, 0); + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, + &spr_val); + if (unlikely(emulated == EMULATE_FAIL)) { + printk(KERN_INFO "mfspr: unknown spr " + "0x%x\n", sprn); } break; } + kvmppc_set_gpr(vcpu, rt, spr_val); kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_31_XOP_STHUX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_MTSPR: - sprn = get_sprn(inst); - rs = get_rs(inst); + spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_SRR0: - vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->srr0 = spr_val; break; case SPRN_SRR1: - vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->srr1 = spr_val; break; /* XXX We need to context-switch the timebase for @@ -347,27 +323,29 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_MSSSR0: break; case SPRN_DEC: - vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.dec = spr_val; kvmppc_emulate_dec(vcpu); break; case SPRN_SPRG0: - vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg0 = spr_val; break; case SPRN_SPRG1: - vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg1 = spr_val; break; case SPRN_SPRG2: - vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg2 = spr_val; break; case SPRN_SPRG3: - vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg3 = spr_val; break; default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, + spr_val); if (emulated == EMULATE_FAIL) - printk("mtspr: unknown spr %x\n", sprn); + printk(KERN_INFO "mtspr: unknown spr " + "0x%x\n", sprn); break; } kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); @@ -382,7 +360,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_LWBRX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); break; @@ -390,25 +367,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_STWBRX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 0); break; case OP_31_XOP_LHBRX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); break; case OP_31_XOP_STHBRX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 0); @@ -421,99 +389,78 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_LWZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; case OP_LWZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LBZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; case OP_LBZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STW: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); break; case OP_STWU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STB: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_STBU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LHZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; case OP_LHZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LHA: - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); break; case OP_LHAU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STH: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_STHU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; default: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 00d7e345b3f..1493c8de947 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) v->requests; } +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return 1; +} + int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { int nr = kvmppc_get_gpr(vcpu, 11); @@ -74,7 +79,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } case HC_VENDOR_KVM | KVM_HC_FEATURES: r = HC_EV_SUCCESS; -#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500) +#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif @@ -109,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) goto out; #endif +#ifdef CONFIG_KVM_BOOKE_HV + if (!cpu_has_feature(CPU_FTR_EMB_HV)) + goto out; +#endif + r = true; out: @@ -225,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif r = 1; @@ -234,10 +244,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: r = 1; break; +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_CAP_PPC_SMT: r = threads_per_core; break; @@ -267,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_CAP_PPC_GET_SMMU_INFO: + r = 1; + break; +#endif default: r = 0; break; @@ -588,21 +605,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } -void kvm_vcpu_kick(struct kvm_vcpu *vcpu) -{ - int me; - int cpu = vcpu->cpu; - - me = get_cpu(); - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } else if (cpu != me && cpu != -1) { - smp_send_reschedule(vcpu->cpu); - } - put_cpu(); -} - int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { @@ -611,6 +613,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) } kvmppc_core_queue_external(vcpu, irq); + kvm_vcpu_kick(vcpu); return 0; @@ -633,7 +636,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; @@ -710,7 +713,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; @@ -720,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } #endif - default: r = -EINVAL; } @@ -777,7 +779,7 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; struct kvm *kvm = filp->private_data; @@ -788,7 +790,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); goto out; } +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; @@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp, } #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_PPC_GET_SMMU_INFO: { + struct kvm *kvm = filp->private_data; + struct kvm_ppc_smmu_info info; + + memset(&info, 0, sizeof(info)); + r = kvm_vm_ioctl_get_smmu_info(kvm, &info); + if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) + r = -EFAULT; + break; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; } @@ -808,6 +824,40 @@ out: return r; } +static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)]; +static unsigned long nr_lpids; + +long kvmppc_alloc_lpid(void) +{ + long lpid; + + do { + lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS); + if (lpid >= nr_lpids) { + pr_err("%s: No LPIDs free\n", __func__); + return -ENOMEM; + } + } while (test_and_set_bit(lpid, lpid_inuse)); + + return lpid; +} + +void kvmppc_claim_lpid(long lpid) +{ + set_bit(lpid, lpid_inuse); +} + +void kvmppc_free_lpid(long lpid) +{ + clear_bit(lpid, lpid_inuse); +} + +void kvmppc_init_lpid(unsigned long nr_lpids_param) +{ + nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); + memset(lpid_inuse, 0, sizeof(lpid_inuse)); +} + int kvm_arch_init(void *opaque) { return 0; diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 8167d42a776..bf191e72b2d 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case SIGNAL_EXITS: vcpu->stat.signal_exits++; break; + case DBELL_EXITS: + vcpu->stat.dbell_exits++; + break; + case GDBELL_EXITS: + vcpu->stat.gdbell_exits++; + break; } } diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 455881a5563..093d6316435 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -160,48 +160,3 @@ _GLOBAL(__clear_user) PPC_LONG 1b,91b PPC_LONG 8b,92b .text - -_GLOBAL(__strncpy_from_user) - addi r6,r3,-1 - addi r4,r4,-1 - cmpwi 0,r5,0 - beq 2f - mtctr r5 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r6) - bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ - beq 3f -2: addi r6,r6,1 -3: subf r3,r3,r6 - blr -99: li r3,-EFAULT - blr - - .section __ex_table,"a" - PPC_LONG 1b,99b - .text - -/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ -_GLOBAL(__strnlen_user) - addi r7,r3,-1 - subf r6,r7,r5 /* top+1 - str */ - cmplw 0,r4,r6 - bge 0f - mr r6,r4 -0: mtctr r6 /* ctr = min(len, top - str) */ -1: lbzu r0,1(r7) /* get next byte */ - cmpwi 0,r0,0 - bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ - addi r7,r7,1 - subf r3,r3,r7 /* number of bytes we have looked at */ - beqlr /* return if we found a 0 byte */ - cmpw 0,r3,r4 /* did we look at all len bytes? */ - blt 99f /* if not, must have hit top */ - addi r3,r4,1 /* return len + 1 to indicate no null found */ - blr -99: li r3,0 /* bad address, return 0 */ - blr - - .section __ex_table,"a" - PPC_LONG 1b,99b diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1d75c92ea8f..66519d263da 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -151,7 +151,7 @@ static void spufs_evict_inode(struct inode *inode) { struct spufs_inode_info *ei = SPUFS_I(inode); - end_writeback(inode); + clear_inode(inode); if (ei->i_ctx) put_spu_context(ei->i_ctx); if (ei->i_gang) |