linux - Linux kernel source tree

diff options

Diffstat (limited to 'Documentation/sysctl/README')

0 files changed, 0 insertions, 0 deletions

ct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; + }; + +This ioctl unmaps the memory in the vcpu's address space starting at +"vcpu_addr" with the length "length". The field "user_addr" is ignored. +All parameters need to be alligned by 1 megabyte. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f0937552175..2d3248895de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -233,6 +233,10 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) (__u64) vcpu->arch.sie_block) vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; smp_mb(); + + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_free(vcpu->arch.gmap); + free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); kfree(vcpu); @@ -263,12 +267,20 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); - gmap_free(kvm->arch.gmap); + if (!kvm_is_ucontrol(kvm)) + gmap_free(kvm->arch.gmap); } /* Section: vcpu related */ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->arch.gmap = gmap_alloc(current->mm); + if (!vcpu->arch.gmap) + return -ENOMEM; + return 0; + } + vcpu->arch.gmap = vcpu->kvm->arch.gmap; return 0; } @@ -687,6 +699,42 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_S390_INITIAL_RESET: r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); break; +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_S390_UCAS_MAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, + ucasmap.vcpu_addr, ucasmap.length); + break; + } + case KVM_S390_UCAS_UNMAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, + ucasmap.length); + break; + } +#endif default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index bba393a6760..0a66c107269 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -658,6 +658,16 @@ struct kvm_clock_data { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) + +/* enable ucontrol for s390 */ +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; +#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) +#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) + /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -- cgit v1.2.3-70-g09d2 From e168bf8de33e16a909df2401af1f7d419c5780de Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:22 +0100 Subject: KVM: s390: ucontrol: export page faults to user This patch introduces a new exit reason in the kvm_run structure named KVM_EXIT_S390_UCONTROL. This exit indicates, that a virtual cpu has regognized a fault on the host page table. The idea is that userspace can handle this fault by mapping memory at the fault location into the cpu's address space and then continue to run the virtual cpu. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 14 ++++++++++++++ arch/s390/kvm/kvm-s390.c | 32 +++++++++++++++++++++++++++----- arch/s390/kvm/kvm-s390.h | 1 + include/linux/kvm.h | 6 ++++++ 4 files changed, 48 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ee394b26326..6e53ff51422 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1694,6 +1694,20 @@ s390 specific. s390 specific. + /* KVM_EXIT_S390_UCONTROL */ + struct { + __u64 trans_exc_code; + __u32 pgm_code; + } s390_ucontrol; + +s390 specific. A page fault has occurred for a user controlled virtual +machine (KVM_VM_S390_UNCONTROL) on it's host page table that cannot be +resolved by the kernel. +The program code and the translation exception code that were placed +in the cpu's lowcore are presented here as defined by the z Architecture +Principles of Operation Book in the Chapter for Dynamic Address Translation +(DAT) + /* KVM_EXIT_DCR */ struct { __u32 dcrn; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2d3248895de..af05328aca2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -493,8 +493,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -static void __vcpu_run(struct kvm_vcpu *vcpu) +static int __vcpu_run(struct kvm_vcpu *vcpu) { + int rc; + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); if (need_resched()) @@ -511,9 +513,15 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); - if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + if (rc) { + if (kvm_is_ucontrol(vcpu->kvm)) { + rc = SIE_INTERCEPT_UCONTROL; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = 0; + } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); @@ -522,6 +530,7 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); + return rc; } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -542,6 +551,7 @@ rerun_vcpu: case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: + case KVM_EXIT_S390_UCONTROL: break; default: BUG(); @@ -553,7 +563,9 @@ rerun_vcpu: might_fault(); do { - __vcpu_run(vcpu); + rc = __vcpu_run(vcpu); + if (rc) + break; rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); @@ -565,6 +577,16 @@ rerun_vcpu: rc = -EINTR; } +#ifdef CONFIG_KVM_S390_UCONTROL + if (rc == SIE_INTERCEPT_UCONTROL) { + kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; + kvm_run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + kvm_run->s390_ucontrol.pgm_code = 0x10; + rc = 0; + } +#endif + if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 45b236a7c73..62aa5f19bb9 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -26,6 +26,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) +#define SIE_INTERCEPT_UCONTROL (1<<1) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0a66c107269..7f686f6708b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -162,6 +162,7 @@ struct kvm_pit_config { #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 +#define KVM_EXIT_S390_UCONTROL 20 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -249,6 +250,11 @@ struct kvm_run { #define KVM_S390_RESET_CPU_INIT 8 #define KVM_S390_RESET_IPL 16 __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { + __u64 trans_exc_code; + __u32 pgm_code; + } s390_ucontrol; /* KVM_EXIT_DCR */ struct { __u32 dcrn; -- cgit v1.2.3-70-g09d2 From 5b1c1493afe8d69909f9df3221bb2fffdf479f4a Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:23 +0100 Subject: KVM: s390: ucontrol: export SIE control block to user This patch exports the s390 SIE hardware control block to userspace via the mapping of the vcpu file descriptor. In order to do so, a new arch callback named kvm_arch_vcpu_fault is introduced for all architectures. It allows to map architecture specific pages. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 5 +++++ arch/ia64/kvm/kvm-ia64.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm.h | 2 ++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 2 +- 8 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6e53ff51422..5ebf47d99e5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -218,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants single-threaded guest vcpus, it should make all vcpu ids be a multiple of the number of vcpus per vcore. +For virtual cpus that have been created with S390 user controlled virtual +machines, the resulting vcpu fd can be memory mapped at page offset +KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual +cpu's hardware control block. + 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index df6b1419405..8ca7261e7b3 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1566,6 +1566,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 83f24456987..a5671616af8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -659,6 +659,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { u32 inst_lis = 0x3c000000; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af05328aca2..d6bc65aeb95 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -763,6 +763,19 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 06925b4bcc2..a3ce196d21f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2814,6 +2814,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) { int ret; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7f686f6708b..8f888df206a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -440,6 +440,8 @@ struct kvm_ppc_pvinfo { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +#define KVM_S390_SIE_PAGE_OFFSET 1 + /* * ioctls for /dev/kvm fds: */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82375e145e6..d4d4d709211 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,6 +450,7 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_dev_ioctl_check_extension(long ext); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32e3b048a6c..64be836f334 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1657,7 +1657,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif else - return VM_FAULT_SIGBUS; + return kvm_arch_vcpu_fault(vcpu, vmf); get_page(page); vmf->page = page; return 0; -- cgit v1.2.3-70-g09d2 From ccc7910fe564d99415def7c041fa261e62a43011 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:26 +0100 Subject: KVM: s390: ucontrol: interface to inject faults on a vcpu page table This patch allows the user to fault in pages on a virtual cpus address space for user controlled virtual machines. Typically this is superfluous because userspace can just create a mapping and let the kernel's page fault logic take are of it. There is one exception: SIE won't start if the lowcore is not present. Normally the kernel takes care of this [handle_validity() in arch/s390/kvm/intercept.c] but since the kernel does not handle intercepts for user controlled virtual machines, userspace needs to be able to handle this condition. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 16 ++++++++++++++++ arch/s390/kvm/kvm-s390.c | 6 ++++++ include/linux/kvm.h | 1 + 3 files changed, 23 insertions(+) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5ebf47d99e5..a67fb35993f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1539,6 +1539,22 @@ This ioctl unmaps the memory in the vcpu's address space starting at "vcpu_addr" with the length "length". The field "user_addr" is ignored. All parameters need to be alligned by 1 megabyte. +4.66 KVM_S390_VCPU_FAULT + +Capability: KVM_CAP_S390_UCONTROL +Architectures: s390 +Type: vcpu ioctl +Parameters: vcpu absolute address (in) +Returns: 0 in case of success + +This call creates a page table entry on the virtual cpu's address space +(for user controlled virtual machines) or the virtual machine's address +space (for regular virtual machines). This only works for minor faults, +thus it's recommended to access subject memory page via the user page +table upfront. This is useful to handle validity intercepts for user +controlled virtual machines to fault in the virtual cpu's lowcore pages +prior to calling the KVM_RUN ioctl. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5b5c28e471d..8489edf80c8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -761,6 +761,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } #endif + case KVM_S390_VCPU_FAULT: { + r = gmap_fault(arg, vcpu->arch.gmap); + if (!IS_ERR_VALUE(r)) + r = 0; + break; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8f888df206a..778e748927b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -675,6 +675,7 @@ struct kvm_s390_ucas_mapping { }; #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) +#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) -- cgit v1.2.3-70-g09d2 From 1efd0f595ab9d10fef1486dfdef952107c91f3db Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:29 +0100 Subject: KVM: s390: ucontrol: announce capability for user controlled vms This patch announces a new capability KVM_CAP_S390_UCONTROL that indicates that kvm can now support virtual machines that are controlled by userspace. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 3 +++ include/linux/kvm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index abf784d8c68..a1061b361c4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -129,6 +129,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_CAP_S390_UCONTROL: +#endif r = 1; break; default: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 778e748927b..6cf048d9604 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -569,6 +569,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 +#define KVM_CAP_S390_UCONTROL 73 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-70-g09d2 From b9e5dc8d4511e6a00862a795319569e7fe7f60f4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 11 Jan 2012 11:20:30 +0100 Subject: KVM: provide synchronous registers in kvm_run On some cpus the overhead for virtualization instructions is in the same range as a system call. Having to call multiple ioctls to get set registers will make certain userspace handled exits more expensive than necessary. Lets provide a section in kvm_run that works as a shared save area for guest registers. We also provide two 64bit flags fields (architecture specific), that will specify 1. which parts of these fields are valid. 2. which registers were modified by userspace Each bit for these flag fields will define a group of registers (like general purpose) or a single register. Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 23 +++++++++++++++++++++++ arch/ia64/include/asm/kvm.h | 4 ++++ arch/powerpc/include/asm/kvm.h | 4 ++++ arch/s390/include/asm/kvm.h | 3 +++ arch/x86/include/asm/kvm.h | 4 ++++ include/linux/kvm.h | 15 +++++++++++++++ 6 files changed, 53 insertions(+) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a67fb35993f..7ca696227d3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1771,6 +1771,29 @@ developer registration required to access it). /* Fix the size of the union. */ char padding[256]; }; + + /* + * shared registers between kvm and userspace. + * kvm_valid_regs specifies the register classes set by the host + * kvm_dirty_regs specified the register classes dirtied by userspace + * struct kvm_sync_regs is architecture specific, as well as the + * bits for kvm_valid_regs and kvm_dirty_regs + */ + __u64 kvm_valid_regs; + __u64 kvm_dirty_regs; + union { + struct kvm_sync_regs regs; + char padding[1024]; + } s; + +If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access +certain guest registers without having to call SET/GET_*REGS. Thus we can +avoid some system call overhead if userspace has to handle the exit. +Userspace can query the validity of the structure by checking +kvm_valid_regs for specific bits. These bits are architecture specific +and usually define the validity of a groups of registers. (e.g. one bit + for general purpose registers) + }; 6. Capabilities that can be enabled diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index bc90c75adf6..b9f82c84f09 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -261,4 +261,8 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + #endif diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f7727d91ac6..7d9d4de057e 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -265,6 +265,10 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + #define KVM_REG_MASK 0x001f #define KVM_REG_EXT_MASK 0xffe0 #define KVM_REG_GPR 0x0000 diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 82b32a100c7..325560afb77 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -41,4 +41,7 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; #endif diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4d8dcbdfc12..e7d1c194d27 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -321,4 +321,8 @@ struct kvm_xcrs { __u64 padding[16]; }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6cf048d9604..245bcb3a0fc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -279,6 +279,20 @@ struct kvm_run { /* Fix the size of the union. */ char padding[256]; }; + + /* + * shared registers between kvm and userspace. + * kvm_valid_regs specifies the register classes set by the host + * kvm_dirty_regs specified the register classes dirtied by userspace + * struct kvm_sync_regs is architecture specific, as well as the + * bits for kvm_valid_regs and kvm_dirty_regs + */ + __u64 kvm_valid_regs; + __u64 kvm_dirty_regs; + union { + struct kvm_sync_regs regs; + char padding[1024]; + } s; }; /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */ @@ -570,6 +584,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 +#define KVM_CAP_SYNC_REGS 74 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-70-g09d2 From dc83b8bc0256ee682506ed83853a98eaba529c6f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 18 Aug 2011 15:25:21 -0500 Subject: KVM: PPC: e500: MMU API This implements a shared-memory API for giving host userspace access to the guest's TLB. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 74 +++++++ arch/powerpc/include/asm/kvm.h | 35 ++++ arch/powerpc/include/asm/kvm_e500.h | 24 +-- arch/powerpc/include/asm/kvm_ppc.h | 5 + arch/powerpc/kvm/e500.c | 5 +- arch/powerpc/kvm/e500_emulate.c | 12 +- arch/powerpc/kvm/e500_tlb.c | 393 ++++++++++++++++++++++++------------ arch/powerpc/kvm/e500_tlb.h | 38 ++-- arch/powerpc/kvm/powerpc.c | 28 +++ include/linux/kvm.h | 18 ++ 10 files changed, 469 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7ca696227d3..bcd45d5afca 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1409,6 +1409,38 @@ The following flags are defined: If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +4.59 KVM_DIRTY_TLB + +Capability: KVM_CAP_SW_TLB +Architectures: ppc +Type: vcpu ioctl +Parameters: struct kvm_dirty_tlb (in) +Returns: 0 on success, -1 on error + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + +This must be called whenever userspace has changed an entry in the shared +TLB, prior to calling KVM_RUN on the associated vcpu. + +The "bitmap" field is the userspace address of an array. This array +consists of a number of bits, equal to the total number of TLB entries as +determined by the last successful call to KVM_CONFIG_TLB, rounded up to the +nearest multiple of 64. + +Each bit corresponds to one TLB entry, ordered the same as in the shared TLB +array. + +The array is little-endian: the bit 0 is the least significant bit of the +first byte, bit 8 is the least significant bit of the second byte, etc. +This avoids any complications with differing word sizes. + +The "num_dirty" field is a performance hint for KVM to determine whether it +should skip processing the bitmap and just invalidate everything. It must +be set to the number of set bits in the bitmap. + 4.62 KVM_CREATE_SPAPR_TCE Capability: KVM_CAP_SPAPR_TCE @@ -1842,3 +1874,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the HTAB invisible to the guest. When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. + +6.3 KVM_CAP_SW_TLB + +Architectures: ppc +Parameters: args[0] is the address of a struct kvm_config_tlb +Returns: 0 on success; -1 on error + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +Configures the virtual CPU's TLB array, establishing a shared memory area +between userspace and KVM. The "params" and "array" fields are userspace +addresses of mmu-type-specific data structures. The "array_len" field is an +safety mechanism, and should be set to the size in bytes of the memory that +userspace has reserved for the array. It must be at least the size dictated +by "mmu_type" and "params". + +While KVM_RUN is active, the shared region is under control of KVM. Its +contents are undefined, and any modification by userspace results in +boundedly undefined behavior. + +On return from KVM_RUN, the shared region will reflect the current state of +the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB +to tell KVM which entries have been changed, prior to calling KVM_RUN again +on this vcpu. + +For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: + - The "params" field is of type "struct kvm_book3e_206_tlb_params". + - The "array" field points to an array of type "struct + kvm_book3e_206_tlb_entry". + - The array consists of all entries in the first TLB, followed by all + entries in the second TLB. + - Within a TLB, entries are ordered first by increasing set number. Within a + set, entries are ordered by way (increasing ESEL). + - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1) + where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value. + - The tsize field of mas1 shall be set to 4K on TLB0, even though the + hardware ignores this value for TLB0. diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 7d9d4de057e..663c57f8716 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -296,4 +296,39 @@ struct kvm_allocate_rma { __u64 rma_size; }; +struct kvm_book3e_206_tlb_entry { + __u32 mas8; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; +}; + +struct kvm_book3e_206_tlb_params { + /* + * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: + * + * - The number of ways of TLB0 must be a power of two between 2 and + * 16. + * - TLB1 must be fully associative. + * - The size of TLB0 must be a multiple of the number of ways, and + * the number of sets must be a power of two. + * - The size of TLB1 may not exceed 64 entries. + * - TLB0 supports 4 KiB pages. + * - The page sizes supported by TLB1 are as indicated by + * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1) + * as returned by KVM_GET_SREGS. + * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[] + * and tlb_ways[] must be zero. + * + * tlb_ways[n] = tlb_sizes[n] means the array is fully associative. + * + * KVM will adjust TLBnCFG based on the sizes configured here, + * though arrays greater than 2048 entries will have TLBnCFG[NENTRY] + * set to zero. + */ + __u32 tlb_sizes[4]; + __u32 tlb_ways[4]; + __u32 reserved[8]; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index a5197d816ec..bc17441535f 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -22,13 +22,6 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -struct tlbe{ - u32 mas1; - u32 mas2; - u32 mas3; - u32 mas7; -}; - #define E500_TLB_VALID 1 #define E500_TLB_DIRTY 2 @@ -48,13 +41,17 @@ struct kvmppc_e500_tlb_params { }; struct kvmppc_vcpu_e500 { - /* Unmodified copy of the guest's TLB. */ - struct tlbe *gtlb_arch[E500_TLB_NUM]; + /* Unmodified copy of the guest's TLB -- shared with host userspace. */ + struct kvm_book3e_206_tlb_entry *gtlb_arch; + + /* Starting entry number in gtlb_arch[] */ + int gtlb_offset[E500_TLB_NUM]; /* KVM internal information associated with each guest TLB entry */ struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; - unsigned int gtlb_size[E500_TLB_NUM]; + struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; + unsigned int gtlb_nv[E500_TLB_NUM]; /* @@ -68,7 +65,6 @@ struct kvmppc_vcpu_e500 { * and back, and our host TLB entries got evicted). */ struct tlbe_ref *tlb_refs[E500_TLB_NUM]; - unsigned int host_tlb1_nv; u32 host_pid[E500_PID_NUM]; @@ -78,11 +74,10 @@ struct kvmppc_vcpu_e500 { u32 mas0; u32 mas1; u32 mas2; - u32 mas3; + u64 mas7_3; u32 mas4; u32 mas5; u32 mas6; - u32 mas7; /* vcpu id table */ struct vcpu_id_table *idt; @@ -95,6 +90,9 @@ struct kvmppc_vcpu_e500 { u32 tlb1cfg; u64 mcar; + struct page **shared_tlb_pages; + int num_shared_tlb_pages; + struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 46efd1a265c..a284f209e2d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -193,4 +193,9 @@ static inline void kvm_rma_init(void) {} #endif +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg); +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *cfg); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 8c0d45a6faf..f17d7e732a1 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -121,7 +121,7 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.mas0 = vcpu_e500->mas0; sregs->u.e.mas1 = vcpu_e500->mas1; sregs->u.e.mas2 = vcpu_e500->mas2; - sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas7_3 = vcpu_e500->mas7_3; sregs->u.e.mas4 = vcpu_e500->mas4; sregs->u.e.mas6 = vcpu_e500->mas6; @@ -154,8 +154,7 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu_e500->mas0 = sregs->u.e.mas0; vcpu_e500->mas1 = sregs->u.e.mas1; vcpu_e500->mas2 = sregs->u.e.mas2; - vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; - vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas7_3 = sregs->u.e.mas7_3; vcpu_e500->mas4 = sregs->u.e.mas4; vcpu_e500->mas6 = sregs->u.e.mas6; } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index d48ae396f41..e0d36099c75 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -95,13 +95,17 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_MAS2: vcpu_e500->mas2 = spr_val; break; case SPRN_MAS3: - vcpu_e500->mas3 = spr_val; break; + vcpu_e500->mas7_3 &= ~(u64)0xffffffff; + vcpu_e500->mas7_3 |= spr_val; + break; case SPRN_MAS4: vcpu_e500->mas4 = spr_val; break; case SPRN_MAS6: vcpu_e500->mas6 = spr_val; break; case SPRN_MAS7: - vcpu_e500->mas7 = spr_val; break; + vcpu_e500->mas7_3 &= (u64)0xffffffff; + vcpu_e500->mas7_3 |= (u64)spr_val << 32; + break; case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); @@ -158,13 +162,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_MAS2: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; case SPRN_MAS3: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; + kvmppc_set_gpr(vcpu, rt, (u32)vcpu_e500->mas7_3); break; case SPRN_MAS4: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; case SPRN_MAS6: kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; case SPRN_MAS7: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7_3 >> 32); break; case SPRN_TLB0CFG: kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 59221bb1e00..f19ae2f6152 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -66,6 +71,13 @@ static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; +static struct kvm_book3e_206_tlb_entry *get_entry( + struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) +{ + int offset = vcpu_e500->gtlb_offset[tlbsel]; + return &vcpu_e500->gtlb_arch[offset + entry]; +} + /* * Allocate a free shadow id and setup a valid sid mapping in given entry. * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. @@ -217,34 +229,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) preempt_enable(); } -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *tlbe; - int i, tlbsel; - - printk("| %8s | %8s | %8s | %8s | %8s |\n", - "nr", "mas1", "mas2", "mas3", "mas7"); - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - printk("Guest TLB%d:\n", tlbsel); - for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { - tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; - if (tlbe->mas1 & MAS1_VALID) - printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", - tlbsel, i, tlbe->mas1, tlbe->mas2, - tlbe->mas3, tlbe->mas7); - } - } -} - static inline unsigned int gtlb0_get_next_victim( struct kvmppc_vcpu_e500 *vcpu_e500) { unsigned int victim; victim = vcpu_e500->gtlb_nv[0]++; - if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) + if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) vcpu_e500->gtlb_nv[0] = 0; return victim; @@ -256,9 +247,9 @@ static inline unsigned int tlb1_max_shadow_size(void) return host_tlb_params[1].entries - tlbcam_index - 1; } -static inline int tlbe_is_writable(struct tlbe *tlbe) +static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) { - return tlbe->mas3 & (MAS3_SW|MAS3_UW); + return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) @@ -289,39 +280,41 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) /* * writing shadow tlb entry to host TLB */ -static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) +static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, + uint32_t mas0) { unsigned long flags; local_irq_save(flags); mtspr(SPRN_MAS0, mas0); mtspr(SPRN_MAS1, stlbe->mas1); - mtspr(SPRN_MAS2, stlbe->mas2); - mtspr(SPRN_MAS3, stlbe->mas3); - mtspr(SPRN_MAS7, stlbe->mas7); + mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); + mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); + mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); asm volatile("isync; tlbwe" : : : "memory"); local_irq_restore(flags); } /* esel is index into set, not whole array */ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel, struct tlbe *stlbe) + int tlbsel, int esel, struct kvm_book3e_206_tlb_entry *stlbe) { if (tlbsel == 0) { - __write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(esel)); + int way = esel & (vcpu_e500->gtlb_params[0].ways - 1); + __write_host_tlbe(stlbe, MAS0_TLBSEL(0) | MAS0_ESEL(way)); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); } trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + (u32)stlbe->mas7_3, (u32)(stlbe->mas7_3 >> 32)); } void kvmppc_map_magic(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe magic; + struct kvm_book3e_206_tlb_entry magic; ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; unsigned int stid; pfn_t pfn; @@ -335,9 +328,8 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | MAS1_TSIZE(BOOK3E_PAGESZ_4K); magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; - magic.mas3 = (pfn << PAGE_SHIFT) | - MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; - magic.mas7 = pfn >> (32 - PAGE_SHIFT); + magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); preempt_enable(); @@ -358,7 +350,8 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { - struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); struct vcpu_id_table *idt = vcpu_e500->idt; unsigned int pr, tid, ts, pid; u32 val, eaddr; @@ -424,9 +417,8 @@ static int tlb0_set_base(gva_t addr, int sets, int ways) static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) { - int sets = KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; - - return tlb0_set_base(addr, sets, KVM_E500_TLB0_WAY_NUM); + return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, + vcpu_e500->gtlb_params[0].ways); } static int htlb0_set_base(gva_t addr) @@ -440,10 +432,10 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel) unsigned int esel = get_tlb_esel_bit(vcpu_e500); if (tlbsel == 0) { - esel &= KVM_E500_TLB0_WAY_NUM_MASK; + esel &= vcpu_e500->gtlb_params[0].ways - 1; esel += gtlb0_set_base(vcpu_e500, vcpu_e500->mas2); } else { - esel &= vcpu_e500->gtlb_size[tlbsel] - 1; + esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; } return esel; @@ -453,19 +445,22 @@ static unsigned int get_tlb_esel(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel) static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t eaddr, int tlbsel, unsigned int pid, int as) { - int size = vcpu_e500->gtlb_size[tlbsel]; - unsigned int set_base; + int size = vcpu_e500->gtlb_params[tlbsel].entries; + unsigned int set_base, offset; int i; if (tlbsel == 0) { set_base = gtlb0_set_base(vcpu_e500, eaddr); - size = KVM_E500_TLB0_WAY_NUM; + size = vcpu_e500->gtlb_params[0].ways; } else { set_base = 0; } + offset = vcpu_e500->gtlb_offset[tlbsel]; + for (i = 0; i < size; i++) { - struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i]; + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + set_base + i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -491,7 +486,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, } static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct tlbe *gtlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, pfn_t pfn) { ref->pfn = pfn; @@ -518,7 +513,7 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) int tlbsel = 0; int i; - for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][i].ref; kvmppc_e500_ref_release(ref); @@ -530,6 +525,8 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) int stlbsel = 1; int i; + kvmppc_e500_id_table_reset_all(vcpu_e500); + for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { struct tlbe_ref *ref = &vcpu_e500->tlb_refs[stlbsel][i]; @@ -559,18 +556,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | MAS1_TSIZE(tsized); vcpu_e500->mas2 = (eaddr & MAS2_EPN) | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); - vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) | (get_cur_pid(vcpu) << 16) | (as ? MAS6_SAS : 0); - vcpu_e500->mas7 = 0; } /* TID must be supplied by the caller */ -static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe *gtlbe, int tsize, - struct tlbe_ref *ref, - u64 gvaddr, struct tlbe *stlbe) +static inline void kvmppc_e500_setup_stlbe( + struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + int tsize, struct tlbe_ref *ref, u64 gvaddr, + struct kvm_book3e_206_tlb_entry *stlbe) { pfn_t pfn = ref->pfn; @@ -581,16 +578,16 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) - | e500_shadow_mas3_attrib(gtlbe->mas3, + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) + | e500_shadow_mas3_attrib(gtlbe->mas7_3, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; } /* sesel is an index into the entire array, not just the set */ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int sesel, - struct tlbe *stlbe, struct tlbe_ref *ref) + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe, + struct tlbe_ref *ref) { struct kvm_memory_slot *slot; unsigned long pfn, hva; @@ -700,15 +697,16 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* XXX only map the one-one case, for now use TLB0 */ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel, struct tlbe *stlbe) + int esel, + struct kvm_book3e_206_tlb_entry *stlbe) { - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; struct tlbe_ref *ref; int sesel = esel & (host_tlb_params[0].ways - 1); int sesel_base; gva_t ea; - gtlbe = &vcpu_e500->gtlb_arch[0][esel]; + gtlbe = get_entry(vcpu_e500, 0, esel); ref = &vcpu_e500->gtlb_priv[0][esel].ref; ea = get_tlb_eaddr(gtlbe); @@ -725,7 +723,8 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, * the shadow TLB. */ /* XXX for both one-one and one-to-many , for now use TLB1 */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe) + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe) { struct tlbe_ref *ref; unsigned int victim; @@ -754,7 +753,8 @@ static inline int kvmppc_e500_gtlbe_invalidate( struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { - struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); if (unlikely(get_tlb_iprot(gtlbe))) return -1; @@ -769,10 +769,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) int esel; if (value & MMUCSR0_TLB0FI) - for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); if (value & MMUCSR0_TLB1FI) - for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); /* Invalidate all vcpu id mappings */ @@ -797,7 +797,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) if (ia) { /* invalidate all entries */ - for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; + esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } else { ea &= 0xfffff000; @@ -817,18 +818,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int tlbsel, esel; - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); vcpu_e500->mas0 &= ~MAS0_NV(~0); vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas3 = gtlbe->mas3; - vcpu_e500->mas7 = gtlbe->mas7; + vcpu_e500->mas7_3 = gtlbe->mas7_3; return EMULATE_DONE; } @@ -839,7 +839,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) int as = !!get_cur_sas(vcpu_e500); unsigned int pid = get_cur_spid(vcpu_e500); int esel, tlbsel; - struct tlbe *gtlbe = NULL; + struct kvm_book3e_206_tlb_entry *gtlbe = NULL; gva_t ea; ea = kvmppc_get_gpr(vcpu, rb); @@ -847,7 +847,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) for (tlbsel = 0; tlbsel < 2; tlbsel++) { esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); if (esel >= 0) { - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); break; } } @@ -857,8 +857,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; - vcpu_e500->mas3 = gtlbe->mas3; - vcpu_e500->mas7 = gtlbe->mas7; + vcpu_e500->mas7_3 = gtlbe->mas7_3; } else { int victim; @@ -873,8 +872,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); vcpu_e500->mas2 &= MAS2_EPN; vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; - vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; - vcpu_e500->mas7 = 0; + vcpu_e500->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; } kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); @@ -883,8 +881,8 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) /* sesel is index into the set, not the whole array */ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe *gtlbe, - struct tlbe *stlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, int stlbsel, int sesel) { int stid; @@ -902,28 +900,27 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *gtlbe; + struct kvm_book3e_206_tlb_entry *gtlbe; int tlbsel, esel; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); if (get_tlb_v(gtlbe)) inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); gtlbe->mas1 = vcpu_e500->mas1; gtlbe->mas2 = vcpu_e500->mas2; - gtlbe->mas3 = vcpu_e500->mas3; - gtlbe->mas7 = vcpu_e500->mas7; + gtlbe->mas7_3 = vcpu_e500->mas7_3; trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, - gtlbe->mas3, gtlbe->mas7); + (u32)gtlbe->mas7_3, (u32)(gtlbe->mas7_3 >> 32)); /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - struct tlbe stlbe; + struct kvm_book3e_206_tlb_entry stlbe; int stlbsel, sesel; u64 eaddr; u64 raddr; @@ -996,9 +993,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, gva_t eaddr) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe *gtlbe = - &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)]; - u64 pgmask = get_tlb_bytes(gtlbe) - 1; + struct kvm_book3e_206_tlb_entry *gtlbe; + u64 pgmask; + + gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); + pgmask = get_tlb_bytes(gtlbe) - 1; return get_tlb_raddr(gtlbe) | (eaddr & pgmask); } @@ -1012,12 +1011,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); struct tlbe_priv *priv; - struct tlbe *gtlbe, stlbe; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); int stlbsel, sesel; - gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + gtlbe = get_entry(vcpu_e500, tlbsel, esel); switch (tlbsel) { case 0: @@ -1073,25 +1072,174 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { - struct tlbe *tlbe; + struct kvm_book3e_206_tlb_entry *tlbe; /* Insert large initial mapping for guest. */ - tlbe = &vcpu_e500->gtlb_arch[1][0]; + tlbe = get_entry(vcpu_e500, 1, 0); tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); tlbe->mas2 = 0; - tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; - tlbe->mas7 = 0; + tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; /* 4K map for serial output. Used by kernel wrapper. */ - tlbe = &vcpu_e500->gtlb_arch[1][1]; + tlbe = get_entry(vcpu_e500, 1, 1); tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; - tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; - tlbe->mas7 = 0; + tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; +} + +static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int i; + + clear_tlb_refs(vcpu_e500); + kfree(vcpu_e500->gtlb_priv[0]); + kfree(vcpu_e500->gtlb_priv[1]); + + if (vcpu_e500->shared_tlb_pages) { + vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, + PAGE_SIZE))); + + for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { + set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); + put_page(vcpu_e500->shared_tlb_pages[i]); + } + + vcpu_e500->num_shared_tlb_pages = 0; + vcpu_e500->shared_tlb_pages = NULL; + } else { + kfree(vcpu_e500->gtlb_arch); + } + + vcpu_e500->gtlb_arch = NULL; +} + +int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, + struct kvm_config_tlb *cfg) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_params params; + char *virt; + struct page **pages; + struct tlbe_priv *privs[2] = {}; + size_t array_len; + u32 sets; + int num_pages, ret, i; + + if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) + return -EINVAL; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, + sizeof(params))) + return -EFAULT; + + if (params.tlb_sizes[1] > 64) + return -EINVAL; + if (params.tlb_ways[1] != params.tlb_sizes[1]) + return -EINVAL; + if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) + return -EINVAL; + if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) + return -EINVAL; + + if (!is_power_of_2(params.tlb_ways[0])) + return -EINVAL; + + sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); + if (!is_power_of_2(sets)) + return -EINVAL; + + array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; + array_len *= sizeof(struct kvm_book3e_206_tlb_entry); + + if (cfg->array_len < array_len) + return -EINVAL; + + num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - + cfg->array / PAGE_SIZE; + pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + if (ret < 0) + goto err_pages; + + if (ret != num_pages) { + num_pages = ret; + ret = -EFAULT; + goto err_put_page; + } + + virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); + if (!virt) + goto err_put_page; + + privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], + GFP_KERNEL); + privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], + GFP_KERNEL); + + if (!privs[0] || !privs[1]) + goto err_put_page; + + free_gtlb(vcpu_e500); + + vcpu_e500->gtlb_priv[0] = privs[0]; + vcpu_e500->gtlb_priv[1] = privs[1]; + + vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) + (virt + (cfg->array & (PAGE_SIZE - 1))); + + vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; + vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; + + vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; + if (params.tlb_sizes[0] <= 2048) + vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; + + vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; + vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; + + vcpu_e500->shared_tlb_pages = pages; + vcpu_e500->num_shared_tlb_pages = num_pages; + + vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; + vcpu_e500->gtlb_params[0].sets = sets; + + vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; + vcpu_e500->gtlb_params[1].sets = 1; + + return 0; + +err_put_page: + kfree(privs[0]); + kfree(privs[1]); + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + +err_pages: + kfree(pages); + return ret; +} + +int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, + struct kvm_dirty_tlb *dirty) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + clear_tlb_refs(vcpu_e500); + return 0; } int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { + int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); + int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; + host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; @@ -1124,17 +1272,22 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_arch[0] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->gtlb_arch[0] == NULL) - goto err; + vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; + vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; - vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE; - vcpu_e500->gtlb_arch[1] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); - if (vcpu_e500->gtlb_arch[1] == NULL) - goto err; + vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; + vcpu_e500->gtlb_params[0].sets = + KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; + + vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; + vcpu_e500->gtlb_params[1].sets = 1; + + vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + if (!vcpu_e500->gtlb_arch) + return -ENOMEM; + + vcpu_e500->gtlb_offset[0] = 0; + vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; vcpu_e500->tlb_refs[0] = kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, @@ -1148,15 +1301,15 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->tlb_refs[1]) goto err; - vcpu_e500->gtlb_priv[0] = - kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[0], - GFP_KERNEL); + vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[0].entries, + GFP_KERNEL); if (!vcpu_e500->gtlb_priv[0]) goto err; - vcpu_e500->gtlb_priv[1] = - kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_size[1], - GFP_KERNEL); + vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); if (!vcpu_e500->gtlb_priv[1]) goto err; @@ -1165,32 +1318,24 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) /* Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; + vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; - vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1]; + vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_params[1].entries; return 0; err: + free_gtlb(vcpu_e500); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); - kfree(vcpu_e500->gtlb_priv[0]); - kfree(vcpu_e500->gtlb_priv[1]); - kfree(vcpu_e500->gtlb_arch[0]); - kfree(vcpu_e500->gtlb_arch[1]); return -1; } void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { - clear_tlb_refs(vcpu_e500); - + free_gtlb(vcpu_e500); kvmppc_e500_id_table_free(vcpu_e500); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); - kfree(vcpu_e500->gtlb_priv[0]); - kfree(vcpu_e500->gtlb_priv[1]); - kfree(vcpu_e500->gtlb_arch[1]); - kfree(vcpu_e500->gtlb_arch[0]); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index b587f691459..2c296407e75 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -20,13 +20,9 @@ #include #include -#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ -#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) -#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) - -#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */ -#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT) -#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1) +/* This geometry is the legacy default -- can be overridden by userspace */ +#define KVM_E500_TLB0_WAY_SIZE 128 +#define KVM_E500_TLB0_WAY_NUM 2 #define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) #define KVM_E500_TLB1_SIZE 16 @@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 7) & 0x1f; } -static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) { return tlbe->mas2 & 0xfffff000; } -static inline u64 get_tlb_bytes(const struct tlbe *tlbe) +static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1ULL << 10 << pgsize; } -static inline gva_t get_tlb_end(const struct tlbe *tlbe) +static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) { u64 bytes = get_tlb_bytes(tlbe); return get_tlb_eaddr(tlbe) + bytes - 1; } -static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) { - u64 rpn = tlbe->mas7; - return (rpn << 32) | (tlbe->mas3 & 0xfffff000); + return tlbe->mas7_3 & ~0xfffULL; } -static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 16) & 0xff; } -static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 12) & 0x1; } -static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 31) & 0x1; } -static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) +static inline unsigned int +get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) { return (tlbe->mas1 >> 30) & 0x1; } @@ -156,7 +156,7 @@ static inline unsigned int get_tlb_esel_bit( } static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct tlbe *tlbe) + const struct kvm_book3e_206_tlb_entry *tlbe) { gpa_t gpa; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a5671616af8..3cf6fba513a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -222,6 +222,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -602,6 +605,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_KVM_E500 + case KVM_CAP_SW_TLB: { + struct kvm_config_tlb cfg; + void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; + + r = -EFAULT; + if (copy_from_user(&cfg, user_ptr, sizeof(cfg))) + break; + + r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg); + break; + } +#endif default: r = -EINVAL; break; @@ -651,6 +667,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + +#ifdef CONFIG_KVM_E500 + case KVM_DIRTY_TLB: { + struct kvm_dirty_tlb dirty; + r = -EFAULT; + if (copy_from_user(&dirty, argp, sizeof(dirty))) + goto out; + r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); + break; + } +#endif + default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 245bcb3a0fc..fa029ced4bd 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -581,6 +581,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_RMA 65 #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ #define KVM_CAP_PPC_PAPR 68 +#define KVM_CAP_SW_TLB 69 #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 @@ -664,6 +665,21 @@ struct kvm_clock_data { __u32 pad[9]; }; +#define KVM_MMU_FSL_BOOKE_NOHV 0 +#define KVM_MMU_FSL_BOOKE_HV 1 + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + /* * ioctls for VM fds */ @@ -801,6 +817,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_SW_TLB */ +#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3-70-g09d2 From a355aa54f1d25dff83c0feef8863d83a76988fdb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:37:21 +0000 Subject: KVM: Add barriers to allow mmu_notifier_retry to be used locklessly This adds an smp_wmb in kvm_mmu_notifier_invalidate_range_end() and an smp_rmb in mmu_notifier_retry() so that mmu_notifier_retry() will give the correct answer when called without kvm->mmu_lock being held. PowerPC Book3S HV KVM wants to use a bitlock per guest page rather than a single global spinlock in order to improve the scalability of updates to the guest MMU hashed page table, and so needs this. Signed-off-by: Paul Mackerras Acked-by: Avi Kivity Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 14 +++++++++----- virt/kvm/kvm_main.c | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d4d4d709211..eada8e69fe5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -702,12 +702,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se if (unlikely(vcpu->kvm->mmu_notifier_count)) return 1; /* - * Both reads happen under the mmu_lock and both values are - * modified under mmu_lock, so there's no need of smb_rmb() - * here in between, otherwise mmu_notifier_count should be - * read before mmu_notifier_seq, see - * mmu_notifier_invalidate_range_end write side. + * Ensure the read of mmu_notifier_count happens before the read + * of mmu_notifier_seq. This interacts with the smp_wmb() in + * mmu_notifier_invalidate_range_end to make sure that the caller + * either sees the old (non-zero) value of mmu_notifier_count or + * the new (incremented) value of mmu_notifier_seq. + * PowerPC Book3s HV KVM calls this under a per-page lock + * rather than under kvm->mmu_lock, for scalability, so + * can't rely on kvm->mmu_lock to keep things ordered. */ + smp_rmb(); if (vcpu->kvm->mmu_notifier_seq != mmu_seq) return 1; return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 64be836f334..9f32bffd37c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, * been freed. */ kvm->mmu_notifier_seq++; + smp_wmb(); /* * The above sequence increase must be visible before the - * below count decrease but both values are read by the kvm - * page fault under mmu_lock spinlock so we don't need to add - * a smb_wmb() here in between the two. + * below count decrease, which is ensured by the smp_wmb above + * in conjunction with the smp_rmb in mmu_notifier_retry(). */ kvm->mmu_notifier_count--; spin_unlock(&kvm->mmu_lock); -- cgit v1.2.3-70-g09d2 From e24ed81fedd551e80378be62fa0b0532480ea7d4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 14 Sep 2011 10:02:41 +0200 Subject: KVM: PPC: Add generic single register ioctls Right now we transfer a static struct every time we want to get or set registers. Unfortunately, over time we realize that there are more of these than we thought of before and the extensibility and flexibility of transferring a full struct every time is limited. So this is a new approach to the problem. With these new ioctls, we can get and set a single register that is identified by an ID. This allows for very precise and limited transmittal of data. When we later realize that it's a better idea to shove over multiple registers at once, we can reuse most of the infrastructure and simply implement a GET_MANY_REGS / SET_MANY_REGS interface. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 46 ++++++++++++++++++++++++++++++++++++--- arch/powerpc/kvm/powerpc.c | 41 ++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 35 +++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bcd45d5afca..e0c75dcb2ca 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1533,7 +1533,7 @@ following algorithm: Some guests configure the LINT1 NMI input to cause a panic, aiding in debugging. -4.64 KVM_S390_UCAS_MAP +4.65 KVM_S390_UCAS_MAP Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1552,7 +1552,7 @@ This ioctl maps the memory at "user_addr" with the length "length" to the vcpu's address space starting at "vcpu_addr". All parameters need to be alligned by 1 megabyte. -4.65 KVM_S390_UCAS_UNMAP +4.66 KVM_S390_UCAS_UNMAP Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1571,7 +1571,7 @@ This ioctl unmaps the memory in the vcpu's address space starting at "vcpu_addr" with the length "length". The field "user_addr" is ignored. All parameters need to be alligned by 1 megabyte. -4.66 KVM_S390_VCPU_FAULT +4.67 KVM_S390_VCPU_FAULT Capability: KVM_CAP_S390_UCONTROL Architectures: s390 @@ -1587,6 +1587,46 @@ table upfront. This is useful to handle validity intercepts for user controlled virtual machines to fault in the virtual cpu's lowcore pages prior to calling the KVM_RUN ioctl. +4.68 KVM_SET_ONE_REG + +Capability: KVM_CAP_ONE_REG +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_one_reg (in) +Returns: 0 on success, negative value on failure + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + +Using this ioctl, a single vcpu register can be set to a specific value +defined by user space with the passed in struct kvm_one_reg, where id +refers to the register identifier as described below and addr is a pointer +to a variable with the respective size. There can be architecture agnostic +and architecture specific registers. Each have their own range of operation +and their own constants and width. To keep track of the implemented +registers, find a list below: + + Arch | Register | Width (bits) + | | + +4.69 KVM_GET_ONE_REG + +Capability: KVM_CAP_ONE_REG +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_one_reg (in and out) +Returns: 0 on success, negative value on failure + +This ioctl allows to receive the value of a single register implemented +in a vcpu. The register to read is indicated by the "id" field of the +kvm_one_reg struct passed in. On success, the register value can be found +at the memory location pointed to by "addr". + +The list of registers accessible using this interface is identical to the +list in 4.64. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f4380cb264e..089c61bf0e1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -217,6 +217,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ONE_REG: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -645,6 +646,32 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + default: + break; + } + + return r; +} + +static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + default: + break; + } + + return r; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -684,6 +711,20 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: + { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + goto out; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } + #ifdef CONFIG_KVM_E500 case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index fa029ced4bd..4f7a9fb8ab0 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -582,6 +582,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_SW_TLB 69 +#define KVM_CAP_ONE_REG 70 #define KVM_CAP_S390_GMAP 71 #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 @@ -680,6 +681,37 @@ struct kvm_dirty_tlb { __u32 num_dirty; }; +/* Available with KVM_CAP_ONE_REG */ + +#define KVM_REG_ARCH_MASK 0xff00000000000000ULL +#define KVM_REG_GENERIC 0x0000000000000000ULL + +/* + * Architecture specific registers are to be defined in arch headers and + * ORed with the arch identifier. + */ +#define KVM_REG_PPC 0x1000000000000000ULL +#define KVM_REG_X86 0x2000000000000000ULL +#define KVM_REG_IA64 0x3000000000000000ULL +#define KVM_REG_ARM 0x4000000000000000ULL +#define KVM_REG_S390 0x5000000000000000ULL + +#define KVM_REG_SIZE_SHIFT 52 +#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +#define KVM_REG_SIZE_U8 0x0000000000000000ULL +#define KVM_REG_SIZE_U16 0x0010000000000000ULL +#define KVM_REG_SIZE_U32 0x0020000000000000ULL +#define KVM_REG_SIZE_U64 0x0030000000000000ULL +#define KVM_REG_SIZE_U128 0x0040000000000000ULL +#define KVM_REG_SIZE_U256 0x0050000000000000ULL +#define KVM_REG_SIZE_U512 0x0060000000000000ULL +#define KVM_REG_SIZE_U1024 0x0070000000000000ULL + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + /* * ioctls for VM fds */ @@ -819,6 +851,9 @@ struct kvm_s390_ucas_mapping { #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_SW_TLB */ #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) +/* Available with KVM_CAP_ONE_REG */ +#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) +#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3-70-g09d2 From 1022fc3d3bfaca09d5d6bfcc93a168de16840814 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 14 Sep 2011 21:45:23 +0200 Subject: KVM: PPC: Add support for explicit HIOR setting Until now, we always set HIOR based on the PVR, but this is just wrong. Instead, we should be setting HIOR explicitly, so user space can decide what the initial HIOR value is - just like on real hardware. We keep the old PVR based way around for backwards compatibility, but once user space uses the SET_ONE_REG based method, we drop the PVR logic. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm.h | 2 ++ arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_pr.c | 6 ++++-- arch/powerpc/kvm/powerpc.c | 13 +++++++++++++ include/linux/kvm.h | 1 + 6 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e0c75dcb2ca..59a38264a0e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1610,6 +1610,7 @@ registers, find a list below: Arch | Register | Width (bits) | | + PPC | KVM_REG_PPC_HIOR | 64 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 663c57f8716..f41adcda146 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -331,4 +331,6 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3c3edee672a..aa795ccef29 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s { #endif int context_id[SID_CONTEXTS]; + bool hior_explicit; /* HIOR is set by ioctl, not PVR */ + struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c193625d528..00efda6dc0e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -157,14 +157,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) #ifdef CONFIG_PPC_BOOK3S_64 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { kvmppc_mmu_book3s_64_init(vcpu); - to_book3s(vcpu)->hior = 0xfff00000; + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0xfff00000; to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_64; } else #endif { kvmppc_mmu_book3s_32_init(vcpu); - to_book3s(vcpu)->hior = 0; + if (!to_book3s(vcpu)->hior_explicit) + to_book3s(vcpu)->hior = 0; to_book3s(vcpu)->msr_mask = 0xffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_32; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 089c61bf0e1..59852091b38 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -212,6 +212,7 @@ int kvm_dev_ioctl_check_extension(l


context:
space:
mode: