aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt55
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h10
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h31
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h25
-rw-r--r--arch/powerpc/include/asm/kvm_host.h13
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h23
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h2
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h10
-rw-r--r--arch/powerpc/include/asm/reg.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h27
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/Makefile5
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c407
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c16
-rw-r--r--arch/powerpc/kvm/book3s_exports.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv.c32
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c144
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c139
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S131
-rw-r--r--arch/powerpc/kvm/book3s_pr.c114
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S3
-rw-r--r--arch/powerpc/kvm/booke.c36
-rw-r--r--arch/powerpc/kvm/booke.h1
-rw-r--r--arch/powerpc/kvm/booke_emulate.c14
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S145
-rw-r--r--arch/powerpc/kvm/e500.h8
-rw-r--r--arch/powerpc/kvm/e500_emulate.c14
-rw-r--r--arch/powerpc/kvm/e500_tlb.c38
-rw-r--r--arch/powerpc/kvm/powerpc.c34
-rw-r--r--include/linux/kvm_host.h12
-rw-r--r--include/uapi/linux/kvm.h3
-rw-r--r--virt/kvm/eventfd.c6
34 files changed, 1279 insertions, 229 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6671fdc0afb..a4df5535996 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1773,6 +1773,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_VPA_ADDR | 64
PPC | KVM_REG_PPC_VPA_SLB | 128
PPC | KVM_REG_PPC_VPA_DTL | 128
+ PPC | KVM_REG_PPC_EPCR | 32
4.69 KVM_GET_ONE_REG
@@ -2071,6 +2072,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
Note that the vcpu ioctl is asynchronous to vcpu execution.
+4.78 KVM_PPC_GET_HTAB_FD
+
+Capability: KVM_CAP_PPC_HTAB_FD
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to struct kvm_get_htab_fd (in)
+Returns: file descriptor number (>= 0) on success, -1 on error
+
+This returns a file descriptor that can be used either to read out the
+entries in the guest's hashed page table (HPT), or to write entries to
+initialize the HPT. The returned fd can only be written to if the
+KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
+can only be read if that bit is clear. The argument struct looks like
+this:
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+The `start_index' field gives the index in the HPT of the entry at
+which to start reading. It is ignored when writing.
+
+Reads on the fd will initially supply information about all
+"interesting" HPT entries. Interesting entries are those with the
+bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
+all entries. When the end of the HPT is reached, the read() will
+return. If read() is called again on the fd, it will start again from
+the beginning of the HPT, but will only return HPT entries that have
+changed since they were last read.
+
+Data read or written is structured as a header (8 bytes) followed by a
+series of valid HPT entries (16 bytes) each. The header indicates how
+many valid HPT entries there are and how many invalid entries follow
+the valid entries. The invalid entries are not represented explicitly
+in the stream. The header format is:
+
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
+Writes to the fd create HPT entries starting at the index given in the
+header; first `n_valid' valid entries with contents from the data
+written, then `n_invalid' invalid entries, invalidating any previously
+valid entries found.
+
5. The kvm_run structure
------------------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 4376c528f75..703446720a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4253,6 +4253,7 @@ KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
M: Alexander Graf <agraf@suse.de>
L: kvm-ppc@vger.kernel.org
W: http://kvm.qumranet.com
+T: git git://github.com/agraf/linux-2.6.git
S: Supported
F: arch/powerpc/include/asm/kvm*
F: arch/powerpc/kvm/
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 36fcf419046..5a56e1c5f85 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
+ u64 purr_offset;
+ u64 spurr_offset;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
u32 vsid_next;
@@ -157,8 +159,12 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
-extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
+extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1472a5b4e4e..38bec1dc992 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */
#define HPTE_V_HVLOCK 0x40UL
#define HPTE_V_ABSENT 0x20UL
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED (1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED HPTE_GR_MODIFIED
+
static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
{
unsigned long tmp, old;
@@ -237,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}
+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+ return (hpte_v & ~0xffffffUL) ==
+ (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index a37a12a9a7d..3a79f532571 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -17,6 +17,7 @@
* there are no exceptions for which we fall through directly to
* the normal host handler.
*
+ * 32-bit host
* Expected inputs (normal exceptions):
* SCRATCH0 = saved r10
* r10 = thread struct
@@ -33,6 +34,30 @@
* *(r8 + GPR9) = saved r9
* *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
* *(r8 + GPR11) = saved r11
+ *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/MC exception types):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+ * Expected inputs (CRIT exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * *(r13 + PACA_EX##type + EX_R13) = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
+ * SPRN_SPRG_GEN_SCRATCH = saved r13
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
*/
.macro DO_KVM intno srr1
#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3093896015f..ca9bf459db6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -246,10 +246,12 @@ struct kvm_arch {
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
+ u32 online_vcores;
unsigned long hpt_npte;
unsigned long hpt_mask;
+ atomic_t hpte_mod_interest;
spinlock_t slot_phys_lock;
- unsigned short last_vcpu[NR_CPUS];
+ cpumask_t need_tlb_flush;
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -274,6 +276,7 @@ struct kvmppc_vcore {
int nap_count;
int napping_threads;
u16 pcpu;
+ u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct list_head runnable_threads;
@@ -403,13 +406,18 @@ struct kvm_vcpu_arch {
u32 host_mas4;
u32 host_mas6;
u32 shadow_epcr;
- u32 epcr;
u32 shadow_msrp;
u32 eplc;
u32 epsc;
u32 oldpir;
#endif
+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+ u32 epcr;
+#endif
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
@@ -522,7 +530,6 @@ struct kvm_vcpu_arch {
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
- u16 last_cpu;
u8 ceded;
u8 prodded;
u32 last_inst;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 609cca3e942..572aa753061 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void);
extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@@ -293,4 +295,25 @@ static inline void kvmppc_lazy_ee_enable(void)
#endif
}
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+ ulong ea;
+ ulong msr_64bit = 0;
+
+ ea = kvmppc_get_gpr(vcpu, rb);
+ if (ra)
+ ea += kvmppc_get_gpr(vcpu, ra);
+
+#if defined(CONFIG_PPC_BOOK3E_64)
+ msr_64bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+ msr_64bit = MSR_SF;
+#endif
+
+ if (!(vcpu->arch.shared->msr & msr_64bit))
+ ea = (uint32_t)ea;
+
+ return ea;
+}
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index eeabcdbc30f..99d43e0c1e4 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -59,7 +59,7 @@
#define MAS1_TSIZE_SHIFT 7
#define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
-#define MAS2_EPN 0xFFFFF000
+#define MAS2_EPN (~0xFFFUL)
#define MAS2_X0 0x00000040
#define MAS2_X1 0x00000020
#define MAS2_W 0x00000010
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 9673f73eb8d..2fdb47a19ef 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -121,6 +121,16 @@ extern char initial_stab[];
#define PP_RXRX 3 /* Supervisor read, User read */
#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
+#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
+#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
+#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT 12
+
+#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
+
#ifndef __ASSEMBLY__
struct hash_pte {
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d24c1416396..97d37278ea2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -518,6 +518,7 @@
#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
+#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b89ae4db45c..2fba8a66fb1 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params {
__u32 reserved[8];
};
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid. Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream. The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -386,4 +411,6 @@ struct kvm_book3e_206_tlb_params {
#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7523539cfe9..4e23ba2f3ca 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -441,8 +441,7 @@ int main(void)
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
- DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
- DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+ DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
- DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 71f0cd9edf3..4730c953f43 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_EVENTFD
config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a08636e6d..1e473d46322 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
+ eventfd.o)
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
@@ -72,10 +73,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
+ book3s_hv_ras.o \
book3s_hv_builtin.o
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
+ ../../../virt/kvm/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2a89a36e726..8cc18abd6dd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -25,6 +25,8 @@
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -41,6 +43,11 @@
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret);
+static void kvmppc_rmap_reset(struct kvm *kvm);
+
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
@@ -138,10 +145,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
/*
- * Set the whole last_vcpu array to an invalid vcpu number.
- * This ensures that each vcpu will flush its TLB on next entry.
+ * Reset all the reverse-mapping chains for all memslots
*/
- memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+ kvmppc_rmap_reset(kvm);
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
*htab_orderp = order;
err = 0;
} else {
@@ -185,6 +193,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long addr, hash;
unsigned long psize;
unsigned long hp0, hp1;
+ unsigned long idx_ret;
long ret;
struct kvm *kvm = vcpu->kvm;
@@ -216,7 +225,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
hash = (hash << 3) + 7;
hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
hp_r = hp1 | addr;
- ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+ &idx_ret);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
addr, ret);
@@ -354,15 +364,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
return err;
}
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long psize, gpa, gfn;
struct kvm_memory_slot *memslot;
long ret;
@@ -390,8 +395,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
do_insert:
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
- vcpu->arch.pgdir = current->mm->pgd;
- ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+ current->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -402,6 +407,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
}
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel)
+{
+ return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+ pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
gva_t eaddr)
{
@@ -756,6 +774,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
+static void kvmppc_rmap_reset(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm->memslots;
+ kvm_for_each_memslot(memslot, slots) {
+ /*
+ * This assumes it is acceptable to lose reference and
+ * change bits across a reset.
+ */
+ memset(memslot->arch.rmap, 0,
+ memslot->npages * sizeof(*memslot->arch.rmap));
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long start,
unsigned long end,
@@ -1131,6 +1168,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
put_page(page);
}
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done. When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+struct kvm_htab_ctx {
+ unsigned long index;
+ unsigned long flags;
+ struct kvm *kvm;
+ int first_pass;
+};
+
+#define HPTE_SIZE (2 * sizeof(unsigned long))
+
+static long record_hpte(unsigned long flags, unsigned long *hptp,
+ unsigned long *hpte, struct revmap_entry *revp,
+ int want_valid, int first_pass)
+{
+ unsigned long v, r;
+ int ok = 1;
+ int valid, dirty;
+
+ /* Unmodified entries are uninteresting except on the first pass */
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ if (!first_pass && !dirty)
+ return 0;
+
+ valid = 0;
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ valid = 1;
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+ !(hptp[0] & HPTE_V_BOLTED))
+ valid = 0;
+ }
+ if (valid != want_valid)
+ return 0;
+
+ v = r = 0;
+ if (valid || dirty) {
+ /* lock the HPTE so it's stable and read it */
+ preempt_disable();
+ while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hptp[0];
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ }
+ /* re-evaluate valid and dirty from synchronized HPTE value */
+ valid = !!(v & HPTE_V_VALID);
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+ valid = 0;
+ r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ /* only clear modified if this is the right sort of entry */
+ if (valid == want_valid && dirty) {
+ r &= ~HPTE_GR_MODIFIED;
+ revp->guest_rpte = r;
+ }
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hptp[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ if (!(valid == want_valid && (first_pass || dirty)))
+ ok = 0;
+ }
+ hpte[0] = v;
+ hpte[1] = r;
+ return ok;
+}
+
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long *hptp;
+ struct revmap_entry *revp;
+ unsigned long i, nb, nw;
+ unsigned long __user *lbuf;
+ struct kvm_get_htab_header __user *hptr;
+ unsigned long flags;
+ int first_pass;
+ unsigned long hpte[2];
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ first_pass = ctx->first_pass;
+ flags = ctx->flags;
+
+ i = ctx->index;
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ revp = kvm->arch.revmap + i;
+ lbuf = (unsigned long __user *)buf;
+
+ nb = 0;
+ while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+ /* Initialize header */
+ hptr = (struct kvm_get_htab_header __user *)buf;
+ hdr.n_valid = 0;
+ hdr.n_invalid = 0;
+ nw = nb;
+ nb += sizeof(hdr);
+ lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+ /* Skip uninteresting entries, i.e. clean on not-first pass */
+ if (!first_pass) {
+ while (i < kvm->arch.hpt_npte &&
+ !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ }
+ hdr.index = i;
+
+ /* Grab a series of valid entries */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_valid < 0xffff &&
+ nb + HPTE_SIZE < count &&
+ record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+ /* valid entry, write it out */
+ ++hdr.n_valid;
+ if (__put_user(hpte[0], lbuf) ||
+ __put_user(hpte[1], lbuf + 1))
+ return -EFAULT;
+ nb += HPTE_SIZE;
+ lbuf += 2;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ /* Now skip invalid entries while we can */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_invalid < 0xffff &&
+ record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+ /* found an invalid entry */
+ ++hdr.n_invalid;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+
+ if (hdr.n_valid || hdr.n_invalid) {
+ /* write back the header */
+ if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+ nw = nb;
+ buf = (char __user *)lbuf;
+ } else {
+ nb = nw;
+ }
+
+ /* Check if we've wrapped around the hash table */
+ if (i >= kvm->arch.hpt_npte) {
+ i = 0;
+ ctx->first_pass = 0;
+ break;
+ }
+ }
+
+ ctx->index = i;
+
+ return nb;
+}
+
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long i, j;
+ unsigned long v, r;
+ unsigned long __user *lbuf;
+ unsigned long *hptp;
+ unsigned long tmp[2];
+ ssize_t nb;
+ long int err, ret;
+ int rma_setup;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+ /* lock out vcpus from running while we're doing this */
+ mutex_lock(&kvm->lock);
+ rma_setup = kvm->arch.rma_setup_done;
+ if (rma_setup) {
+ kvm->arch.rma_setup_done = 0; /* temporarily */
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ i