aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
author James Morris <jmorris@namei.org> 2009-03-28 14:57:13 +1100
committer James Morris <jmorris@namei.org> 2009-03-28 14:57:13 +1100
commit bb798169d1bb860b07192cf9c75937fadc8610b4 (patch)
tree fa67f14406a1e79897e6f29e59fed7c02ec31c30 /arch
parent a106cbfd1f3703402fc2d95d97e7a054102250f0 (diff)
parent 5d80f8e5a9dc9c9a94d4aeaa567e219a808b8a4a (diff)
Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next
Diffstat (limited to 'arch')
-rw-r--r-- arch/alpha/include/asm/socket.h 3
-rw-r--r-- arch/alpha/include/asm/statfs.h 2
-rw-r--r-- arch/alpha/include/asm/swab.h 2
-rw-r--r-- arch/alpha/kernel/entry.S 3
-rw-r--r-- arch/alpha/kernel/irq.c 2
-rw-r--r-- arch/alpha/kernel/irq_alpha.c 2
-rw-r--r-- arch/alpha/kernel/osf_sys.c 2
-rw-r--r-- arch/arm/include/asm/a.out.h 2
-rw-r--r-- arch/arm/include/asm/setup.h 2
-rw-r--r-- arch/arm/include/asm/socket.h 3
-rw-r--r-- arch/arm/include/asm/swab.h 2
-rw-r--r-- arch/arm/kernel/irq.c 2
-rw-r--r-- arch/arm/mach-kirkwood/common.c 5
-rw-r--r-- arch/arm/mach-kirkwood/rd88f6281-setup.c 13
-rw-r--r-- arch/arm/mach-ns9xxx/irq.c 3
-rw-r--r-- arch/arm/mach-orion5x/common.c 34
-rw-r--r-- arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c 9
-rw-r--r-- arch/arm/mach-orion5x/rd88f5181l-ge-setup.c 10
-rw-r--r-- arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c 10
-rw-r--r-- arch/arm/mach-orion5x/wrt350n-v2-setup.c 9
-rw-r--r-- arch/arm/plat-orion/include/plat/orion5x_wdt.h 18
-rw-r--r-- arch/avr32/include/asm/socket.h 3
-rw-r--r-- arch/avr32/include/asm/swab.h 2
-rw-r--r-- arch/avr32/kernel/irq.c 2
-rw-r--r-- arch/blackfin/include/asm/socket.h 3
-rw-r--r-- arch/blackfin/include/asm/swab.h 2
-rw-r--r-- arch/blackfin/kernel/irqchip.c 2
-rw-r--r-- arch/cris/include/asm/socket.h 3
-rw-r--r-- arch/cris/kernel/irq.c 2
-rw-r--r-- arch/frv/kernel/irq.c 2
-rw-r--r-- arch/h8300/include/asm/socket.h 3
-rw-r--r-- arch/h8300/include/asm/swab.h 2
-rw-r--r-- arch/h8300/kernel/irq.c 4
-rw-r--r-- arch/ia64/ia32/ia32_entry.S 2
-rw-r--r-- arch/ia64/include/asm/fpu.h 2
-rw-r--r-- arch/ia64/include/asm/gcc_intrin.h 1
-rw-r--r-- arch/ia64/include/asm/intrinsics.h 1
-rw-r--r-- arch/ia64/include/asm/kvm.h 52
-rw-r--r-- arch/ia64/include/asm/kvm_host.h 18
-rw-r--r-- arch/ia64/include/asm/msidef.h 42
-rw-r--r-- arch/ia64/include/asm/socket.h 3
-rw-r--r-- arch/ia64/include/asm/swab.h 2
-rw-r--r-- arch/ia64/kernel/irq.c 2
-rw-r--r-- arch/ia64/kernel/msi_ia64.c 55
-rw-r--r-- arch/ia64/kernel/perfmon.c 2
-rw-r--r-- arch/ia64/kvm/Kconfig 4
-rw-r--r-- arch/ia64/kvm/irq.h 2
-rw-r--r-- arch/ia64/kvm/kvm-ia64.c 125
-rw-r--r-- arch/ia64/kvm/kvm_fw.c 151
-rw-r--r-- arch/ia64/kvm/process.c 71
-rw-r--r-- arch/ia64/kvm/vcpu.c 44
-rw-r--r-- arch/ia64/kvm/vcpu.h 4
-rw-r--r-- arch/ia64/kvm/vtlb.c 44
-rw-r--r-- arch/m32r/kernel/irq.c 2
-rw-r--r-- arch/m68k/Makefile 3
-rw-r--r-- arch/m68k/include/asm/irq_mm.h 3
-rw-r--r-- arch/m68k/include/asm/macintosh.h 7
-rw-r--r-- arch/m68k/include/asm/socket.h 3
-rw-r--r-- arch/m68k/install.sh 52
-rw-r--r-- arch/m68k/mac/config.c 207
-rw-r--r-- arch/m68k/mac/via.c 9
-rw-r--r-- arch/m68knommu/platform/520x/config.c 56
-rw-r--r-- arch/m68knommu/platform/523x/config.c 51
-rw-r--r-- arch/m68knommu/platform/5272/config.c 48
-rw-r--r-- arch/m68knommu/platform/527x/config.c 113
-rw-r--r-- arch/m68knommu/platform/528x/config.c 58
-rw-r--r-- arch/m68knommu/platform/532x/config.c 49
-rw-r--r-- arch/mips/include/asm/sigcontext.h 1
-rw-r--r-- arch/mips/include/asm/socket.h 3
-rw-r--r-- arch/mips/include/asm/swab.h 2
-rw-r--r-- arch/mips/kernel/irq.c 2
-rw-r--r-- arch/mips/kernel/linux32.c 34
-rw-r--r-- arch/mips/kernel/scall64-n32.S 2
-rw-r--r-- arch/mips/kernel/scall64-o32.S 2
-rw-r--r-- arch/mn10300/kernel/irq.c 2
-rw-r--r-- arch/parisc/include/asm/pdc.h 3
-rw-r--r-- arch/parisc/include/asm/socket.h 3
-rw-r--r-- arch/parisc/include/asm/swab.h 2
-rw-r--r-- arch/parisc/kernel/irq.c 2
-rw-r--r-- arch/parisc/kernel/syscall_table.S 2
-rw-r--r-- arch/powerpc/include/asm/bootx.h 2
-rw-r--r-- arch/powerpc/include/asm/elf.h 2
-rw-r--r-- arch/powerpc/include/asm/kvm.h 9
-rw-r--r-- arch/powerpc/include/asm/kvm_44x.h 7
-rw-r--r-- arch/powerpc/include/asm/kvm_asm.h 7
-rw-r--r-- arch/powerpc/include/asm/kvm_e500.h 67
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h 21
-rw-r--r-- arch/powerpc/include/asm/kvm_ppc.h 15
-rw-r--r-- arch/powerpc/include/asm/mmu-fsl-booke.h 2
-rw-r--r-- arch/powerpc/include/asm/ps3fb.h 1
-rw-r--r-- arch/powerpc/include/asm/socket.h 3
-rw-r--r-- arch/powerpc/include/asm/spu_info.h 3
-rw-r--r-- arch/powerpc/include/asm/swab.h 2
-rw-r--r-- arch/powerpc/include/asm/systbl.h 2
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c 4
-rw-r--r-- arch/powerpc/kernel/irq.c 2
-rw-r--r-- arch/powerpc/kvm/44x.c 72
-rw-r--r-- arch/powerpc/kvm/44x_emulate.c 217
-rw-r--r-- arch/powerpc/kvm/44x_tlb.c 39
-rw-r--r-- arch/powerpc/kvm/44x_tlb.h 9
-rw-r--r-- arch/powerpc/kvm/Kconfig 16
-rw-r--r-- arch/powerpc/kvm/Makefile 10
-rw-r--r-- arch/powerpc/kvm/booke.c 50
-rw-r--r-- arch/powerpc/kvm/booke.h 35
-rw-r--r-- arch/powerpc/kvm/booke_emulate.c 266
-rw-r--r-- arch/powerpc/kvm/booke_interrupts.S 5
-rw-r--r-- arch/powerpc/kvm/e500.c 169
-rw-r--r-- arch/powerpc/kvm/e500_emulate.c 202
-rw-r--r-- arch/powerpc/kvm/e500_tlb.c 757
-rw-r--r-- arch/powerpc/kvm/e500_tlb.h 185
-rw-r--r-- arch/powerpc/kvm/emulate.c 93
-rw-r--r-- arch/powerpc/kvm/powerpc.c 31
-rw-r--r-- arch/powerpc/platforms/cell/interrupt.c 4
-rw-r--r-- arch/powerpc/platforms/cell/spufs/sched.c 2
-rw-r--r-- arch/s390/Kconfig 11
-rw-r--r-- arch/s390/crypto/prng.c 3
-rw-r--r-- arch/s390/include/asm/bitops.h 14
-rw-r--r-- arch/s390/include/asm/crw.h 68
-rw-r--r-- arch/s390/include/asm/dasd.h 10
-rw-r--r-- arch/s390/include/asm/idals.h 17
-rw-r--r-- arch/s390/include/asm/kvm.h 7
-rw-r--r-- arch/s390/include/asm/kvm_host.h 3
-rw-r--r-- arch/s390/include/asm/lowcore.h 653
-rw-r--r-- arch/s390/include/asm/mmu_context.h 2
-rw-r--r-- arch/s390/include/asm/nmi.h 66
-rw-r--r-- arch/s390/include/asm/processor.h 16
-rw-r--r-- arch/s390/include/asm/ptrace.h 4
-rw-r--r-- arch/s390/include/asm/qdio.h 1
-rw-r--r-- arch/s390/include/asm/smp.h 7
-rw-r--r-- arch/s390/include/asm/socket.h 3
-rw-r--r-- arch/s390/include/asm/string.h 16
-rw-r--r-- arch/s390/include/asm/sysinfo.h 1
-rw-r--r-- arch/s390/include/asm/tlbflush.h 4
-rw-r--r-- arch/s390/include/asm/topology.h 1
-rw-r--r-- arch/s390/include/asm/vtoc.h 16
-rw-r--r-- arch/s390/kernel/Makefile 4
-rw-r--r-- arch/s390/kernel/bitmap.S 56
-rw-r--r-- arch/s390/kernel/bitmap.c 54
-rw-r--r-- arch/s390/kernel/compat_ptrace.h 3
-rw-r--r-- arch/s390/kernel/compat_wrapper.S 2
-rw-r--r-- arch/s390/kernel/debug.c 9
-rw-r--r-- arch/s390/kernel/early.c 23
-rw-r--r-- arch/s390/kernel/head.S 2
-rw-r--r-- arch/s390/kernel/head31.S 1
-rw-r--r-- arch/s390/kernel/head64.S 1
-rw-r--r-- arch/s390/kernel/ipl.c 74
-rw-r--r-- arch/s390/kernel/module.c 19
-rw-r--r-- arch/s390/kernel/nmi.c 376
-rw-r--r-- arch/s390/kernel/process.c 73
-rw-r--r-- arch/s390/kernel/processor.c 73
-rw-r--r-- arch/s390/kernel/reipl64.S 11
-rw-r--r-- arch/s390/kernel/s390_ksyms.c 44
-rw-r--r-- arch/s390/kernel/setup.c 52
-rw-r--r-- arch/s390/kernel/smp.c 68
-rw-r--r-- arch/s390/kernel/sysinfo.c 428
-rw-r--r-- arch/s390/kernel/time.c 71
-rw-r--r-- arch/s390/kernel/topology.c 2
-rw-r--r-- arch/s390/kernel/traps.c 4
-rw-r--r-- arch/s390/kernel/vdso.c 2
-rw-r--r-- arch/s390/kernel/vmlinux.lds.S 2
-rw-r--r-- arch/s390/kvm/Kconfig 3
-rw-r--r-- arch/s390/kvm/intercept.c 2
-rw-r--r-- arch/s390/kvm/interrupt.c 7
-rw-r--r-- arch/s390/kvm/kvm-s390.c 10
-rw-r--r-- arch/s390/kvm/kvm-s390.h 2
-rw-r--r-- arch/s390/kvm/priv.c 18
-rw-r--r-- arch/s390/kvm/sigp.c 2
-rw-r--r-- arch/s390/lib/delay.c 2
-rw-r--r-- arch/s390/lib/string.c 8
-rw-r--r-- arch/s390/mm/fault.c 28
-rw-r--r-- arch/s390/mm/init.c 2
-rw-r--r-- arch/s390/mm/pgtable.c 6
-rw-r--r-- arch/sh/include/asm/socket.h 3
-rw-r--r-- arch/sh/kernel/irq.c 2
-rw-r--r-- arch/sparc/include/asm/socket.h 3
-rw-r--r-- arch/sparc/include/asm/tlb_64.h 4
-rw-r--r-- arch/sparc/kernel/irq_64.c 2
-rw-r--r-- arch/sparc/kernel/smp_64.c 4
-rw-r--r-- arch/sparc/kernel/systbls_64.S 2
-rw-r--r-- arch/sparc/kernel/time_64.c 6
-rw-r--r-- arch/um/drivers/net_kern.c 39
-rw-r--r-- arch/um/include/shared/net_kern.h 2
-rw-r--r-- arch/um/kernel/irq.c 2
-rw-r--r-- arch/x86/Kconfig 3
-rw-r--r-- arch/x86/ia32/ia32entry.S 2
-rw-r--r-- arch/x86/ia32/sys_ia32.c 22
-rw-r--r-- arch/x86/include/asm/ia32.h 7
-rw-r--r-- arch/x86/include/asm/kvm.h 24
-rw-r--r-- arch/x86/include/asm/kvm_host.h 61
-rw-r--r-- arch/x86/include/asm/msr-index.h 9
-rw-r--r-- arch/x86/include/asm/prctl.h 4
-rw-r--r-- arch/x86/include/asm/setup.h 49
-rw-r--r-- arch/x86/include/asm/socket.h 3
-rw-r--r-- arch/x86/include/asm/svm.h 4
-rw-r--r-- arch/x86/include/asm/sys_ia32.h 2
-rw-r--r-- arch/x86/include/asm/syscalls.h 1
-rw-r--r-- arch/x86/include/asm/virtext.h 2
-rw-r--r-- arch/x86/include/asm/vmx.h 5
-rw-r--r-- arch/x86/kernel/cpu/intel.c 8
-rw-r--r-- arch/x86/kernel/hpet.c 80
-rw-r--r-- arch/x86/kernel/quirks.c 3
-rw-r--r-- arch/x86/kernel/tsc.c 9
-rw-r--r-- arch/x86/kvm/Kconfig 4
-rw-r--r-- arch/x86/kvm/i8254.c 21
-rw-r--r-- arch/x86/kvm/i8254.h 2
-rw-r--r-- arch/x86/kvm/i8259.c 25
-rw-r--r-- arch/x86/kvm/irq.h 2
-rw-r--r-- arch/x86/kvm/kvm_svm.h 16
-rw-r--r-- arch/x86/kvm/mmu.c 237
-rw-r--r-- arch/x86/kvm/mmu.h 2
-rw-r--r-- arch/x86/kvm/paging_tmpl.h 219
-rw-r--r-- arch/x86/kvm/svm.c 916
-rw-r--r-- arch/x86/kvm/vmx.c 393
-rw-r--r-- arch/x86/kvm/x86.c 432
-rw-r--r-- arch/x86/kvm/x86_emulate.c 56
-rw-r--r-- arch/xtensa/include/asm/socket.h 3
-rw-r--r-- arch/xtensa/include/asm/swab.h 2
-rw-r--r-- arch/xtensa/kernel/irq.c 2
218 files changed, 6730 insertions, 2263 deletions
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index a1057c2d95e..3641ec1452f 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -62,6 +62,9 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.
*/
diff --git a/arch/alpha/include/asm/statfs.h b/arch/alpha/include/asm/statfs.h
index de35cd438a1..ccd2e186bfd 100644
--- a/arch/alpha/include/asm/statfs.h
+++ b/arch/alpha/include/asm/statfs.h
@@ -1,6 +1,8 @@
#ifndef _ALPHA_STATFS_H
#define _ALPHA_STATFS_H
+#include <linux/types.h>
+
/* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't
even seem to implement statfs64 */
#define __statfs_word __u32
diff --git a/arch/alpha/include/asm/swab.h b/arch/alpha/include/asm/swab.h
index 68e7089e02d..4d682b16c7c 100644
--- a/arch/alpha/include/asm/swab.h
+++ b/arch/alpha/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _ALPHA_SWAB_H
#define _ALPHA_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/compiler.h>
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index e4a54b61589..b45d913a51c 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -903,8 +903,9 @@ sys_alpha_pipe:
stq $26, 0($sp)
.prologue 0
+ mov $31, $17
lda $16, 8($sp)
- jsr $26, do_pipe
+ jsr $26, do_pipe_flags
ldq $26, 0($sp)
bne $0, 1f
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 703731accda..d3812eb8401 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -90,7 +90,7 @@ show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(irq));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[irq]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j));
#endif
seq_printf(p, " %14s", irq_desc[irq].chip->typename);
seq_printf(p, " %c%s",
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index e16aeb6e79e..67c19f8a994 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -64,7 +64,7 @@ do_entInt(unsigned long type, unsigned long vector,
smp_percpu_timer_interrupt(regs);
cpu = smp_processor_id();
if (cpu != boot_cpuid) {
- kstat_cpu(cpu).irqs[RTC_IRQ]++;
+ kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ));
} else {
handle_irq(RTC_IRQ);
}
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index ae41f097864..42ee05981e7 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -46,8 +46,6 @@
#include <asm/hwrpb.h>
#include <asm/processor.h>
-extern int do_pipe(int *);
-
/*
* Brk needs to return an error. Still support Linux's brk(0) query idiom,
* which OSF programs just shouldn't be doing. We're still not quite
diff --git a/arch/arm/include/asm/a.out.h b/arch/arm/include/asm/a.out.h
index 79489fdcc8b..083894b2e3b 100644
--- a/arch/arm/include/asm/a.out.h
+++ b/arch/arm/include/asm/a.out.h
@@ -2,7 +2,7 @@
#define __ARM_A_OUT_H__
#include <linux/personality.h>
-#include <asm/types.h>
+#include <linux/types.h>
struct exec
{
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index f2cd18a0932..ee1304f22f9 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -14,7 +14,7 @@
#ifndef __ASMARM_SETUP_H
#define __ASMARM_SETUP_H
-#include <asm/types.h>
+#include <linux/types.h>
#define COMMAND_LINE_SIZE 1024
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 6817be9573a..537de4e0ef5 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h
index 27a689be085..ca2bf2f6d6e 100644
--- a/arch/arm/include/asm/swab.h
+++ b/arch/arm/include/asm/swab.h
@@ -16,7 +16,7 @@
#define __ASM_ARM_SWAB_H
#include <linux/compiler.h>
-#include <asm/types.h>
+#include <linux/types.h>
#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 363db186cb9..7296f041628 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%3d: ", i);
for_each_present_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-");
seq_printf(p, " %s", action->name);
for (action = action->next; action; action = action->next)
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index b3404b7775b..0d2074f51a5 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -231,14 +231,17 @@ static struct platform_device kirkwood_switch_device = {
void __init kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq)
{
+ int i;
+
if (irq != NO_IRQ) {
kirkwood_switch_resources[0].start = irq;
kirkwood_switch_resources[0].end = irq;
kirkwood_switch_device.num_resources = 1;
}
- d->mii_bus = &kirkwood_ge00_shared.dev;
d->netdev = &kirkwood_ge00.dev;
+ for (i = 0; i < d->nr_chips; i++)
+ d->chip[i].mii_bus = &kirkwood_ge00_shared.dev;
kirkwood_switch_device.dev.platform_data = d;
platform_device_register(&kirkwood_switch_device);
diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c
index 9a0e905d10c..e1c0516c4df 100644
--- a/arch/arm/mach-kirkwood/rd88f6281-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c
@@ -75,7 +75,7 @@ static struct mv643xx_eth_platform_data rd88f6281_ge00_data = {
.duplex = DUPLEX_FULL,
};
-static struct dsa_platform_data rd88f6281_switch_data = {
+static struct dsa_chip_data rd88f6281_switch_chip_data = {
.port_names[0] = "lan1",
.port_names[1] = "lan2",
.port_names[2] = "lan3",
@@ -83,6 +83,11 @@ static struct dsa_platform_data rd88f6281_switch_data = {
.port_names[5] = "cpu",
};
+static struct dsa_platform_data rd88f6281_switch_plat_data = {
+ .nr_chips = 1,
+ .chip = &rd88f6281_switch_chip_data,
+};
+
static struct mv643xx_eth_platform_data rd88f6281_ge01_data = {
.phy_addr = MV643XX_ETH_PHY_ADDR(11),
};
@@ -105,12 +110,12 @@ static void __init rd88f6281_init(void)
kirkwood_ge00_init(&rd88f6281_ge00_data);
kirkwood_pcie_id(&dev, &rev);
if (rev == MV88F6281_REV_A0) {
- rd88f6281_switch_data.sw_addr = 10;
+ rd88f6281_switch_chip_data.sw_addr = 10;
kirkwood_ge01_init(&rd88f6281_ge01_data);
} else {
- rd88f6281_switch_data.port_names[4] = "wan";
+ rd88f6281_switch_chip_data.port_names[4] = "wan";
}
- kirkwood_ge00_switch_init(&rd88f6281_switch_data, NO_IRQ);
+ kirkwood_ge00_switch_init(&rd88f6281_switch_plat_data, NO_IRQ);
kirkwood_rtc_init();
kirkwood_sata_init(&rd88f6281_sata_data);
diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c
index 22e0eb6e9ec..feb0e54a91d 100644
--- a/arch/arm/mach-ns9xxx/irq.c
+++ b/arch/arm/mach-ns9xxx/irq.c
@@ -63,7 +63,6 @@ static struct irq_chip ns9xxx_chip = {
#else
static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
{
- unsigned int cpu = smp_processor_id();
struct irqaction *action;
irqreturn_t action_ret;
@@ -72,7 +71,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
BUG_ON(desc->status & IRQ_INPROGRESS);
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
- kstat_cpu(cpu).irqs[irq]++;
+ kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED)))
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 8a0e49d8425..68cc3efae56 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -31,6 +31,7 @@
#include <plat/ehci-orion.h>
#include <plat/mv_xor.h>
#include <plat/orion_nand.h>
+#include <plat/orion5x_wdt.h>
#include <plat/time.h>
#include "common.h"
@@ -219,14 +220,17 @@ static struct platform_device orion5x_switch_device = {
void __init orion5x_eth_switch_init(struct dsa_platform_data *d, int irq)
{
+ int i;
+
if (irq != NO_IRQ) {
orion5x_switch_resources[0].start = irq;
orion5x_switch_resources[0].end = irq;
orion5x_switch_device.num_resources = 1;
}
- d->mii_bus = &orion5x_eth_shared.dev;
d->netdev = &orion5x_eth.dev;
+ for (i = 0; i < d->nr_chips; i++)
+ d->chip[i].mii_bus = &orion5x_eth_shared.dev;
orion5x_switch_device.dev.platform_data = d;
platform_device_register(&orion5x_switch_device);
@@ -533,6 +537,29 @@ void __init orion5x_xor_init(void)
/*****************************************************************************
+ * Watchdog
+ ****************************************************************************/
+static struct orion5x_wdt_platform_data orion5x_wdt_data = {
+ .tclk = 0,
+};
+
+static struct platform_device orion5x_wdt_device = {
+ .name = "orion5x_wdt",
+ .id = -1,
+ .dev = {
+ .platform_data = &orion5x_wdt_data,
+ },
+ .num_resources = 0,
+};
+
+void __init orion5x_wdt_init(void)
+{
+ orion5x_wdt_data.tclk = orion5x_tclk;
+ platform_device_register(&orion5x_wdt_device);
+}
+
+
+/*****************************************************************************
* Time handling
****************************************************************************/
int orion5x_tclk;
@@ -631,6 +658,11 @@ void __init orion5x_init(void)
printk(KERN_INFO "Orion: Applying 5281 D0 WFI workaround.\n");
disable_hlt();
}
+
+ /*
+ * Register watchdog driver
+ */
+ orion5x_wdt_init();
}
/*
diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
index 15f53235ee3..9c1ca41730b 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
@@ -94,7 +94,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_fxo_eth_data = {
.duplex = DUPLEX_FULL,
};
-static struct dsa_platform_data rd88f5181l_fxo_switch_data = {
+static struct dsa_chip_data rd88f5181l_fxo_switch_chip_data = {
.port_names[0] = "lan2",
.port_names[1] = "lan1",
.port_names[2] = "wan",
@@ -103,6 +103,11 @@ static struct dsa_platform_data rd88f5181l_fxo_switch_data = {
.port_names[7] = "lan3",
};
+static struct dsa_platform_data rd88f5181l_fxo_switch_plat_data = {
+ .nr_chips = 1,
+ .chip = &rd88f5181l_fxo_switch_chip_data,
+};
+
static void __init rd88f5181l_fxo_init(void)
{
/*
@@ -117,7 +122,7 @@ static void __init rd88f5181l_fxo_init(void)
*/
orion5x_ehci0_init();
orion5x_eth_init(&rd88f5181l_fxo_eth_data);
- orion5x_eth_switch_init(&rd88f5181l_fxo_switch_data, NO_IRQ);
+ orion5x_eth_switch_init(&rd88f5181l_fxo_switch_plat_data, NO_IRQ);
orion5x_uart0_init();
orion5x_setup_dev_boot_win(RD88F5181L_FXO_NOR_BOOT_BASE,
diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
index 8ad3934399d..ee1399ff0ce 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
@@ -95,7 +95,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_ge_eth_data = {
.duplex = DUPLEX_FULL,
};
-static struct dsa_platform_data rd88f5181l_ge_switch_data = {
+static struct dsa_chip_data rd88f5181l_ge_switch_chip_data = {
.port_names[0] = "lan2",
.port_names[1] = "lan1",
.port_names[2] = "wan",
@@ -104,6 +104,11 @@ static struct dsa_platform_data rd88f5181l_ge_switch_data = {
.port_names[7] = "lan3",
};
+static struct dsa_platform_data rd88f5181l_ge_switch_plat_data = {
+ .nr_chips = 1,
+ .chip = &rd88f5181l_ge_switch_chip_data,
+};
+
static struct i2c_board_info __initdata rd88f5181l_ge_i2c_rtc = {
I2C_BOARD_INFO("ds1338", 0x68),
};
@@ -122,7 +127,8 @@ static void __init rd88f5181l_ge_init(void)
*/
orion5x_ehci0_init();
orion5x_eth_init(&rd88f5181l_ge_eth_data);
- orion5x_eth_switch_init(&rd88f5181l_ge_switch_data, gpio_to_irq(8));
+ orion5x_eth_switch_init(&rd88f5181l_ge_switch_plat_data,
+ gpio_to_irq(8));
orion5x_i2c_init();
orion5x_uart0_init();
diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
index 262e25e4dac..7737cf9a8f5 100644
--- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
@@ -35,7 +35,7 @@ static struct mv643xx_eth_platform_data rd88f6183ap_ge_eth_data = {
.duplex = DUPLEX_FULL,
};
-static struct dsa_platform_data rd88f6183ap_ge_switch_data = {
+static struct dsa_chip_data rd88f6183ap_ge_switch_chip_data = {
.port_names[0] = "lan1",
.port_names[1] = "lan2",
.port_names[2] = "lan3",
@@ -44,6 +44,11 @@ static struct dsa_platform_data rd88f6183ap_ge_switch_data = {
.port_names[5] = "cpu",
};
+static struct dsa_platform_data rd88f6183ap_ge_switch_plat_data = {
+ .nr_chips = 1,
+ .chip = &rd88f6183ap_ge_switch_chip_data,
+};
+
static struct mtd_partition rd88f6183ap_ge_partitions[] = {
{
.name = "kernel",
@@ -89,7 +94,8 @@ static void __init rd88f6183ap_ge_init(void)
*/
orion5x_ehci0_init();
orion5x_eth_init(&rd88f6183ap_ge_eth_data);
- orion5x_eth_switch_init(&rd88f6183ap_ge_switch_data, gpio_to_irq(3));
+ orion5x_eth_switch_init(&rd88f6183ap_ge_switch_plat_data,
+ gpio_to_irq(3));
spi_register_board_info(rd88f6183ap_ge_spi_slave_info,
ARRAY_SIZE(rd88f6183ap_ge_spi_slave_info));
orion5x_spi_init();
diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
index cc8f8920086..1b4ad9d5e2e 100644
--- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c
+++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
@@ -106,7 +106,7 @@ static struct mv643xx_eth_platform_data wrt350n_v2_eth_data = {
.duplex = DUPLEX_FULL,
};
-static struct dsa_platform_data wrt350n_v2_switch_data = {
+static struct dsa_chip_data wrt350n_v2_switch_chip_data = {
.port_names[0] = "lan2",
.port_names[1] = "lan1",
.port_names[2] = "wan",
@@ -115,6 +115,11 @@ static struct dsa_platform_data wrt350n_v2_switch_data = {
.port_names[7] = "lan4",
};
+static struct dsa_platform_data wrt350n_v2_switch_plat_data = {
+ .nr_chips = 1,
+ .chip = &wrt350n_v2_switch_chip_data,
+};
+
static void __init wrt350n_v2_init(void)
{
/*
@@ -129,7 +134,7 @@ static void __init wrt350n_v2_init(void)
*/
orion5x_ehci0_init();
orion5x_eth_init(&wrt350n_v2_eth_data);
- orion5x_eth_switch_init(&wrt350n_v2_switch_data, NO_IRQ);
+ orion5x_eth_switch_init(&wrt350n_v2_switch_plat_data, NO_IRQ);
orion5x_uart0_init();
orion5x_setup_dev_boot_win(WRT350N_V2_NOR_BOOT_BASE,
diff --git a/arch/arm/plat-orion/include/plat/orion5x_wdt.h b/arch/arm/plat-orion/include/plat/orion5x_wdt.h
new file mode 100644
index 00000000000..3c9cf6a305e
--- /dev/null
+++ b/arch/arm/plat-orion/include/plat/orion5x_wdt.h
@@ -0,0 +1,18 @@
+/*
+ * arch/arm/plat-orion/include/plat/orion5x_wdt.h
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __PLAT_ORION5X_WDT_H
+#define __PLAT_ORION5X_WDT_H
+
+struct orion5x_wdt_platform_data {
+ u32 tclk; /* no <linux/clk.h> support yet */
+};
+
+
+#endif
+
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index 35863f26092..04c86061970 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/avr32/include/asm/swab.h b/arch/avr32/include/asm/swab.h
index a14aa5b46d9..14cc737bbca 100644
--- a/arch/avr32/include/asm/swab.h
+++ b/arch/avr32/include/asm/swab.h
@@ -4,7 +4,7 @@
#ifndef __ASM_AVR32_SWAB_H
#define __ASM_AVR32_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
index a8e767d836a..9f572229d31 100644
--- a/arch/avr32/kernel/irq.c
+++ b/arch/avr32/kernel/irq.c
@@ -58,7 +58,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%3d: ", i);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
seq_printf(p, " %8s", irq_desc[i].chip->name ? : "-");
seq_printf(p, " %s", action->name);
for (action = action->next; action; action = action->next)
diff --git a/arch/blackfin/include/asm/socket.h b/arch/blackfin/include/asm/socket.h
index 2ca702e44d4..fac7fe9e1f8 100644
--- a/arch/blackfin/include/asm/socket.h
+++ b/arch/blackfin/include/asm/socket.h
@@ -53,4 +53,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/blackfin/include/asm/swab.h b/arch/blackfin/include/asm/swab.h
index 69a051b612b..6403ad2932e 100644
--- a/arch/blackfin/include/asm/swab.h
+++ b/arch/blackfin/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _BLACKFIN_SWAB_H
#define _BLACKFIN_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index 7fd12656484..bd052a67032 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v)
goto skip;
seq_printf(p, "%3d: ", i);
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
seq_printf(p, " %8s", irq_desc[i].chip->name);
seq_printf(p, " %s", action->name);
for (action = action->next; action; action = action->next)
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 9df0ca82f5d..d5cf7400540 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -56,6 +56,9 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c
index 2dfac8c7909..7f642fcffbf 100644
--- a/arch/cris/kernel/irq.c
+++ b/arch/cris/kernel/irq.c
@@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %s", action->name);
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 73abae767fd..af3e824b91b 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (action) {
seq_printf(p, "%3d: ", i);
for_each_present_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-");
seq_printf(p, " %s", action->name);
for (action = action->next;
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index da2520dbf25..602518a70a1 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/h8300/include/asm/swab.h b/arch/h8300/include/asm/swab.h
index c108f39b8bc..39abbf52807 100644
--- a/arch/h8300/include/asm/swab.h
+++ b/arch/h8300/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _H8300_SWAB_H
#define _H8300_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index ef4f0047067..74f8dd7b34d 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -183,7 +183,7 @@ asmlinkage void do_IRQ(int irq)
#if defined(CONFIG_PROC_FS)
int show_interrupts(struct seq_file *p, void *v)
{
- int i = *(loff_t *) v, j;
+ int i = *(loff_t *) v;
struct irqaction * action;
unsigned long flags;
@@ -196,7 +196,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (!action)
goto unlock;
seq_printf(p, "%3d: ",i);
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs(i));
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, "-%-8s", irq_desc[i].name);
seq_printf(p, " %s", action->name);
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index a46f8395e9a..af9405cd70e 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -240,7 +240,7 @@ ia32_syscall_table:
data8 sys_ni_syscall
data8 sys_umask /* 60 */
data8 sys_chroot
- data8 sys_ustat
+ data8 compat_sys_ustat
data8 sys_dup2
data8 sys_getppid
data8 sys_getpgrp /* 65 */
diff --git a/arch/ia64/include/asm/fpu.h b/arch/ia64/include/asm/fpu.h
index 3859558ff0a..0c26157cffa 100644
--- a/arch/ia64/include/asm/fpu.h
+++ b/arch/ia64/include/asm/fpu.h
@@ -6,8 +6,6 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
-#include <asm/types.h>
-
/* floating point status register: */
#define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */
#define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */
diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h
index 0f5b5592175..c2c5fd8fcac 100644
--- a/arch/ia64/include/asm/gcc_intrin.h
+++ b/arch/ia64/include/asm/gcc_intrin.h
@@ -6,6 +6,7 @@
* Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
*/
+#include <linux/types.h>
#include <linux/compiler.h>
/* define this macro to get some asm stmts included in 'c' files */
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index a3e44a5ed49..c47830e26cb 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/* include compiler specific intrinsics */
#include <asm/ia64regs.h>
#ifdef __INTEL_COMPILER
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6af7c..18a7e49abbc 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -21,8 +21,7 @@
*
*/
-#include <asm/types.h>
-
+#include <linux/types.h>
#include <linux/ioctl.h>
/* Select x86 specific features in <linux/kvm.h> */
@@ -166,7 +165,40 @@ struct saved_vpd {
unsigned long vcpuid[5];
unsigned long vpsr;
unsigned long vpr;
- unsigned long vcr[128];
+ union {
+ unsigned long vcr[128];
+ struct {
+ unsigned long dcr;
+ unsigned long itm;
+ unsigned long iva;
+ unsigned long rsv1[5];
+ unsigned long pta;
+ unsigned long rsv2[7];
+ unsigned long ipsr;
+ unsigned long isr;
+ unsigned long rsv3;
+ unsigned long iip;
+ unsigned long ifa;
+ unsigned long itir;
+ unsigned long iipa;
+ unsigned long ifs;
+ unsigned long iim;
+ unsigned long iha;
+ unsigned long rsv4[38];
+ unsigned long lid;
+ unsigned long ivr;
+ unsigned long tpr;
+ unsigned long eoi;
+ unsigned long irr[4];
+ unsigned long itv;
+ unsigned long pmv;
+ unsigned long cmcv;
+ unsigned long rsv5[5];
+ unsigned long lrr0;
+ unsigned long lrr1;
+ unsigned long rsv6[46];
+ };
+ };
};
struct kvm_regs {
@@ -214,4 +246,18 @@ struct kvm_sregs {
struct kvm_fpu {
};
+/*
+ * Size of the vcpu stack image passed through struct kvm_ia64_vcpu_stack:
+ * 1UL << 16 bytes.
+ */
+#define KVM_IA64_VCPU_STACK_SHIFT 16
+#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+/* Raw byte buffer of KVM_IA64_VCPU_STACK_SIZE bytes. */
+struct kvm_ia64_vcpu_stack {
+ unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+/* Empty placeholder: no fields are defined for ia64. */
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+/* Empty placeholder: no fields are defined for ia64. */
+struct kvm_guest_debug_arch {
+};
+
#endif
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 34866366165..4542651e6ac 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -112,7 +112,11 @@
#define VCPU_STRUCT_SHIFT 16
#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} in arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT 16
+#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
#define KVM_VM_STRUCT_SHIFT 19
#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
@@ -153,10 +157,10 @@ struct kvm_vm_data {
struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
};
-#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
- offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE KVM_VM_DATA_BASE + \
- offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \
+ offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE (KVM_VM_DATA_BASE + \
+ offsetof(struct kvm_vm_data, kvm_vm_struct))
#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
@@ -235,8 +239,6 @@ struct kvm_vm_data {
struct kvm;
struct kvm_vcpu;
-struct kvm_guest_debug{
-};
struct kvm_mmio_req {
uint64_t addr; /* physical address */
@@ -462,6 +464,8 @@ struct kvm_arch {
unsigned long metaphysical_rr4;
unsigned long vmm_init_rr;
+ int online_vcpus;
+
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 00000000000..592c1047a0c
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT 0
+/* The argument is parenthesized so the u8 cast applies to the whole expression. */
+#define MSI_DATA_VECTOR(v) (((u8)(v)) << MSI_DATA_VECTOR_SHIFT)
+/* Bits 7:0 (the vector field) are clear: the mask selects all other bits. */
+#define MSI_DATA_VECTOR_MASK 0xffffff00
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT 8
+#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT 14
+#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT 15
+#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
+#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define MSI_ADDR_DEST_ID_SHIFT 4
+#define MSI_ADDR_HEADER 0xfee00000
+
+/* Bits 19:4 (the destination ID field) are clear: the mask selects all other bits. */
+#define MSI_ADDR_DEST_ID_MASK 0xfff0000f
+#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define MSI_ADDR_DEST_MODE_SHIFT 2
+#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT 3
+#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index d5ef0aa3e31..745421225ec 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -63,4 +63,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/ia64/include/asm/swab.h b/arch/ia64/include/asm/swab.h
index 6aa58b699ee..c89a8cb5d8a 100644
--- a/arch/ia64/include/asm/swab.h
+++ b/arch/ia64/include/asm/swab.h
@@ -6,7 +6,7 @@
* David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
*/
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/intrinsics.h>
#include <linux/compiler.h>
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index a58f64ca9f0..4f596613bff 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -80,7 +80,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j) {
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
}
#endif
seq_printf(p, " %14s", irq_desc[i].chip->name);
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 89033933903..368ee4e5266 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -7,44 +7,7 @@
#include <linux/msi.h>
#include <linux/dmar.h>
#include <asm/smp.h>
-
-/*
- * Shifts for APIC-based data
- */
-
-#define MSI_DATA_VECTOR_SHIFT 0
-#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT)
-#define MSI_DATA_VECTOR_MASK 0xffffff00
-
-#define MSI_DATA_DELIVERY_SHIFT 8
-#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
-#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT 14
-#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
-#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT 15
-#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
-#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for APIC-based bus address
- */
-
-#define MSI_TARGET_CPU_SHIFT 4
-#define MSI_ADDR_HEADER 0xfee00000
-
-#define MSI_ADDR_DESTID_MASK 0xfff0000f
-#define MSI_ADDR_DESTID_CPU(cpu) ((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define MSI_ADDR_DESTMODE_SHIFT 2
-#define MSI_ADDR_DESTMODE_PHYS (0 << MSI_ADDR_DESTMODE_SHIFT)
-#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT 3
-#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+#include <asm/msidef.h>
static struct irq_chip ia64_msi_chip;
@@ -65,8 +28,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
read_msi_msg(irq, &msg);
addr = msg.address_lo;
- addr &= MSI_ADDR_DESTID_MASK;
- addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+ addr &= MSI_ADDR_DEST_ID_MASK;
+ addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
msg.address_lo = addr;
data = msg.data;
@@ -98,9 +61,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
msg.address_hi = 0;
msg.address_lo =
MSI_ADDR_HEADER |
- MSI_ADDR_DESTMODE_PHYS |
+ MSI_ADDR_DEST_MODE_PHYS |
MSI_ADDR_REDIRECTION_CPU |
- MSI_ADDR_DESTID_CPU(dest_phys_id);
+ MSI_ADDR_DEST_ID_CPU(dest_phys_id);
msg.data =
MSI_DATA_TRIGGER_EDGE |
@@ -183,8 +146,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
- msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
irq_desc[irq].affinity = *mask;
@@ -215,9 +178,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
msg->address_hi = 0;
msg->address_lo =
MSI_ADDR_HEADER |
- MSI_ADDR_DESTMODE_PHYS |
+ MSI_ADDR_DEST_MODE_PHYS |
MSI_ADDR_REDIRECTION_CPU |
- MSI_ADDR_DESTID_CPU(dest);
+ MSI_ADDR_DEST_ID_CPU(dest);
msg->data =
MSI_DATA_TRIGGER_EDGE |
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 0e499757309..5c0f408cfd7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2196,7 +2196,7 @@ pfmfs_delete_dentry(struct dentry *dentry)
return 1;
}
-static struct dentry_operations pfmfs_dentry_operations = {
+static const struct dentry_operations pfmfs_dentry_operations = {
.d_delete = pfmfs_delete_dentry,
};
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index f833a0b4188..0a2d6b86075 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || IA64
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index c6786e8b1bf..c0785a72827 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,6 +23,8 @@
#ifndef __IRQ_H
#define __IRQ_H
+#include "lapic.h"
+
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return 1;
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 28f982045f2..076b00d1dbf 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_MP_STATE:
-
+ case KVM_CAP_IRQ_INJECT_STATUS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -314,7 +314,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
union ia64_lid lid;
int i;
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ for (i = 0; i < kvm->arch.online_vcpus; i++) {
if (kvm->vcpus[i]) {
lid.val = VCPU_LID(kvm->vcpus[i]);
if (lid.id == id && lid.eid == eid)
@@ -388,7 +388,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
call_data.ptc_g_data = p->u.ptc_g_data;
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ for (i = 0; i < kvm->arch.online_vcpus; i++) {
if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
KVM_MP_STATE_UNINITIALIZED ||
vcpu == kvm->vcpus[i])
@@ -788,6 +788,8 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
kvm_init_vm(kvm);
+ kvm->arch.online_vcpus = 0;
+
return kvm;
}
@@ -919,7 +921,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r)
goto out;
+ r = kvm_setup_default_irq_routing(kvm);
+ if (r) {
+ kfree(kvm->arch.vioapic);
+ goto out;
+ }
break;
+ case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -927,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
if (irqchip_in_kernel(kvm)) {
+ __s32 status;
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ irq_event.status = status;
+ if (copy_to_user(argp, &irq_event,
+ sizeof irq_event))
+ goto out;
+ }
r = 0;
}
break;
@@ -1149,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/*Initialize itc offset for vcpus*/
itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ for (i = 0; i < kvm->arch.online_vcpus; i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
v->arch.itc_offset = itc_offset;
@@ -1283,6 +1298,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
goto fail;
}
+ kvm->arch.online_vcpus++;
+
return vcpu;
fail:
return ERR_PTR(r);
@@ -1303,8 +1320,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -EINVAL;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
@@ -1421,6 +1438,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+/*
+ * Copy sizeof(struct kvm_ia64_vcpu_stack) bytes, starting at the address of
+ * @vcpu itself, into @stack. Always returns 0.
+ * NOTE(review): presumably relies on the vcpu structure sitting at the base
+ * of a region at least that large -- confirm against the vcpu allocation.
+ */
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+ struct kvm_ia64_vcpu_stack *stack)
+{
+ memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+ return 0;
+}
+
+/*
+ * Restore the vcpu area from @stack. The first sizeof(struct kvm_vcpu) bytes
+ * of the image are not copied over the live vcpu; only the bytes after them
+ * are written, starting right behind @vcpu. From the skipped leading part,
+ * only arch.exit_data is taken over. Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+ struct kvm_ia64_vcpu_stack *stack)
+{
+ /* destination: first byte after *vcpu; source: same offset in the image */
+ memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+ sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+ /* the image begins with a struct kvm_vcpu; reuse only its exit_data */
+ vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+ return 0;
+}
+
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
@@ -1430,9 +1464,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+/*
+ * Arch-specific vcpu ioctls.
+ *
+ * KVM_IA64_VCPU_GET_STACK / KVM_IA64_VCPU_SET_STACK: @arg points to a user
+ * pointer to a struct kvm_ia64_vcpu_stack. The image is copied out to, or in
+ * from, that user buffer through a temporary kernel allocation that is freed
+ * on every exit path.
+ *
+ * Returns 0 on success, -EFAULT when a user access fails, -ENOMEM when the
+ * temporary buffer cannot be allocated, -EINVAL for any other ioctl.
+ */
long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+ unsigned int ioctl, unsigned long arg)
{
- return -EINVAL;
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ struct kvm_ia64_vcpu_stack *stack = NULL;
+ long r;
+
+ switch (ioctl) {
+ case KVM_IA64_VCPU_GET_STACK: {
+ struct kvm_ia64_vcpu_stack __user *user_stack;
+ void __user *first_p = argp;
+
+ r = -EFAULT;
+ if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+ goto out;
+
+ if (!access_ok(VERIFY_WRITE, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack))) {
+ printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+ "Illegal user destination address for stack\n");
+ goto out;
+ }
+ stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+ if (!stack) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+ if (r)
+ goto out;
+
+ if (copy_to_user(user_stack, stack,
+ sizeof(struct kvm_ia64_vcpu_stack))) {
+ /* r is 0 here; a failed copy-out must not look like success */
+ r = -EFAULT;
+ goto out;
+ }
+
+ break;
+ }
+ case KVM_IA64_VCPU_SET_STACK: {
+ struct kvm_ia64_vcpu_stack __user *user_stack;
+ void __user *first_p = argp;
+
+ /* r stays -EFAULT until kvm_arch_vcpu_ioctl_set_stack() runs */
+ r = -EFAULT;
+ if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+ goto out;
+
+ if (!access_ok(VERIFY_READ, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack))) {
+ printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+ "Illegal user address for stack\n");
+ goto out;
+ }
+ stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+ if (!stack) {
+ r = -ENOMEM;
+ goto out;
+ }
+ if (copy_from_user(stack, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack)))
+ goto out;
+
+ r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+ break;
+ }
+
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ /* stack is NULL on the paths that never allocated; kfree(NULL) is a no-op */
+ kfree(stack);
+ return r;
}
int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1472,7 +1575,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
}
long kvm_arch_dev_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+ unsigned int ioctl, unsigned long arg)
{
return -EINVAL;
}
@@ -1737,7 +1840,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
struct kvm_vcpu *lvcpu = kvm->vcpus[0];
int i;
- for (i = 1; i < KVM_MAX_VCPUS; i++) {
+ for (i = 1; i < kvm->arch.online_vcpus; i++) {
if (!kvm->vcpus[i])
continue;
if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index cb7600bdff9..a8ae52ed563 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -227,6 +227,18 @@ static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
return result;
}
+/*
+ * Emulate PAL_REGISTER_INFO by forwarding the guest's second call argument
+ * (in1) to ia64_pal_register_info() and returning its status and the two
+ * values it reports.
+ * NOTE(review): the two values are stored in v1/v2 here, whereas the
+ * PAL_VM_PAGE_SIZE and PAL_VERSION cases in kvm_pal_emul() store their
+ * results in v0/v1 -- confirm which return slots the guest expects.
+ */
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ long in0, in1, in2, in3;
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+ return result;
+}
+
static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
{
@@ -268,8 +280,12 @@ static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+/*
+ * Emulate PAL_VM_INFO by forwarding the guest's in1/in2 arguments to
+ * ia64_pal_vm_info() and returning its status together with the two values
+ * it reports (stored in v1 and v2).
+ */
static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
{
- struct ia64_pal_retval result;
+ /* zero-init: v0 is not written below and must not carry stack garbage */
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ unsigned long in0, in1, in2, in3;
- INIT_PAL_STATUS_UNIMPLEMENTED(result);
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+ result.status = ia64_pal_vm_info(in1, in2,
+ (pal_tc_info_u_t *)&result.v1, &result.v2);
return result;
}
@@ -292,6 +308,108 @@ static void prepare_for_halt(struct kvm_vcpu *vcpu)
vcpu->arch.timer_fired = 0;
}
+/*
+ * Emulate PAL_PERF_MON_INFO: query the host through
+ * ia64_pal_perf_mon_info() into a local 16-word buffer and, on success,
+ * copy that buffer to the address held in in1. A zero in1 yields
+ * PAL_STATUS_EINVAL. The value written through the second argument of the
+ * host call is returned in the second field of the result.
+ * NOTE(review): in1 is dereferenced as a host pointer; presumably it is the
+ * translated gr29 prepared by set_pal_call_data() -- confirm.
+ */
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+ long status;
+ unsigned long in0, in1, in2, in3, r9;
+ unsigned long pm_buffer[16];
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ status = ia64_pal_perf_mon_info(pm_buffer,
+ (pal_perf_mon_info_u_t *) &r9);
+ if (status != 0) {
+ printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+ } else {
+ if (in1)
+ memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+ else {
+ status = PAL_STATUS_EINVAL;
+ printk(KERN_WARNING"Invalid parameters "
+ "for PAL call:0x%lx!\n", in0);
+ }
+ }
+ return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+/*
+ * Emulate PAL_HALT_INFO: write one fixed 64-bit descriptor (built from the
+ * constants below, not queried from the host) to the address held in in1.
+ * Returns status 0 on success, PAL_STATUS_EINVAL when in1 is zero.
+ * NOTE(review): in1 is dereferenced as a host pointer; presumably it is the
+ * translated gr29 prepared by set_pal_call_data() -- confirm.
+ */
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+ unsigned long in0, in1, in2, in3;
+ long status;
+ unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+ | (1UL << 61) | (1UL << 60);
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ if (in1) {
+ memcpy((void *)in1, &res, sizeof(res));
+ status = 0;
+ } else{
+ status = PAL_STATUS_EINVAL;
+ printk(KERN_WARNING"Invalid parameters "
+ "for PAL call:0x%lx!\n", in0);
+ }
+
+ return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+/*
+ * Emulate PAL_MEM_ATTRIB by passing the host's ia64_pal_mem_attrib() status
+ * and reported value straight through; no guest arguments are consulted.
+ */
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+ unsigned long r9;
+ long status;
+
+ status = ia64_pal_mem_attrib(&r9);
+
+ return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+/*
+ * smp_call_function() callback: @v carries the transaction type as an
+ * integer packed into the pointer value, not a real address.
+ */
+static void remote_pal_prefetch_visibility(void *v)
+{
+ s64 trans_type = (s64)v;
+ ia64_pal_prefetch_visibility(trans_type);
+}
+
+/*
+ * Emulate PAL_PREFETCH_VISIBILITY: run the host call with in1 on this CPU
+ * and, when it returns 0, repeat it on the other CPUs through
+ * smp_call_function(); in that case the status reported to the guest is
+ * overridden to 1. Any non-zero host status is returned unchanged.
+ */
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ unsigned long in0, in1, in2, in3;
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ result.status = ia64_pal_prefetch_visibility(in1);
+ if (result.status == 0) {
+ /* Must be performed on all remote processors
+ in the coherence domain. */
+ smp_call_function(remote_pal_prefetch_visibility,
+ (void *)in1, 1);
+ /* Unnecessary on remote processor for other vcpus!*/
+ result.status = 1;
+ }
+ return result;
+}
+
+/* smp_call_function() callback: run ia64_pal_mc_drain(); @v is unused. */
+static void remote_pal_mc_drain(void *v)
+{
+ ia64_pal_mc_drain();
+}
+
+/*
+ * Emulate PAL_BRAND_INFO: only accepted when in1 is 0 and in2 is non-zero.
+ * The host brand string is fetched into a 128-byte local buffer and, on
+ * PAL_STATUS_SUCCESS, all 128 bytes are copied to the address held in in2.
+ * NOTE(review): the rejection path logs "Invalid parameters" but reports
+ * PAL_STATUS_REQUIRES_MEMORY, while the sibling handlers report
+ * PAL_STATUS_EINVAL for the same message -- confirm this is intended.
+ */
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ unsigned long in0, in1, in2, in3;
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+ if (in1 == 0 && in2) {
+ char brand_info[128];
+ result.status = ia64_pal_get_brand_info(brand_info);
+ if (result.status == PAL_STATUS_SUCCESS)
+ memcpy((void *)in2, brand_info, 128);
+ } else {
+ result.status = PAL_STATUS_REQUIRES_MEMORY;
+ printk(KERN_WARNING"Invalid parameters for "
+ "PAL call:0x%lx!\n", in0);
+ }
+
+ return result;
+}
+
int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
@@ -300,14 +418,22 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
int ret = 1;
gr28 = kvm_get_pal_call_index(vcpu);
- /*printk("pal_call index:%lx\n",gr28);*/
switch (gr28) {
case PAL_CACHE_FLUSH:
result = pal_cache_flush(vcpu);
break;
+ case PAL_MEM_ATTRIB:
+ result = pal_mem_attrib(vcpu);
+ break;
case PAL_CACHE_SUMMARY:
result = pal_cache_summary(vcpu);
break;
+ case PAL_PERF_MON_INFO:
+ result = pal_perf_mon_info(vcpu);
+ break;
+ case PAL_HALT_INFO:
+ result = pal_halt_info(vcpu);
+ break;
case PAL_HALT_LIGHT:
{
INIT_PAL_STATUS_SUCCESS(result);
@@ -317,6 +443,16 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
break;
+ case PAL_PREFETCH_VISIBILITY:
+ result = pal_prefetch_visibility(vcpu);
+ break;
+ case PAL_MC_DRAIN:
+ result.status = ia64_pal_mc_drain();
+ /* FIXME: All vcpus likely call PAL_MC_DRAIN.
+ That causes the congestion. */
+ smp_call_function(remote_pal_mc_drain, NULL, 1);
+ break;
+
case PAL_FREQ_RATIOS:
result = pal_freq_ratios(vcpu);
break;
@@ -346,6 +482,9 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
INIT_PAL_STATUS_SUCCESS(result);
result.v1 = (1L << 32) | 1L;
break;
+ case PAL_REGISTER_INFO:
+ result = pal_register_info(vcpu);
+ break;
case PAL_VM_PAGE_SIZE:
result.status = ia64_pal_vm_page_size(&result.v0,
&result.v1);
@@ -365,12 +504,18 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
result.status = ia64_pal_version(
(pal_version_u_t *)&result.v0,
(pal_version_u_t *)&result.v1);
-
break;
case PAL_FIXED_ADDR:
result.status = PAL_STATUS_SUCCESS;
result.v0 = vcpu->vcpu_id;
break;
+ case PAL_BRAND_INFO:
+ result = pal_get_brand_info(vcpu);
+ break;
+ case PAL_GET_PSTATE:
+ case PAL_CACHE_SHARED_INFO:
+ INIT_PAL_STATUS_UNIMPLEMENTED(result);
+ break;
default:
INIT_PAL_STATUS_UNIMPLEMENTED(result);
printk(KERN_WARNING"kvm: Unsupported pal call,"
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 230eae482f3..b1dc80952d9 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
return (rr1.val);
}
-
/*
* Set vIFA & vITIR & vIHA, when vPSR.ic =1
* Parameter:
@@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
}
-
-
/*
* Data Nested TLB Fault
* @ Data Nested TLB Vector
@@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
}
-
/*
* Data TLB Fault
* @ Data TLB vector
@@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
/* If vPSR.ic, IFA, ITIR, IHA*/
set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-
-
}
/*
@@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
_vhpt_fault(vcpu, vadr);
}
-
/*
* VHPT Data Fault
* @ VHPT Translation vector
@@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
_vhpt_fault(vcpu, vadr);
}
-
-
/*
* Deal with:
* General Exception vector
@@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu)
inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
}
-
/*
* Illegal Operation Fault
* @ General Exception Vector
@@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
}
-
void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
{
__page_not_present(vcpu, vadr);
}
-
void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
{
__page_not_present(vcpu, vadr);
}
-
/* Deal with
* Data access rights vector
*/
@@ -563,22 +550,64 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
inject_guest_interruption(vcpu, vector);
}
+/*
+ * Translate a guest-supplied PAL call argument into a host virtual address.
+ *
+ * When the vcpu is not in physical mode, @arg is first resolved through the
+ * guest's data TLB entries (vtlb_lookup), falling back to vhpt_lookup(); the
+ * page frame found there is combined with the offset of @arg inside that
+ * page. The result is then passed through kvm_gpa_to_mpa() and __va().
+ *
+ * Returns 0 when neither lookup finds a mapping for @arg.
+ */
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+ unsigned long arg)
+{
+ struct thash_data *data;
+ unsigned long gpa, poff;
+
+ if (!is_physical_mode(vcpu)) {
+ /* Depends on caller to provide the DTR or DTC mapping.*/
+ data = vtlb_lookup(vcpu, arg, D_TLB);
+ if (data)
+ gpa = data->page_flags & _PAGE_PPN_MASK;
+ else {
+ data = vhpt_lookup(arg);
+ if (!data)
+ return 0;
+ gpa = data->gpaddr & _PAGE_PPN_MASK;
+ }
+
+ /* keep the offset inside the page, replace the page frame */
+ poff = arg & (PSIZE(data->ps) - 1);
+ arg = PAGEALIGN(gpa, data->ps) | poff;
+ }
+ /* the shift pair clears bit 63 before the gpa -> mpa conversion */
+ arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+ return (unsigned long)__va(arg);
+}
+
+/*
+ * Record the guest's PAL call arguments (gr28-gr31) in the vcpu exit data
+ * and mark the exit as a PAL call. For PAL_PERF_MON_INFO and PAL_HALT_INFO
+ * gr29 is passed through kvm_trans_pal_call_args(); for PAL_BRAND_INFO gr30
+ * is. All other calls get the raw register values.
+ */
static void set_pal_call_data(struct kvm_vcpu *vcpu)
{
struct exit_ctl_data *p = &vcpu->arch.exit_data;
+ unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+ unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+ unsigned long gr30 = vcpu_get_gr(vcpu, 30);
/*FIXME:For static and stacked convention, firmware
* has put the parameters in gr28-gr31 before
* break to vmm !!*/
- p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
- p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
- p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+ switch (gr28) {
+ case PAL_PERF_MON_INFO:
+ case PAL_HALT_INFO:
+ p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29);
+ p->u.pal_data.gr30 = gr30;
+ break;
+ case PAL_BRAND_INFO:
+ p->u.pal_data.gr29 = gr29;
+ p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+ break;
+ default:
+ p->u.pal_data.gr29 = gr29;
+ p->u.pal_data.gr30 = gr30;
+ break;
+ }
+ p->u.pal_data.gr28 = gr28;
p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
p->exit_reason = EXIT_REASON_PAL_CALL;
}
}
-static void set_pal_call_result(struct kvm_vcpu *vcpu)
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
{
struct exit_ctl_data *p = &vcpu->arch.exit_data;
@@ -606,7 +635,7 @@ static void set_sal_call_data(struct kvm_vcpu *vcpu)
p->exit_reason = EXIT_REASON_SAL_CALL;
}
-static void set_sal_call_result(struct kvm_vcpu *vcpu)
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
{
struct exit_ctl_data *p = &vcpu->arch.exit_data;
@@ -629,13 +658,13 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
if (iim == DOMN_PAL_REQUEST) {
set_pal_call_data(v);
vmm_transition(v);
- set_pal_call_result(v);
+ get_pal_call_result(v);
vcpu_increment_iip(v);
return;
} else if (iim == DOMN_SAL_REQUEST) {
set_sal_call_data(v);
vmm_transition(v);
- set_sal_call_result(v);
+ get_sal_call_result(v);
vcpu_increment_iip(v);
return;
}
@@ -703,7 +732,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu)
}
}
-
void leave_hypervisor_tail(void)
{
struct kvm_vcpu *v = current_vcpu;
@@ -737,7 +765,6 @@ void leave_hypervisor_tail(void)
}
}
-
static inline void handle_lds(struct kvm_pt_regs *regs)
{
regs->cr_ipsr |= IA64_PSR_ED;
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b5532..d4d28050587 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu)
return;
}
-
void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
{
unsigned long psr;
@@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
return;
}
-
-
/*
* In physical mode, insert tc/tr for region 0 and 4 uses
* RID[0] and RID[4] which is for physical mode emulation.
@@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs,
return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
}
-
/*
* The inverse of the above: given bspstore and the number of
* registers, calculate ar.bsp.
@@ -811,12 +807,15 @@ static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
{
struct kvm_vcpu *v;
+ struct kvm *kvm;
int i;
long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
unsigned long vitv = VCPU(vcpu, itv);
+ kvm = (struct kvm *)KVM_VM_BASE;
+
if (vcpu->vcpu_id == 0) {
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ for (i = 0; i < kvm->arch.online_vcpus; i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
VMX(v, itc_offset) = itc_offset;
@@ -1039,8 +1038,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
return key;
}
-
-
void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long thash, vadr;
@@ -1050,7 +1047,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
}
-
void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long tag, vadr;
@@ -1131,7 +1127,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
return IA64_NO_FAULT;
}
-
int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r1, r3;
@@ -1154,7 +1149,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
}
-
/************************************
* Insert/Purge translation register/cache
************************************/
@@ -1385,7 +1379,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_itc(vcpu, r2);
}
-
void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r1;
@@ -1393,8 +1386,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
r1 = vcpu_get_itc(vcpu);
vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
}
+
/**************************************************************************
- struct kvm_vcpu*protection key register access routines
+ struct kvm_vcpu protection key register access routines
**************************************************************************/
unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
@@ -1407,20 +1401,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
ia64_set_pkr(reg, val);
}
-
-unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
-{
- union ia64_rr rr, rr1;
-
- rr.val = vcpu_get_rr(vcpu, ifa);
- rr1.val = 0;
- rr1.ps = rr.ps;
- rr1.rid = rr.rid;
- return (rr1.val);
-}
-
-
-
/********************************
* Moves to privileged registers
********************************/
@@ -1464,8 +1444,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
return (IA64_NO_FAULT);
}
-
-
void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r3, r2;
@@ -1510,8 +1488,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_pkr(vcpu, r3, r2);
}
-
-
void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r3, r1;
@@ -1557,7 +1533,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
}
-
unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
{
/* FIXME: This could get called as a result of a rsvd-reg fault */
@@ -1609,7 +1584,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
return 0;
}
-
unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long tgt = inst.M33.r1;
@@ -1633,8 +1607,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
return 0;
}
-
-
void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -1776,9 +1748,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu)
}
}
-
-
-
void vcpu_rfi(struct kvm_vcpu *vcpu)
{
unsigned long ifs, psr;
@@ -1796,7 +1765,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu)
regs->cr_iip = VCPU(vcpu, iip);
}
-
/*
VPSR can't keep track of below bits of guest PSR
This function gets guest PSR
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b2f12a562bd..042af92ced8 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -703,7 +703,7 @@ extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
u64 itir, u64 ifa, int type);
extern void thash_purge_all(struct kvm_vcpu *v);
extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
@@ -738,7 +738,7 @@ void kvm_init_vhpt(struct kvm_vcpu *v);
void thash_init(struct thash_cb *hcb, u64 sz);
void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-
+u64 kvm_gpa_to_mpa(u64 gpa);
extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
u64 arg4, u64 arg5, u64 arg6, u64 arg7);
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd5..38232b37668 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -164,11 +164,11 @@ static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
unsigned long ps, gpaddr;
ps = itir_ps(itir);
+ rr.val = ia64_get_rr(ifa);
- gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
- (ifa & ((1UL << ps) - 1));
+ gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+ (ifa & ((1UL << ps) - 1));
- rr.val = ia64_get_rr(ifa);
head = (struct thash_data *)ia64_thash(ifa);
head->etag = INVALID_TI_TAG;
ia64_mf();
@@ -412,16 +412,14 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
/*
* Purge overlap TCs and then insert the new entry to emulate itc ops.
- * Notes: Only TC entry can purge and insert.
- * 1 indicates this is MMIO
+ * Notes: Only TC entry can purge and insert.
*/
-int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
u64 ifa, int type)
{
u64 ps;
u64 phy_pte, io_mask, index;
union ia64_rr vrr, mrr;
- int ret = 0;
ps = itir_ps(itir);
vrr.val = vcpu_get_rr(v, ifa);
@@ -441,25 +439,19 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
phy_pte &= ~_PAGE_MA_MASK;
}
- if (pte & VTLB_PTE_IO)
- ret = 1;
-
vtlb_purge(v, ifa, ps);
vhpt_purge(v, ifa, ps);
- if (ps == mrr.ps) {
- if (!(pte&VTLB_PTE_IO)) {
- vhpt_insert(phy_pte, itir, ifa, pte);
- } else {
- vtlb_insert(v, pte, itir, ifa);
- vcpu_quick_region_set(VMX(v, tc_regions), ifa);
- }
- } else if (ps > mrr.ps) {
+ if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
vtlb_insert(v, pte, itir, ifa);
vcpu_quick_region_set(VMX(v, tc_regions), ifa);
- if (!(pte&VTLB_PTE_IO))
- vhpt_insert(phy_pte, itir, ifa, pte);
- } else {
+ }
+ if (pte & VTLB_PTE_IO)
+ return;
+
+ if (ps >= mrr.ps)
+ vhpt_insert(phy_pte, itir, ifa, pte);
+ else {
u64 psr;
phy_pte &= ~PAGE_FLAGS_RV_MASK;
psr = ia64_clear_ic();
@@ -469,7 +461,6 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
if (!(pte&VTLB_PTE_IO))
mark_pages_dirty(v, pte, ps);
- return ret;
}
/*
@@ -509,7 +500,6 @@ void thash_purge_all(struct kvm_vcpu *v)
local_flush_tlb_all();
}
-
/*
* Lookup the hash table and its collision chain to find an entry
* covering this address rid:va or the entry.
@@ -517,7 +507,6 @@ void thash_purge_all(struct kvm_vcpu *v)
* INPUT:
* in: TLB format for both VHPT & TLB.
*/
-
struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
{
struct thash_data *cch;
@@ -547,7 +536,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
return NULL;
}
-
/*
* Initialize internal control data before service.
*/
@@ -573,6 +561,10 @@ void thash_init(struct thash_cb *hcb, u64 sz)
u64 kvm_get_mpt_entry(u64 gpfn)
{
u64 *base = (u64 *) KVM_P2M_BASE;
+ /* KVM_P2M_SIZE >> 3 is the size divided by 8, one u64 per entry (assumes the size is in bytes - confirm) */
+ if (gpfn >= (KVM_P2M_SIZE >> 3))
+ panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
return *(base + gpfn);
}
@@ -589,7 +581,6 @@ u64 kvm_gpa_to_mpa(u64 gpa)
return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
}
-
/*
* Fetch guest bundle code.
* INPUT:
@@ -631,7 +622,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
return IA64_NO_FAULT;
}
-
void kvm_init_vhpt(struct kvm_vcpu *v)
{
v->arch.vhpt.num = VHPT_NUM_ENTRIES;
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index 2aeae467009..8dfd31e87c4 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c
@@ -49,7 +49,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %s", action->name);
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 8133dbc4496..570d85c3f97 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -117,3 +117,6 @@ endif
archclean:
rm -f vmlinux.gz vmlinux.bz2
+
+install:
+ sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/m68k/include/asm/irq_mm.h b/arch/m68k/include/asm/irq_mm.h
index 226bfc0f21b..0cab42cad79 100644
--- a/arch/m68k/include/asm/irq_mm.h
+++ b/arch/m68k/include/asm/irq_mm.h
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <linux/hardirq.h>
+#include <linux/irqreturn.h>
#include <linux/spinlock_types.h>
/*
@@ -80,7 +81,7 @@ struct pt_regs;
* interrupt source (if it supports chaining).
*/
typedef struct irq_node {
- int (*handler)(int, void *);
+ irqreturn_t (*handler)(int, void *);
void *dev_id;
struct irq_node *next;
unsigned long flags;
diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h
index 05309f7e3d0..50db3591ca1 100644
--- a/arch/m68k/include/asm/macintosh.h
+++ b/arch/m68k/include/asm/macintosh.h
@@ -34,6 +34,7 @@ struct mac_model
char scc_type;
char ether_type;
char nubus_type;
+ char floppy_type;
};
#define MAC_ADB_NONE 0
@@ -71,6 +72,12 @@ struct mac_model
#define MAC_NO_NUBUS 0
#define MAC_NUBUS 1
+#define MAC_FLOPPY_IWM 0 /* mac_model.floppy_type values; IWM: mac_platform_init() registers nothing */
+#define MAC_FLOPPY_SWIM_ADDR1 1 /* SWIM registers at VIA1_BASE + 0x1E000 */
+#define MAC_FLOPPY_SWIM_ADDR2 2 /* SWIM registers at VIA1_BASE + 0x16000 */
+#define MAC_FLOPPY_SWIM_IOP 3 /* mac_platform_init() registers nothing for this type */
+#define MAC_FLOPPY_AV 4 /* mac_platform_init() registers nothing for this type */
+
/*
* Gestalt numbers
*/
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index dbc64e92c41..ca87f938b03 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/m68k/install.sh b/arch/m68k/install.sh
new file mode 100644
index 00000000000..9c6bae6112e
--- /dev/null
+++ b/arch/m68k/install.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for m68k architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+# verify FILE: exit 1 with a hint on stderr unless FILE is a regular file
+verify () {
+	if [ ! -f "$1" ]; then
+		echo "" 1>&2
+		echo " *** Missing file: $1" 1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo "" 1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
+# User may have a custom install script; if so, hand over all arguments to it.
+# (The leading ~ stays unquoted so tilde expansion still happens.)
+if [ -x ~/bin/"${CROSS_COMPILE}installkernel" ]; then exec ~/bin/"${CROSS_COMPILE}installkernel" "$@"; fi
+if [ -x /sbin/"${CROSS_COMPILE}installkernel" ]; then exec /sbin/"${CROSS_COMPILE}installkernel" "$@"; fi
+
+# Default install - keep one backup of any previous image and map, then copy.
+# All paths are quoted so an install path containing spaces or globs is safe.
+if [ -f "$4/vmlinuz" ]; then
+	mv "$4/vmlinuz" "$4/vmlinuz.old"
+fi
+
+if [ -f "$4/System.map" ]; then
+	mv "$4/System.map" "$4/System.old"
+fi
+
+cat "$2" > "$4/vmlinuz"
+cp "$3" "$4/System.map"
+
+sync
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 98b6bcfb37b..be017984a45 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -22,6 +22,7 @@
/* keyb */
#include <linux/init.h>
#include <linux/vt_kern.h>
+#include <linux/platform_device.h>
#define BOOTINFO_COMPAT_1_0
#include <asm/setup.h>
@@ -43,6 +44,10 @@
#include <asm/mac_oss.h>
#include <asm/mac_psc.h>
+/* platform device info */
+
+#define SWIM_IO_SIZE 0x2000 /* SWIM IO resource size */
+
/* Mac bootinfo struct */
struct mac_booter_data mac_bi_data;
@@ -224,7 +229,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_II,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_IWM
},
/*
@@ -239,7 +245,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_II,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_IWM
}, {
.ident = MAC_MODEL_IIX,
.name = "IIx",
@@ -247,7 +254,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_II,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_IICX,
.name = "IIcx",
@@ -255,7 +263,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_II,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_SE30,
.name = "SE/30",
@@ -263,7 +272,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_II,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -280,7 +290,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_IIFX,
.name = "IIfx",
@@ -288,7 +299,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_IOP,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_IOP
}, {
.ident = MAC_MODEL_IISI,
.name = "IIsi",
@@ -296,7 +308,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_IIVI,
.name = "IIvi",
@@ -304,7 +317,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_IIVX,
.name = "IIvx",
@@ -312,7 +326,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -326,7 +341,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_CCL,
.name = "Color Classic",
@@ -334,7 +350,9 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS},
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
+ },
/*
* Some Mac LC machines. Basically the same as the IIci, ADB like IIsi
@@ -347,7 +365,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_LCII,
.name = "LC II",
@@ -355,7 +374,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_LCIII,
.name = "LC III",
@@ -363,7 +383,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -383,7 +404,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q605_ACC,
.name = "Quadra 605",
@@ -391,7 +413,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q610,
.name = "Quadra 610",
@@ -400,7 +423,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q630,
.name = "Quadra 630",
@@ -410,7 +434,8 @@ static struct mac_model mac_data_table[] = {
.ide_type = MAC_IDE_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q650,
.name = "Quadra 650",
@@ -419,7 +444,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
},
/* The Q700 does have a NS Sonic */
{
@@ -430,7 +456,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA2,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q800,
.name = "Quadra 800",
@@ -439,7 +466,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_Q840,
.name = "Quadra 840AV",
@@ -448,7 +476,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA3,
.scc_type = MAC_SCC_PSC,
.ether_type = MAC_ETHER_MACE,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_AV
}, {
.ident = MAC_MODEL_Q900,
.name = "Quadra 900",
@@ -457,7 +486,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA2,
.scc_type = MAC_SCC_IOP,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_IOP
}, {
.ident = MAC_MODEL_Q950,
.name = "Quadra 950",
@@ -466,7 +496,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA2,
.scc_type = MAC_SCC_IOP,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_IOP
},
/*
@@ -480,7 +511,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_P475,
.name = "Performa 475",
@@ -488,7 +520,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_P475F,
.name = "Performa 475",
@@ -496,7 +529,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_P520,
.name = "Performa 520",
@@ -504,7 +538,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_P550,
.name = "Performa 550",
@@ -512,7 +547,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/* These have the comm slot, and therefore the possibility of SONIC ethernet */
{
@@ -523,7 +559,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_II,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_P588,
.name = "Performa 588",
@@ -533,7 +570,8 @@ static struct mac_model mac_data_table[] = {
.ide_type = MAC_IDE_QUADRA,
.scc_type = MAC_SCC_II,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_TV,
.name = "TV",
@@ -541,7 +579,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_P600,
.name = "Performa 600",
@@ -549,7 +588,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_II,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -565,7 +605,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_C650,
.name = "Centris 650",
@@ -574,7 +615,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR1
}, {
.ident = MAC_MODEL_C660,
.name = "Centris 660AV",
@@ -583,7 +625,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_QUADRA3,
.scc_type = MAC_SCC_PSC,
.ether_type = MAC_ETHER_MACE,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_AV
},
/*
@@ -599,7 +642,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB145,
.name = "PowerBook 145",
@@ -607,7 +651,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB150,
.name = "PowerBook 150",
@@ -616,7 +661,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_OLD,
.ide_type = MAC_IDE_PB,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB160,
.name = "PowerBook 160",
@@ -624,7 +670,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB165,
.name = "PowerBook 165",
@@ -632,7 +679,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB165C,
.name = "PowerBook 165c",
@@ -640,7 +688,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB170,
.name = "PowerBook 170",
@@ -648,7 +697,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB180,
.name = "PowerBook 180",
@@ -656,7 +706,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB180C,
.name = "PowerBook 180c",
@@ -664,7 +715,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_QUADRA,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB190,
.name = "PowerBook 190",
@@ -673,7 +725,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_OLD,
.ide_type = MAC_IDE_BABOON,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB520,
.name = "PowerBook 520",
@@ -682,7 +735,8 @@ static struct mac_model mac_data_table[] = {
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
.ether_type = MAC_ETHER_SONIC,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -702,7 +756,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB230,
.name = "PowerBook Duo 230",
@@ -710,7 +765,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB250,
.name = "PowerBook Duo 250",
@@ -718,7 +774,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB270C,
.name = "PowerBook Duo 270c",
@@ -726,7 +783,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB280,
.name = "PowerBook Duo 280",
@@ -734,7 +792,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
}, {
.ident = MAC_MODEL_PB280C,
.name = "PowerBook Duo 280c",
@@ -742,7 +801,8 @@ static struct mac_model mac_data_table[] = {
.via_type = MAC_VIA_IIci,
.scsi_type = MAC_SCSI_OLD,
.scc_type = MAC_SCC_QUADRA,
- .nubus_type = MAC_NUBUS
+ .nubus_type = MAC_NUBUS,
+ .floppy_type = MAC_FLOPPY_SWIM_ADDR2
},
/*
@@ -815,3 +875,42 @@ static void mac_get_model(char *str)
strcpy(str, "Macintosh ");
strcat(str, macintosh_config->name);
}
+
+static struct resource swim_resources[1]; /* filled in by mac_platform_init() */
+/* "swim" platform device, id -1, backed by the single swim_resources slot */
+static struct platform_device swim_device = {
+ .name = "swim",
+ .id = -1,
+ .num_resources = ARRAY_SIZE(swim_resources),
+ .resource = swim_resources,
+};
+/* Device list that mac_platform_init() passes to platform_add_devices() */
+static struct platform_device *mac_platform_devices[] __initdata = {
+ &swim_device
+};
+/* Register the "swim" floppy device when this model's SWIM sits at a known address; returns 0 or the platform_add_devices() result. */
+int __init mac_platform_init(void)
+{
+	u8 *swim_base;
+
+	switch (macintosh_config->floppy_type) {
+	case MAC_FLOPPY_SWIM_ADDR1:
+		swim_base = (u8 *)(VIA1_BASE + 0x1E000);
+		break;
+	case MAC_FLOPPY_SWIM_ADDR2:
+		swim_base = (u8 *)(VIA1_BASE + 0x16000);
+		break;
+	default:	/* IWM, SWIM_IOP, AV: nothing is registered */
+		return 0;
+	}
+	/* Resource ranges are inclusive, so the last byte is base + size - 1 */
+	swim_resources[0].name = "swim-regs";
+	swim_resources[0].start = (resource_size_t)swim_base;
+	swim_resources[0].end = (resource_size_t)(swim_base + SWIM_IO_SIZE - 1);
+	swim_resources[0].flags = IORESOURCE_MEM;
+
+	return platform_add_devices(mac_platform_devices,
+				    ARRAY_SIZE(mac_platform_devices));
+}
+
+arch_initcall(mac_platform_init);
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 7d97ba54536..11bce3cb648 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -645,3 +645,12 @@ int via_irq_pending(int irq)
}
return 0;
}
+/* Select the floppy head through VIA1 buffer A: head 0 clears VIA1A_vHeadSel, any other value sets it. */
+void via1_set_head(int head)
+{
+	if (head != 0)
+		via1[vBufA] |= VIA1A_vHeadSel;
+	else
+		via1[vBufA] &= ~VIA1A_vHeadSel;
+}
+EXPORT_SYMBOL(via1_set_head);
diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c
index 06d887cdcbf..855fc6a79d7 100644
--- a/arch/m68knommu/platform/520x/config.c
+++ b/arch/m68knommu/platform/520x/config.c
@@ -49,8 +49,39 @@ static struct platform_device m520x_uart = {
.dev.platform_data = m520x_uart_platform,
};
+static struct resource m520x_fec_resources[] = {
+ { /* register window: 0x800 bytes starting at MCF_MBAR + 0x30000 (end is inclusive) */
+ .start = MCF_MBAR + 0x30000,
+ .end = MCF_MBAR + 0x30000 + 0x7ff,
+ .flags = IORESOURCE_MEM,
+ },
+ { /* three single-IRQ entries follow; 36/40/42 match the ICR slots written in m520x_fec_init() */
+ .start = 64 + 36,
+ .end = 64 + 36,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 40,
+ .end = 64 + 40,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 42,
+ .end = 64 + 42,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+/* "fec" platform device, id 0, described by m520x_fec_resources */
+static struct platform_device m520x_fec = {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m520x_fec_resources),
+ .resource = m520x_fec_resources,
+};
+
static struct platform_device *m520x_devices[] __initdata = {
&m520x_uart,
+ &m520x_fec,
};
/***************************************************************************/
@@ -103,6 +134,30 @@ static void __init m520x_uarts_init(void)
/***************************************************************************/
+static void __init m520x_fec_init(void)
+{
+ u32 imr;
+ u8 v;
+
+ /* Unmask FEC interrupts at ColdFire interrupt controller */
+ writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 36);
+ writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 40);
+ writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 42);
+ /* Read-modify-write of IMRH: clear bits 4..16, leave the rest untouched */
+ imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+ imr &= ~0x0001FFF0;
+ writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+
+ /* Set multi-function pins to ethernet mode */
+ v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FEC);
+ writeb(v | 0xf0, MCF_IPSBAR + MCF_GPIO_PAR_FEC);
+ /* Likewise OR 0x0f into the PAR_FECI2C register, preserving its other bits */
+ v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FECI2C);
+ writeb(v | 0x0f, MCF_IPSBAR + MCF_GPIO_PAR_FECI2C);
+}
+
+/***************************************************************************/
+
/*
* Program the vector to be an auto-vectored.
*/
@@ -118,6 +173,7 @@ void __init config_BSP(char *commandp, int size)
{
mach_reset = coldfire_reset;
m520x_uarts_init();
+ m520x_fec_init();
}
/***************************************************************************/
diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c
index 13f02611ea2..74133f27b30 100644
--- a/arch/m68knommu/platform/523x/config.c
+++ b/arch/m68knommu/platform/523x/config.c
@@ -50,8 +50,39 @@ static struct platform_device m523x_uart = {
.dev.platform_data = m523x_uart_platform,
};
+static struct resource m523x_fec_resources[] = {
+ { /* register window: 0x800 bytes starting at MCF_MBAR + 0x1000 (end is inclusive) */
+ .start = MCF_MBAR + 0x1000,
+ .end = MCF_MBAR + 0x1000 + 0x7ff,
+ .flags = IORESOURCE_MEM,
+ },
+ { /* three single-IRQ entries follow; 23/27/29 match the ICR slots written in m523x_fec_init() */
+ .start = 64 + 23,
+ .end = 64 + 23,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 27,
+ .end = 64 + 27,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 29,
+ .end = 64 + 29,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+/* "fec" platform device, id 0, described by m523x_fec_resources */
+static struct platform_device m523x_fec = {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m523x_fec_resources),
+ .resource = m523x_fec_resources,
+};
+
static struct platform_device *m523x_devices[] __initdata = {
&m523x_uart,
+ &m523x_fec,
};
/***************************************************************************/
@@ -83,6 +114,25 @@ static void __init m523x_uarts_init(void)
/***************************************************************************/
+static void __init m523x_fec_init(void)
+{
+ u32 imr;
+
+ /* Unmask FEC interrupts at ColdFire interrupt controller */
+ writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23);
+ writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27);
+ writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29);
+ /* Read-modify-write: clear IMRH bits 0..3, then IMRL bit 0 and bits 23..31 */
+ imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+ imr &= ~0xf;
+ writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+ imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL);
+ imr &= ~0xff800001;
+ writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL);
+}
+
+/***************************************************************************/
+
void mcf_disableall(void)
{
*((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff;
@@ -103,6 +153,7 @@ void __init config_BSP(char *commandp, int size)
mcf_disableall();
mach_reset = coldfire_reset;
m523x_uarts_init();
+ m523x_fec_init();
}
/***************************************************************************/
diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c
index 230bae691a7..e049245f409 100644
--- a/arch/m68knommu/platform/5272/config.c
+++ b/arch/m68knommu/platform/5272/config.c
@@ -55,8 +55,39 @@ static struct platform_device m5272_uart = {
.dev.platform_data = m5272_uart_platform,
};
+static struct resource m5272_fec_resources[] = {
+ { /* register window: 0x1d0 bytes starting at MCF_MBAR + 0x840 (end is inclusive) */
+ .start = MCF_MBAR + 0x840,
+ .end = MCF_MBAR + 0x840 + 0x1cf,
+ .flags = IORESOURCE_MEM,
+ },
+ { /* three single-IRQ entries follow: 86, 87 and 88 */
+ .start = 86,
+ .end = 86,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 87,
+ .end = 87,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 88,
+ .end = 88,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+/* "fec" platform device, id 0, described by m5272_fec_resources */
+static struct platform_device m5272_fec = {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m5272_fec_resources),
+ .resource = m5272_fec_resources,
+};
+
static struct platform_device *m5272_devices[] __initdata = {
&m5272_uart,
+ &m5272_fec,
};
/***************************************************************************/
@@ -91,6 +122,22 @@ static void __init m5272_uarts_init(void)
/***************************************************************************/
+static void __init m5272_fec_init(void)
+{
+ u32 imr; /* holds ICR3 and then ICR1 contents despite the name */
+
+ /* Unmask FEC interrupts at ColdFire interrupt controller */
+ imr = readl(MCF_MBAR + MCFSIM_ICR3);
+ imr = (imr & ~0x00000fff) | 0x00000ddd;
+ writel(imr, MCF_MBAR + MCFSIM_ICR3);
+ /* Same read-modify-write on ICR1: replace bits 24..27 with 0xd */
+ imr = readl(MCF_MBAR + MCFSIM_ICR1);
+ imr = (imr & ~0x0f000000) | 0x0d000000;
+ writel(imr, MCF_MBAR + MCFSIM_ICR1);
+}
+
+/***************************************************************************/
+
void mcf_disableall(void)
{
volatile unsigned long *icrp;
@@ -155,6 +202,7 @@ void __init config_BSP(char *commandp, int size)
static int __init init_BSP(void)
{
m5272_uarts_init();
+ m5272_fec_init();
platform_add_devices(m5272_devices, ARRAY_SIZE(m5272_devices));
return 0;
}
diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c
index 73cd1aef4a9..49343fb157b 100644
--- a/arch/m68knommu/platform/527x/config.c
+++ b/arch/m68knommu/platform/527x/config.c
@@ -50,8 +50,73 @@ static struct platform_device m527x_uart = {
.dev.platform_data = m527x_uart_platform,
};
+static struct resource m527x_fec0_resources[] = {
+ { /* register window: 0x800 bytes starting at MCF_MBAR + 0x1000 (end is inclusive) */
+ .start = MCF_MBAR + 0x1000,
+ .end = MCF_MBAR + 0x1000 + 0x7ff,
+ .flags = IORESOURCE_MEM,
+ },
+ { /* three single-IRQ entries follow: 64 + 23/27/29 */
+ .start = 64 + 23,
+ .end = 64 + 23,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 27,
+ .end = 64 + 27,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 29,
+ .end = 64 + 29,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+/* Second controller: same layout, window 0x800 higher and IRQs based at 128 instead of 64 */
+static struct resource m527x_fec1_resources[] = {
+ {
+ .start = MCF_MBAR + 0x1800,
+ .end = MCF_MBAR + 0x1800 + 0x7ff,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = 128 + 23,
+ .end = 128 + 23,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 128 + 27,
+ .end = 128 + 27,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 128 + 29,
+ .end = 128 + 29,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+/* Two "fec" platform devices, ids 0 and 1, one per resource table above */
+static struct platform_device m527x_fec[] = {
+ {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m527x_fec0_resources),
+ .resource = m527x_fec0_resources,
+ },
+ {
+ .name = "fec",
+ .id = 1,
+ .num_resources = ARRAY_SIZE(m527x_fec1_resources),
+ .resource = m527x_fec1_resources,
+ },
+};
+
static struct platform_device *m527x_devices[] __initdata = {
&m527x_uart,
+ &m527x_fec[0],
+#ifdef CONFIG_FEC2
+ &m527x_fec[1],
+#endif
};
/***************************************************************************/
@@ -97,6 +162,51 @@ static void __init m527x_uarts_init(void)
/***************************************************************************/
+static void __init m527x_fec_irq_init(int nr)
+{
+ unsigned long base;
+ u32 imr;
+
+ base = MCF_IPSBAR + (nr ? MCFICM_INTC1 : MCFICM_INTC0);
+
+ writeb(0x28, base + MCFINTC_ICR0 + 23);
+ writeb(0x27, base + MCFINTC_ICR0 + 27);
+ writeb(0x26, base + MCFINTC_ICR0 + 29);
+
+ imr = readl(base + MCFINTC_IMRH);
+ imr &= ~0xf;
+ writel(imr, base + MCFINTC_IMRH);
+ imr = readl(base + MCFINTC_IMRL);
+ imr &= ~0xff800001;
+ writel(imr, base + MCFINTC_IMRL);
+}
+
+static void __init m527x_fec_init(void)
+{
+ u16 par;
+ u8 v;
+
+ m527x_fec_irq_init(0);
+
+ /* Set multi-function pins to ethernet mode for fec0 */
+ par = readw(MCF_IPSBAR + 0x100082);
+ writew(par | 0xf00, MCF_IPSBAR + 0x100082);
+ v = readb(MCF_IPSBAR + 0x100078);
+ writeb(v | 0xc0, MCF_IPSBAR + 0x100078);
+
+#ifdef CONFIG_FEC2
+ m527x_fec_irq_init(1);
+
+ /* Set multi-function pins to ethernet mode for fec1 */
+ par = readw(MCF_IPSBAR + 0x100082);
+ writew(par | 0xa0, MCF_IPSBAR + 0x100082);
+ v = readb(MCF_IPSBAR + 0x100079);
+ writeb(v | 0xc0, MCF_IPSBAR + 0x100079);
+#endif
+}
+
+/***************************************************************************/
+
void mcf_disableall(void)
{
*((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff;
@@ -116,13 +226,14 @@ void __init config_BSP(char *commandp, int size)
{
mcf_disableall();
mach_reset = coldfire_reset;
+ m527x_uarts_init();
+ m527x_fec_init();
}
/***************************************************************************/
static int __init init_BSP(void)
{
- m527x_uarts_init();
platform_add_devices(m527x_devices, ARRAY_SIZE(m527x_devices));
return 0;
}
diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c
index 44baeb225dc..bee526f4d1a 100644
--- a/arch/m68knommu/platform/528x/config.c
+++ b/arch/m68knommu/platform/528x/config.c
@@ -57,8 +57,40 @@ static struct platform_device m528x_uart = {
.dev.platform_data = m528x_uart_platform,
};
+static struct resource m528x_fec_resources[] = {
+ {
+ .start = MCF_MBAR + 0x1000,
+ .end = MCF_MBAR + 0x1000 + 0x7ff,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = 64 + 23,
+ .end = 64 + 23,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 27,
+ .end = 64 + 27,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 29,
+ .end = 64 + 29,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device m528x_fec = {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m528x_fec_resources),
+ .resource = m528x_fec_resources,
+};
+
+
static struct platform_device *m528x_devices[] __initdata = {
&m528x_uart,
+ &m528x_fec,
};
/***************************************************************************/
@@ -99,6 +131,31 @@ static void __init m528x_uarts_init(void)
/***************************************************************************/
+static void __init m528x_fec_init(void)
+{
+ u32 imr;
+ u16 v16;
+
+ /* Unmask FEC interrupts at ColdFire interrupt controller */
+ writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23);
+ writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27);
+ writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29);
+
+ imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+ imr &= ~0xf;
+ writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
+ imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL);
+ imr &= ~0xff800001;
+ writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL);
+
+ /* Set multi-function pins to ethernet mode for fec0 */
+ v16 = readw(MCF_IPSBAR + 0x100056);
+ writew(v16 | 0xf00, MCF_IPSBAR + 0x100056);
+ writeb(0xc0, MCF_IPSBAR + 0x100058);
+}
+
+/***************************************************************************/
+
void mcf_disableall(void)
{
*((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff;
@@ -158,6 +215,7 @@ void __init config_BSP(char *commandp, int size)
static int __init init_BSP(void)
{
m528x_uarts_init();
+ m528x_fec_init();
platform_add_devices(m528x_devices, ARRAY_SIZE(m528x_devices));
return 0;
}
diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c
index a347623d6ee..591f2f80113 100644
--- a/arch/m68knommu/platform/532x/config.c
+++ b/arch/m68knommu/platform/532x/config.c
@@ -61,8 +61,38 @@ static struct platform_device m532x_uart = {
.dev.platform_data = m532x_uart_platform,
};
+static struct resource m532x_fec_resources[] = {
+ {
+ .start = 0xfc030000,
+ .end = 0xfc0307ff,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = 64 + 36,
+ .end = 64 + 36,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 40,
+ .end = 64 + 40,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .start = 64 + 42,
+ .end = 64 + 42,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device m532x_fec = {
+ .name = "fec",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(m532x_fec_resources),
+ .resource = m532x_fec_resources,
+};
static struct platform_device *m532x_devices[] __initdata = {
&m532x_uart,
+ &m532x_fec,
};
/***************************************************************************/
@@ -93,6 +123,24 @@ static void __init m532x_uarts_init(void)
for (line = 0; (line < nrlines); line++)
m532x_uart_init_line(line, m532x_uart_platform[line].irq);
}
+/***************************************************************************/
+
+static void __init m532x_fec_init(void)
+{
+ /* Unmask FEC interrupts at ColdFire interrupt controller */
+ MCF_INTC0_ICR36 = 0x2;
+ MCF_INTC0_ICR40 = 0x2;
+ MCF_INTC0_ICR42 = 0x2;
+
+ MCF_INTC0_IMRH &= ~(MCF_INTC_IMRH_INT_MASK36 |
+ MCF_INTC_IMRH_INT_MASK40 | MCF_INTC_IMRH_INT_MASK42);
+
+ /* Set multi-function pins to ethernet mode for fec0 */
+ MCF_GPIO_PAR_FECI2C |= (MCF_GPIO_PAR_FECI2C_PAR_MDC_EMDC |
+ MCF_GPIO_PAR_FECI2C_PAR_MDIO_EMDIO);
+ MCF_GPIO_PAR_FEC = (MCF_GPIO_PAR_FEC_PAR_FEC_7W_FEC |
+ MCF_GPIO_PAR_FEC_PAR_FEC_MII_FEC);
+}
/***************************************************************************/
@@ -150,6 +198,7 @@ void __init config_BSP(char *commandp, int size)
static int __init init_BSP(void)
{
m532x_uarts_init();
+ m532x_fec_init();
platform_add_devices(m532x_devices, ARRAY_SIZE(m532x_devices));
return 0;
}
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index 9ce0607d7a4..9e89cf99d4e 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -9,6 +9,7 @@
#ifndef _ASM_SIGCONTEXT_H
#define _ASM_SIGCONTEXT_H
+#include <linux/types.h>
#include <asm/sgidefs.h>
#if _MIPS_SIM == _MIPS_SIM_ABI32
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index facc2d7a87c..2abca178016 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -75,6 +75,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#ifdef __KERNEL__
/** sock_type - Socket types
diff --git a/arch/mips/include/asm/swab.h b/arch/mips/include/asm/swab.h
index 88f1f7d555c..99993c0d6c1 100644
--- a/arch/mips/include/asm/swab.h
+++ b/arch/mips/include/asm/swab.h
@@ -9,7 +9,7 @@
#define _ASM_SWAB_H
#include <linux/compiler.h>
-#include <asm/types.h>
+#include <linux/types.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 4b4007b3083..7b845ba9dff 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -108,7 +108,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, " %s", action->name);
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 49aac6e17df..2a472713de8 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -355,40 +355,6 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality)
return ret;
}
-/* ustat compatibility */
-struct ustat32 {
- compat_daddr_t f_tfree;
- compat_ino_t f_tinode;
- char f_fname[6];
- char f_fpack[6];
-};
-
-extern asmlinkage long sys_ustat(dev_t dev, struct ustat __user * ubuf);
-
-SYSCALL_DEFINE2(32_ustat, dev_t, dev, struct ustat32 __user *, ubuf32)
-{
- int err;
- struct ustat tmp;
- struct ustat32 tmp32;
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- err = sys_ustat(dev, (struct ustat __user *)&tmp);
- set_fs(old_fs);
-
- if (err)
- goto out;
-
- memset(&tmp32, 0, sizeof(struct ustat32));
- tmp32.f_tfree = tmp.f_tfree;
- tmp32.f_tinode = tmp.f_tinode;
-
- err = copy_to_user(ubuf32, &tmp32, sizeof(struct ustat32)) ? -EFAULT : 0;
-
-out:
- return err;
-}
-
SYSCALL_DEFINE4(32_sendfile, long, out_fd, long, in_fd,
compat_off_t __user *, offset, s32, count)
{
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 7438e92f8a0..f61d6b0e573 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -253,7 +253,7 @@ EXPORT(sysn32_call_table)
PTR compat_sys_utime /* 6130 */
PTR sys_mknod
PTR sys_32_personality
- PTR sys_32_ustat
+ PTR compat_sys_ustat
PTR compat_sys_statfs
PTR compat_sys_fstatfs /* 6135 */
PTR sys_sysfs
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index b0fef4ff982..60997f1f69d 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -265,7 +265,7 @@ sys_call_table:
PTR sys_olduname
PTR sys_umask /* 4060 */
PTR sys_chroot
- PTR sys_32_ustat
+ PTR compat_sys_ustat
PTR sys_dup2
PTR sys_getppid
PTR sys_getpgrp /* 4065 */
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 56c64ccc9c2..50fdb5c16e0 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -221,7 +221,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (action) {
seq_printf(p, "%3d: ", i);
for_each_present_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
seq_printf(p, " %14s.%u", irq_desc[i].chip->name,
(GxICR(i) & GxICR_LEVEL) >>
GxICR_LEVEL_SHIFT);
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index c584b00c607..430f1aeea0b 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -336,10 +336,11 @@
#define NUM_PDC_RESULT 32
#if !defined(__ASSEMBLY__)
-#ifdef __KERNEL__
#include <linux/types.h>
+#ifdef __KERNEL__
+
extern int pdc_type;
/* Values for pdc_type */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index fba402c95ac..885472bf7b7 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -54,6 +54,9 @@
#define SO_MARK 0x401f
+#define SO_TIMESTAMPING 0x4020
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.
*/
diff --git a/arch/parisc/include/asm/swab.h b/arch/parisc/include/asm/swab.h
index 3ff16c5a335..e78403b129e 100644
--- a/arch/parisc/include/asm/swab.h
+++ b/arch/parisc/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _PARISC_SWAB_H
#define _PARISC_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 29e70e16ede..adfd617b4c1 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%3d: ", i);
#ifdef CONFIG_SMP
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#else
seq_printf(p, "%10u ", kstat_irqs(i));
#endif
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 303d2b647e4..03b9a01bc16 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -130,7 +130,7 @@
ENTRY_OURS(newuname)
ENTRY_SAME(umask) /* 60 */
ENTRY_SAME(chroot)
- ENTRY_SAME(ustat)
+ ENTRY_COMP(ustat)
ENTRY_SAME(dup2)
ENTRY_SAME(getppid)
ENTRY_SAME(getpgrp) /* 65 */
diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h
index 57b82e3f89c..60a3c9ef301 100644
--- a/arch/powerpc/include/asm/bootx.h
+++ b/arch/powerpc/include/asm/bootx.h
@@ -9,7 +9,7 @@
#ifndef __ASM_BOOTX_H__
#define __ASM_BOOTX_H__
-#include <asm/types.h>
+#include <linux/types.h>
#ifdef macintosh
#include <Types.h>
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index cd46f023ec6..b5600ce6055 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -7,7 +7,7 @@
#include <asm/string.h>
#endif
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/ptrace.h>
#include <asm/cputable.h>
#include <asm/auxvec.h>
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5..bb2de6aa5ce 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -20,7 +20,7 @@
#ifndef __LINUX_KVM_POWERPC_H
#define __LINUX_KVM_POWERPC_H
-#include <asm/types.h>
+#include <linux/types.h>
struct kvm_regs {
__u64 pc;
@@ -52,4 +52,11 @@ struct kvm_fpu {
__u64 fpr[32];
};
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index f49031b632c..d22d39942a9 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -28,6 +28,13 @@
* need to find some way of advertising it. */
#define KVM44x_GUEST_TLB_SIZE 64
+struct kvmppc_44x_tlbe {
+ u32 tid; /* Only the low 8 bits are used. */
+ u32 word0;
+ u32 word1;
+ u32 word2;
+};
+
struct kvmppc_44x_shadow_ref {
struct page *page;
u16 gtlb_index;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 2197764796d..56bfae59837 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -42,7 +42,12 @@
#define BOOKE_INTERRUPT_DTLB_MISS 13
#define BOOKE_INTERRUPT_ITLB_MISS 14
#define BOOKE_INTERRUPT_DEBUG 15
-#define BOOKE_MAX_INTERRUPT 15
+
+/* E500 */
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
new file mode 100644
index 00000000000..9d497ce4972
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_KVM_E500_H__
+#define __ASM_KVM_E500_H__
+
+#include <linux/kvm_host.h>
+
+#define BOOKE_INTERRUPT_SIZE 36
+
+#define E500_PID_NUM 3
+#define E500_TLB_NUM 2
+
+struct tlbe{
+ u32 mas1;
+ u32 mas2;
+ u32 mas3;
+ u32 mas7;
+};
+
+struct kvmppc_vcpu_e500 {
+ /* Unmodified copy of the guest's TLB. */
+ struct tlbe *guest_tlb[E500_TLB_NUM];
+ /* TLB that's actually used when the guest is running. */
+ struct tlbe *shadow_tlb[E500_TLB_NUM];
+ /* Pages which are referenced in the shadow TLB. */
+ struct page **shadow_pages[E500_TLB_NUM];
+
+ unsigned int guest_tlb_size[E500_TLB_NUM];
+ unsigned int shadow_tlb_size[E500_TLB_NUM];
+ unsigned int guest_tlb_nv[E500_TLB_NUM];
+
+ u32 host_pid[E500_PID_NUM];
+ u32 pid[E500_PID_NUM];
+
+ u32 mas0;
+ u32 mas1;
+ u32 mas2;
+ u32 mas3;
+ u32 mas4;
+ u32 mas5;
+ u32 mas6;
+ u32 mas7;
+ u32 l1csr1;
+ u32 hid0;
+ u32 hid1;
+
+ struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe773..dfdf13c9fef 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,13 +64,6 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvmppc_44x_tlbe {
- u32 tid; /* Only the low 8 bits are used. */
- u32 word0;
- u32 word1;
- u32 word2;
-};
-
enum kvm_exit_types {
MMIO_EXITS,
DCR_EXITS,
@@ -118,11 +111,6 @@ struct kvm_arch {
struct kvm_vcpu_arch {
u32 host_stack;
u32 host_pid;
- u32 host_dbcr0;
- u32 host_dbcr1;
- u32 host_dbcr2;
- u32 host_iac[4];
- u32 host_msr;
u64 fpr[32];
ulong gpr[32];
@@ -157,7 +145,7 @@ struct kvm_vcpu_arch {
u32 tbu;
u32 tcr;
u32 tsr;
- u32 ivor[16];
+ u32 ivor[64];
ulong ivpr;
u32 pir;
@@ -170,6 +158,7 @@ struct kvm_vcpu_arch {
u32 ccr1;
u32 dbcr0;
u32 dbcr1;
+ u32 dbsr;
#ifdef CONFIG_KVM_EXIT_TIMING
struct kvmppc_exit_timing timing_exit;
@@ -200,10 +189,4 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
};
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
-
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 36d2a50a848..2c6ee349df5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,13 +52,19 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+/* Core-specific hooks */
+
extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
- u64 asid, u32 flags, u32 max_bytes,
unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-
-/* Core-specific hooks */
+extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+ gva_t eaddr);
+extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
unsigned int id);
@@ -71,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
-
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h
index 3f941c0f7e8..4285b64a65e 100644
--- a/arch/powerpc/include/asm/mmu-fsl-booke.h
+++ b/arch/powerpc/include/asm/mmu-fsl-booke.h
@@ -75,6 +75,8 @@
#ifndef __ASSEMBLY__
+extern unsigned int tlbcam_index;
+
typedef struct {
unsigned int id;
unsigned int active;
diff --git a/arch/powerpc/include/asm/ps3fb.h b/arch/powerpc/include/asm/ps3fb.h
index 3f121fe4010..e7233a84968 100644
--- a/arch/powerpc/include/asm/ps3fb.h
+++ b/arch/powerpc/include/asm/ps3fb.h
@@ -19,6 +19,7 @@
#ifndef _ASM_POWERPC_PS3FB_H_
#define _ASM_POWERPC_PS3FB_H_
+#include <linux/types.h>
#include <linux/ioctl.h>
/* ioctl */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index f5a4e168e49..1e5cfad0e3f 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -61,4 +61,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h
index 3545efbf989..1286c823f0d 100644
--- a/arch/powerpc/include/asm/spu_info.h
+++ b/arch/powerpc/include/asm/spu_info.h
@@ -23,9 +23,10 @@
#ifndef _SPU_INFO_H
#define _SPU_INFO_H
+#include <linux/types.h>
+
#ifdef __KERNEL__
#include <asm/spu.h>
-#include <linux/types.h>
#else
struct mfc_cq_sr {
__u64 mfc_cq_data0_RW;
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
index ef824ae4b79..c581e3ef73e 100644
--- a/arch/powerpc/include/asm/swab.h
+++ b/arch/powerpc/include/asm/swab.h
@@ -8,7 +8,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#ifdef __GNUC__
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 72353f6070a..fe166491e9d 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -65,7 +65,7 @@ SYSCALL(ni_syscall)
SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
COMPAT_SYS_SPU(umask)
SYSCALL_SPU(chroot)
-SYSCALL(ustat)
+COMPAT_SYS(ustat)
SYSCALL_SPU(dup2)
SYSCALL_SPU(getppid)
SYSCALL_SPU(getpgrp)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491e9e2..42fe4da4e8a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -49,7 +49,7 @@
#include <asm/iseries/alpaca.h>
#endif
#ifdef CONFIG_KVM
-#include <asm/kvm_44x.h>
+#include <linux/kvm_host.h>
#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -361,8 +361,6 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
- DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
-
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 23b8b5e36f9..17efb7118db 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -190,7 +190,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%3d: ", i);
#ifdef CONFIG_SMP
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#else
seq_printf(p, "%10u ", kstat_irqs(i));
#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a66bec57265..0cef809cec2 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,72 +28,6 @@
#include "44x_tlb.h"
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvm44x_disable_debug_interrupts(void)
-{
- mtmsr(mfmsr() & ~MSR_DE);
-}
-
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
- kvm44x_disable_debug_interrupts();
-
- mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
- mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
- mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
- mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
- mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
- mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
- mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
- mtmsr(vcpu->arch.host_msr);
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
- u32 dbcr0 = 0;
-
- vcpu->arch.host_msr = mfmsr();
- kvm44x_disable_debug_interrupts();
-
- /* Save host debug register state. */
- vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
- vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
- vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
- vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
- vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
- vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
- vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
- /* set registers up for guest */
-
- if (dbg->bp[0]) {
- mtspr(SPRN_IAC1, dbg->bp[0]);
- dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
- }
- if (dbg->bp[1]) {
- mtspr(SPRN_IAC2, dbg->bp[1]);
- dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
- }
- if (dbg->bp[2]) {
- mtspr(SPRN_IAC3, dbg->bp[2]);
- dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
- }
- if (dbg->bp[3]) {
- mtspr(SPRN_IAC4, dbg->bp[3]);
- dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
- }
-
- mtspr(SPRN_DBCR0, dbcr0);
- mtspr(SPRN_DBCR1, 0);
- mtspr(SPRN_DBCR2, 0);
-}
-
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_44x_tlb_load(vcpu);
@@ -149,8 +83,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
int index;
gva_t eaddr;
u8 pid;
@@ -166,9 +98,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
- gtlbe = &vcpu_44x->guest_tlb[index];
-
- tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
/* XXX what does "writeable" and "usermode" even mean? */
tr->valid = 1;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 82489a743a6..61af58fcece 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,25 +27,12 @@
#include "booke.h"
#include "44x_tlb.h"
-#define OP_RFI 19
-
-#define XOP_RFI 50
-#define XOP_MFMSR 83
-#define XOP_WRTEE 131
-#define XOP_MTMSR 146
-#define XOP_WRTEEI 163
#define XOP_MFDCR 323
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int ws;
switch (get_op(inst)) {
- case OP_RFI:
- switch (get_xop(inst)) {
- case XOP_RFI:
- kvmppc_emul_rfi(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
- *advance = 0;
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
- break;
-
case 31:
switch (get_xop(inst)) {
- case XOP_MFMSR:
- rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
- kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
- break;
-
- case XOP_MTMSR:
- rs = get_rs(inst);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
- break;
-
- case XOP_WRTEE:
- rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
- case XOP_WRTEEI:
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (inst & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
case XOP_MFDCR:
dcrn = get_dcrn(inst);
rt = get_rt(inst);
@@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
emulated = EMULATE_FAIL;
}
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
return emulated;
}
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
+ int emulated = EMULATE_DONE;
+
switch (sprn) {
- case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_PID:
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
case SPRN_CCR1:
vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
- case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
- case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
- kvmppc_emulate_dec(vcpu);
- break;
-
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
- case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
- case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR0:
- vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR1:
- vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR2:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR3:
- vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR4:
- vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR5:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR6:
- vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR7:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR8:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR9:
- vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR10:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR11:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR12:
- vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR13:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR14:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR15:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
- break;
-
default:
- return EMULATE_FAIL;
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
- return EMULATE_DONE;
+ return emulated;
}
int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
+ int emulated = EMULATE_DONE;
+
switch (sprn) {
- /* 440 */
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
case SPRN_MMUCR:
vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
case SPRN_CCR0:
vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
case SPRN_CCR1:
vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
-
- /* Book E */
- case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
- case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
- case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
- case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
- case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
- case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
-
- case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
- break;
- case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
- break;
- case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
- break;
- case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
- break;
- case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
- break;
- case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
- break;
- case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
- break;
- case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
- break;
- case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
- break;
- case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
- break;
- case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
- break;
- case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
- break;
- case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
- break;
- case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
- break;
- case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
- break;
- case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
- break;
-
default:
- return EMULATE_FAIL;
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
- return EMULATE_DONE;
+ return emulated;
}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9a34b8edb9e..4a16f472cc1 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -208,20 +208,38 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
-int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+ unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
unsigned int stlb_index)
{
@@ -248,7 +266,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
}
-void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
@@ -269,15 +287,19 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB.
*/
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
- u32 flags, u32 max_bytes, unsigned int gtlb_index)
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+ unsigned int gtlb_index)
{
struct kvmppc_44x_tlbe stlbe;
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
hpa_t hpaddr;
gfn_t gfn;
+ u32 asid = gtlbe->tid;
+ u32 flags = gtlbe->word2;
+ u32 max_bytes = get_tlb_bytes(gtlbe);
unsigned int victim;
/* Select TLB entry to clobber. Indirectly guard against races with the TLB
@@ -448,10 +470,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
}
if (tlbe_is_host_safe(vcpu, tlbe)) {
- u64 asid;
gva_t eaddr;
gpa_t gpaddr;
- u32 flags;
u32 bytes;
eaddr = get_tlb_eaddr(tlbe);
@@ -462,10 +482,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
eaddr &= ~(bytes - 1);
gpaddr &= ~(bytes - 1);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- flags = tlbe->word2 & 0xffff;
-
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 772191f29e6..a9ff80e5152 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,8 +25,6 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
-extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
u8 rc);
@@ -85,11 +83,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
-static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
-{
- unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
-
- return get_tlb_raddr(tlbe) | (eaddr & pgmask);
-}
-
#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6dbdc4817d8..5a152a52796 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -2,6 +2,9 @@
# KVM configuration
#
+config HAVE_KVM_IRQCHIP
+ bool
+
menuconfig VIRTUALIZATION
bool "Virtualization"
---help---
@@ -43,6 +46,19 @@ config KVM_EXIT_TIMING
If unsure, say N.
+config KVM_E500
+ bool "KVM support for PowerPC E500 processors"
+ depends on EXPERIMENTAL && E500
+ select KVM
+ ---help---
+ Support running unmodified E500 guest kernels in virtual machines on
+ E500 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
config KVM_TRACE
bool "KVM trace support"
depends on KVM && MARKERS && SYSFS
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index df7ba59e6d5..4b2df66c79d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -16,8 +16,18 @@ AFLAGS_booke_interrupts.o := -I$(obj)
kvm-440-objs := \
booke.o \
+ booke_emulate.o \
booke_interrupts.o \
44x.o \
44x_tlb.o \
44x_emulate.o
obj-$(CONFIG_KVM_440) += kvm-440.o
+
+kvm-e500-objs := \
+ booke.o \
+ booke_emulate.o \
+ booke_interrupts.o \
+ e500.o \
+ e500_tlb.o \
+ e500_emulate.o
+obj-$(CONFIG_KVM_E500) += kvm-e500.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 35485dd6927..642e4204cf2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -30,10 +30,8 @@
#include <asm/kvm_ppc.h>
#include "timing.h"
#include <asm/cacheflush.h>
-#include <asm/kvm_44x.h>
#include "booke.h"
-#include "44x_tlb.h"
unsigned long kvmppc_booke_handlers;
@@ -120,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_FP_DATA:
+ case BOOKE_IRQPRIO_SPE_FP_ROUND:
case BOOKE_IRQPRIO_AP_UNAVAIL:
case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
@@ -165,7 +166,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
unsigned int priority;
priority = __ffs(*pending);
- while (priority <= BOOKE_MAX_INTERRUPT) {
+ while (priority <= BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
@@ -263,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_SPE_UNAVAIL:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_SPE_FP_DATA:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_SPE_FP_ROUND:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+ r = RESUME_GUEST;
+ break;
+
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
@@ -284,29 +300,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
int gtlb_index;
+ gpa_t gpaddr;
gfn_t gfn;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+ gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_mmu_dtlb_miss(vcpu);
kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
break;
}
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
- vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
- gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+ gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+ gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@@ -315,13 +329,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
+ vcpu->arch.paddr_accessed = gpaddr;
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -329,10 +343,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
gpa_t gpaddr;
gfn_t gfn;
@@ -341,18 +352,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+ gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+ kvmppc_mmu_itlb_miss(vcpu);
kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
break;
}
kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
- gpaddr = tlb_xlate(gtlbe, eaddr);
+ gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
@@ -362,8 +373,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
} else {
/* Guest mapped and leaped at non-RAM! */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index cf7c94ca24b..d59bcca1f9d 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
#include "timing.h"
/* interrupt priortity ordering */
@@ -30,17 +31,24 @@
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
-#define BOOKE_IRQPRIO_SYSCALL 5
-#define BOOKE_IRQPRIO_AP_UNAVAIL 6
-#define BOOKE_IRQPRIO_DTLB_MISS 7
-#define BOOKE_IRQPRIO_ITLB_MISS 8
-#define BOOKE_IRQPRIO_MACHINE_CHECK 9
-#define BOOKE_IRQPRIO_DEBUG 10
-#define BOOKE_IRQPRIO_CRITICAL 11
-#define BOOKE_IRQPRIO_WATCHDOG 12
-#define BOOKE_IRQPRIO_EXTERNAL 13
-#define BOOKE_IRQPRIO_FIT 14
-#define BOOKE_IRQPRIO_DECREMENTER 15
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+#define BOOKE_IRQPRIO_MAX 19
+
+extern unsigned long kvmppc_booke_handlers;
/* Helper function for "full" MSR writes. No need to call this if only EE is
* changing. */
@@ -57,4 +65,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
};
}
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 00000000000..aebc65e93f4
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,266 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI 50
+
+#define OP_31_XOP_MFMSR 83
+#define OP_31_XOP_WRTEE 131
+#define OP_31_XOP_MTMSR 146
+#define OP_31_XOP_WRTEEI 163
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int rs;
+ int rt;
+
+ switch (get_op(inst)) {
+ case 19:
+ switch (get_xop(inst)) {
+ case OP_19_XOP_RFI:
+ kvmppc_emul_rfi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+ *advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 31:
+ switch (get_xop(inst)) {
+
+ case OP_31_XOP_MFMSR:
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+ break;
+
+ case OP_31_XOP_MTMSR:
+ rs = get_rs(inst);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
+ case OP_31_XOP_WRTEE:
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case OP_31_XOP_WRTEEI:
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBSR:
+ vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+ case SPRN_DBSR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 084ebcd7dd8..d0c6f841bbd 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
_GLOBAL(kvmppc_handler_len)
.long kvmppc_handler_1 - kvmppc_handler_0
@@ -347,7 +350,9 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_44x
iccci 0, 0 /* XXX hack */
+#endif
/* Load some guest volatiles. */
lwz r0, VCPU_GPR(r0)(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 00000000000..d8067fd81cd
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_e500.h>
+#include <asm/kvm_ppc.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvmppc_e500_tlb_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_e500_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_e500_tlb_setup(vcpu_e500);
+
+ /* Use the same core version as host's */
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+ return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+ if (index < 0) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_e500) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vcpu = &vcpu_e500->vcpu;
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = kvmppc_e500_tlb_init(vcpu_e500);
+ if (err)
+ goto uninit_vcpu;
+
+ return vcpu;
+
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+static int kvmppc_e500_init(void)
+{
+ int r, i;
+ unsigned long ivor[3];
+ unsigned long max_ivor = 0;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ /* copy extra E500 exception handlers */
+ ivor[0] = mfspr(SPRN_IVOR32);
+ ivor[1] = mfspr(SPRN_IVOR33);
+ ivor[2] = mfspr(SPRN_IVOR34);
+ for (i = 0; i < 3; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+}
+
+static void kvmppc_e500_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 00000000000..3f760414b9f
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_e500.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX 914
+#define XOP_TLBRE 946
+#define XOP_TLBWE 978
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int ra;
+ int rb;
+
+ switch (get_op(inst)) {
+ case 31:
+ switch (get_xop(inst)) {
+
+ case XOP_TLBRE:
+ emulated = kvmppc_e500_emul_tlbre(vcpu);
+ break;
+
+ case XOP_TLBWE:
+ emulated = kvmppc_e500_emul_tlbwe(vcpu);
+ break;
+
+ case XOP_TLBSX:
+ rb = get_rb(inst);
+ emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+ break;
+
+ case XOP_TLBIVAX:
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_PID:
+ vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+ vcpu->arch.pid = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_PID1:
+ vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID2:
+ vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS0:
+ vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS1:
+ vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS2:
+ vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS3:
+ vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS4:
+ vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS6:
+ vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS7:
+ vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+ case SPRN_L1CSR1:
+ vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_HID0:
+ vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_HID1:
+ vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_MMUCSR0:
+ emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
+ vcpu->arch.gpr[rs]);
+ break;
+
+ /* extra exceptions */
+ case SPRN_IVOR32:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR34:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR35:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ }
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+ case SPRN_PID1:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+ case SPRN_PID2:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+ case SPRN_MAS0:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+ case SPRN_MAS1:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+ case SPRN_MAS2:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+ case SPRN_MAS3:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+ case SPRN_MAS4:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+ case SPRN_MAS6:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+ case SPRN_MAS7:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+
+ case SPRN_TLB0CFG:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
+ vcpu->arch.gpr[rt] &= ~0xfffUL;
+ vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
+ break;
+
+ case SPRN_TLB1CFG:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
+ vcpu->arch.gpr[rt] &= ~0xfffUL;
+ vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
+ break;
+
+ case SPRN_L1CSR1:
+ vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+ case SPRN_HID0:
+ vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+ case SPRN_HID1:
+ vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+
+ case SPRN_MMUCSR0:
+ vcpu->arch.gpr[rt] = 0; break;
+
+ /* extra exceptions */
+ case SPRN_IVOR32:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+ break;
+ case SPRN_IVOR34:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+ break;
+ case SPRN_IVOR35:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ break;
+ default:
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ }
+
+ return emulated;
+}
+
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644
index 00000000000..0e773fc2d5e
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_e500.h>
+
+#include "../mm/mmu_decl.h"
+#include "e500_tlb.h"
+
+#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+
+static unsigned int tlb1_entry_num;
+
+/*
+ * Debug aid: print the valid entries of both guest TLBs, followed by the
+ * valid entries of both shadow TLBs, using printk().
+ *
+ * An entry is printed only when MAS1_VALID is set in its mas1 word.
+ */
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe *tlbe;
+ int i, tlbsel;
+
+ /* Column header shared by all tables below. */
+ printk("| %8s | %8s | %8s | %8s | %8s |\n",
+ "nr", "mas1", "mas2", "mas3", "mas7");
+
+ /* Guest view: what the guest believes is in its TLB0/TLB1. */
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ printk("Guest TLB%d:\n", tlbsel);
+ for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+ tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+ if (tlbe->mas1 & MAS1_VALID)
+ printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
+ tlbsel, i, tlbe->mas1, tlbe->mas2,
+ tlbe->mas3, tlbe->mas7);
+ }
+ }
+
+ /* Shadow view: the entries this file writes to the host TLB. */
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ printk("Shadow TLB%d:\n", tlbsel);
+ for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
+ tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
+ if (tlbe->mas1 & MAS1_VALID)
+ printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
+ tlbsel, i, tlbe->mas1, tlbe->mas2,
+ tlbe->mas3, tlbe->mas7);
+ }
+ }
+}
+
+/*
+ * Hand out the next TLB0 replacement way in round-robin order.
+ *
+ * Returns the current value of guest_tlb_nv[0] and advances the counter,
+ * wrapping it back to zero once it reaches KVM_E500_TLB0_WAY_NUM.
+ */
+static inline unsigned int tlb0_get_next_victim(
+		struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	unsigned int way = vcpu_e500->guest_tlb_nv[0];
+
+	vcpu_e500->guest_tlb_nv[0]++;
+	if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+		vcpu_e500->guest_tlb_nv[0] = 0;
+
+	return way;
+}
+
+/*
+ * Number of TLB1 slots available for shadow entries: the entry count read
+ * from TLB1CFG (tlb1_entry_num) minus tlbcam_index.
+ *
+ * tlbcam_index is defined outside this file; presumably it counts the host
+ * kernel's own TLB1 (CAM) entries - confirm against ../mm/mmu_decl.h.
+ */
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+ return tlb1_entry_num - tlbcam_index;
+}
+
+/*
+ * Non-zero when the entry grants write permission to supervisor (MAS3_SW)
+ * or user (MAS3_UW) mode. The result is the raw masked value, not 0/1.
+ */
+static inline int tlbe_is_writable(struct tlbe *tlbe)
+{
+ return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+}
+
+/*
+ * Derive the MAS3 attribute/permission bits for a shadow entry from the
+ * guest's MAS3 value.
+ *
+ * Bits outside MAS3_ATTRIB_MASK are dropped. When the guest runs in
+ * supervisor mode (@usermode == 0) the guest's user permissions are
+ * discarded and its supervisor permissions are copied into the user
+ * permission positions (shifted left by one). The result always carries the
+ * full supervisor permission mask.
+ */
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+	u32 attrib = mas3 & MAS3_ATTRIB_MASK;
+
+	if (!usermode) {
+		/* Guest supervisor permissions become shadow user permissions. */
+		attrib &= ~E500_TLB_USER_PERM_MASK;
+		attrib |= (attrib & E500_TLB_SUPER_PERM_MASK) << 1;
+	}
+
+	return attrib | E500_TLB_SUPER_PERM_MASK;
+}
+
+/*
+ * Derive the MAS2 attribute bits for a shadow entry from the guest's MAS2.
+ *
+ * Only the bits in MAS2_ATTRIB_MASK are kept; on CONFIG_SMP builds MAS2_M is
+ * additionally forced on. @usermode is currently unused.
+ */
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+{
+#ifdef CONFIG_SMP
+ return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
+#else
+ return mas2 & MAS2_ATTRIB_MASK;
+#endif
+}
+
+/*
+ * Write one shadow TLB entry to the host TLB.
+ *
+ * Loads MAS1, MAS2, MAS3 and MAS7 from @stlbe and then executes tlbwe.
+ * MAS0 is not written here; the caller decides what MAS0 holds when tlbwe
+ * executes.
+ */
+static inline void __write_host_tlbe(struct tlbe *stlbe)
+{
+ mtspr(SPRN_MAS1, stlbe->mas1);
+ mtspr(SPRN_MAS2, stlbe->mas2);
+ mtspr(SPRN_MAS3, stlbe->mas3);
+ mtspr(SPRN_MAS7, stlbe->mas7);
+ __asm__ __volatile__ ("tlbwe\n" : : );
+}
+
+/*
+ * Push shadow entry [tlbsel][esel] to the host TLB with interrupts disabled.
+ *
+ * For TLB1 the host's MAS0 is saved, pointed at host slot
+ * to_htlb1_esel(esel) of TLB1 for the write, and restored afterwards.
+ * For TLB0 MAS0 is left untouched.
+ *
+ * NOTE(review): interrupts are unconditionally re-enabled on exit, so this
+ * presumably must not be called with interrupts already disabled - confirm.
+ */
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ local_irq_disable();
+ if (tlbsel == 0) {
+ __write_host_tlbe(stlbe);
+ } else {
+ unsigned register mas0;
+
+ /* Preserve the host's MAS0 across the TLB1 write. */
+ mas0 = mfspr(SPRN_MAS0);
+
+ mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
+ __write_host_tlbe(stlbe);
+
+ mtspr(SPRN_MAS0, mas0);
+ }
+ local_irq_enable();
+}
+
+/*
+ * Write every valid shadow TLB1 entry into the host TLB1.
+ *
+ * Runs with interrupts disabled and restores the host's MAS0 afterwards.
+ * Only the first tlb1_max_shadow_size() shadow slots are considered.
+ * @cpu is currently unused.
+ */
+void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int i;
+ unsigned register mas0;
+
+ /* Load all valid TLB1 entries to reduce guest tlb miss fault */
+ local_irq_disable();
+ mas0 = mfspr(SPRN_MAS0);
+ for (i = 0; i < tlb1_max_shadow_size(); i++) {
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+
+ if (get_tlb_v(stlbe)) {
+ /* Shadow slot i maps to host slot to_htlb1_esel(i). */
+ mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
+ | MAS0_ESEL(to_htlb1_esel(i)));
+ __write_host_tlbe(stlbe);
+ }
+ }
+ mtspr(SPRN_MAS0, mas0);
+ local_irq_enable();
+}
+
+/*
+ * Counterpart of kvmppc_e500_tlb_load(): calls _tlbil_all(), which is
+ * defined outside this file (presumably a full host TLB invalidate -
+ * confirm). @vcpu is unused.
+ */
+void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
+{
+ _tlbil_all();
+}
+
+/*
+ * Search one guest TLB for an entry translating @eaddr.
+ *
+ * An entry matches when @eaddr lies inside its effective address range, its
+ * TID is zero or equals @pid, it is valid, and its address space equals @as
+ * (an @as of -1 matches either address space).
+ *
+ * Returns the index of the first matching entry, or -1 if none matches.
+ */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+	int idx;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (idx = 0; idx < vcpu_e500->guest_tlb_size[tlbsel]; idx++) {
+		struct tlbe *entry = &vcpu_e500->guest_tlb[tlbsel][idx];
+		unsigned int entry_tid;
+
+		if (eaddr < get_tlb_eaddr(entry) || eaddr > get_tlb_end(entry))
+			continue;
+
+		entry_tid = get_tlb_tid(entry);
+		if (entry_tid && entry_tid != pid)
+			continue;
+
+		if (!get_tlb_v(entry))
+			continue;
+
+		if (as != -1 && get_tlb_ts(entry) != as)
+			continue;
+
+		return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Forget the page recorded for shadow entry [tlbsel][esel].
+ *
+ * If a page is recorded, the slot is cleared; if the shadow entry is also
+ * valid, the page is released as dirty when the entry is writable and as
+ * clean otherwise.
+ *
+ * NOTE(review): when a page is recorded but the shadow entry is not valid,
+ * the pointer is cleared without releasing the page - confirm that this
+ * combination cannot occur.
+ */
+static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int esel)
+{
+	struct tlbe *entry = &vcpu_e500->shadow_tlb[tlbsel][esel];
+	struct page *old = vcpu_e500->shadow_pages[tlbsel][esel];
+
+	if (!old)
+		return;
+
+	vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
+
+	if (!get_tlb_v(entry))
+		return;
+
+	if (tlbe_is_writable(entry))
+		kvm_release_page_dirty(old);
+	else
+		kvm_release_page_clean(old);
+}
+
+/*
+ * Invalidate shadow entry [tlbsel][esel] in software.
+ *
+ * Releases the page recorded for the entry, clears its mas1 word (which
+ * includes the valid bit) and emits an STLB_INVAL trace event. The host TLB
+ * itself is not touched here.
+ */
+static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ /* Release first: the release path inspects the still-valid entry. */
+ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+ stlbe->mas1 = 0;
+ KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+ stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+ handler);
+}
+
+/*
+ * Invalidate every valid shadow TLB1 entry that overlaps the guest effective
+ * address range [@eaddr, @eend] and whose TID is zero or equals the low
+ * eight bits of @tid. Each hit is invalidated in software and the (now
+ * invalid) entry is written to the host TLB so the hardware copy goes too.
+ *
+ * The walk is bounded by tlb1_max_shadow_size(), the same bound used when
+ * shadow TLB1 entries are allocated and loaded, rather than by the guest
+ * TLB1 size: shadow_tlb[1] is sized from the hardware entry count, which
+ * need not equal the guest TLB1 size.
+ */
+static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, gva_t eend, u32 tid)
+{
+	unsigned int pid = tid & 0xff;
+	unsigned int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (i = 0; i < tlb1_max_shadow_size(); i++) {
+		struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+		unsigned int stid;
+
+		if (!get_tlb_v(stlbe))
+			continue;
+
+		/* Skip entries lying entirely above or below the range. */
+		if (eend < get_tlb_eaddr(stlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(stlbe))
+			continue;
+
+		/* A TID of zero matches every PID. */
+		stid = get_tlb_tid(stlbe);
+		if (stid && (stid != pid))
+			continue;
+
+		kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+		write_host_tlbe(vcpu_e500, 1, i);
+	}
+}
+
+/*
+ * Preload the guest's MAS registers for a TLB miss at @eaddr in address
+ * space @as, using the defaults the guest programmed into MAS4.
+ *
+ * MAS0 selects the default TLB and a victim entry (round-robin for TLB0,
+ * entry 0 for TLB1); MAS1 is marked valid with the default page size and the
+ * TID taken from the PID register selected by MAS4; MAS2 gets the faulting
+ * page number plus default attributes; MAS3 keeps only its U0-U3 bits;
+ * MAS6 records the current PID and address space; MAS7 is cleared.
+ */
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+		unsigned int eaddr, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int victim, pidsel, tsized, tid;
+	int tlbsel;
+
+	/* since we only have two TLBs, only lower bit is used. */
+	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
+	tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+
+	/*
+	 * pidsel is a guest-controlled 4-bit field, but only PID0-PID2 are
+	 * emulated (pid[0..2]). Never index past them; an out-of-range
+	 * selector yields TID 0.
+	 * NOTE(review): TID 0 for reserved selector values is an assumption -
+	 * confirm against the e500 core manual.
+	 */
+	tid = (pidsel < 3) ? vcpu_e500->pid[pidsel] : 0;
+
+	vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+		| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+	vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+		| MAS1_TID(tid)
+		| MAS1_TSIZE(tsized);
+	vcpu_e500->mas2 = (eaddr & MAS2_EPN)
+		| (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
+	vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+	vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
+		| (get_cur_pid(vcpu) << 16)
+		| (as ? MAS6_SAS : 0);
+	vcpu_e500->mas7 = 0;
+}
+
+/*
+ * Build shadow entry [tlbsel][esel] mapping guest virtual address @gvaddr
+ * to the host page backing guest frame @gfn, with attributes derived from
+ * guest entry @gtlbe.
+ *
+ * On success the previous page recorded for the slot is released and the new
+ * page is recorded. If the guest frame cannot be resolved, an error is
+ * logged and the shadow entry is left unchanged.
+ */
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+{
+ struct page *new_page;
+ struct tlbe *stlbe;
+ hpa_t hpaddr;
+
+ stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ /* Get reference to new page. */
+ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+ if (is_error_page(new_page)) {
+ printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+ kvm_release_page_clean(new_page);
+ return;
+ }
+ hpaddr = page_to_phys(new_page);
+
+ /* Drop reference to old page. */
+ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+
+ vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+
+ /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+ | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+ stlbe->mas2 = (gvaddr & MAS2_EPN)
+ | e500_shadow_mas2_attrib(gtlbe->mas2,
+ vcpu_e500->vcpu.arch.msr & MSR_PR);
+ /* Low 32 bits of the host physical address go in MAS3, the rest in MAS7. */
+ stlbe->mas3 = (hpaddr & MAS3_RPN)
+ | e500_shadow_mas3_attrib(gtlbe->mas3,
+ vcpu_e500->vcpu.arch.msr & MSR_PR);
+ stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+ KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+ stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+ handler);
+}
+
+/*
+ * Shadow guest entry [tlbsel][esel] into the shadow slot with the same
+ * indices, using the guest entry's own effective and real addresses.
+ * Returns @esel.
+ */
+/* XXX only map the one-one case, for now use TLB0 */
+static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *gtlbe;
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+ gtlbe, tlbsel, esel);
+
+ return esel;
+}
+
+/*
+ * Install a shadow TLB1 mapping of @gvaddr to guest frame @gfn with the
+ * attributes of @gtlbe, and return the shadow slot that was used.
+ *
+ * Slots are handed out round-robin from guest_tlb_nv[1], which wraps to zero
+ * once it reaches tlb1_max_shadow_size().
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert
+ * into the shadow TLB.
+ */
+/* XXX for both one-one and one-to-many , for now use TLB1 */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+		u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
+{
+	unsigned int slot = vcpu_e500->guest_tlb_nv[1];
+
+	vcpu_e500->guest_tlb_nv[1]++;
+	if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
+		vcpu_e500->guest_tlb_nv[1] = 0;
+
+	kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, slot);
+
+	return slot;
+}
+
+/*
+ * Called on a guest privilege change. When the guest enters user mode, every
+ * shadow TLB1 slot is invalidated and _tlbil_all() is called, so that the
+ * mappings fault back in with the proper permission bits. Nothing is done
+ * when @usermode is zero.
+ */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500;
+	int slot;
+
+	if (!usermode)
+		return;
+
+	vcpu_e500 = to_e500(vcpu);
+
+	/* XXX Replace loop with fancy data structures. */
+	for (slot = 0; slot < tlb1_max_shadow_size(); slot++)
+		kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, slot);
+
+	_tlbil_all();
+}
+
+/*
+ * Invalidate guest entry [tlbsel][esel] together with its shadow mappings.
+ *
+ * Entries with the IPROT bit set are left alone and -1 is returned.
+ * Otherwise the shadow state is invalidated (by address range for TLB1, by
+ * index for TLB0), the guest entry's mas1 is cleared, and 0 is returned.
+ */
+static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ if (unlikely(get_tlb_iprot(gtlbe)))
+ return -1;
+
+ if (tlbsel == 1) {
+ /* Shadow TLB1 slots are not index-matched to guest entries. */
+ kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_end(gtlbe),
+ get_tlb_tid(gtlbe));
+ } else {
+ kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+
+ gtlbe->mas1 = 0;
+
+ return 0;
+}
+
+/*
+ * Emulate a guest write of @value to MMUCSR0.
+ *
+ * MMUCSR0_TLB0FI / MMUCSR0_TLB1FI request invalidation of every entry of
+ * guest TLB0 / TLB1 (IPROT-protected entries are skipped by the helper).
+ * _tlbil_all() is called unconditionally afterwards.
+ * Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+ int esel;
+
+ if (value & MMUCSR0_TLB0FI)
+ for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
+ if (value & MMUCSR0_TLB1FI)
+ for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
+
+ _tlbil_all();
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbivax instruction.
+ *
+ * The operand is gpr[rb], plus gpr[ra] when @ra is non-zero. Bit 2 of the
+ * operand requests "invalidate all" and bit 3 selects the TLB. Otherwise the
+ * 4KB-aligned address is looked up for the current PID in either address
+ * space and the matching guest entry, if any, is invalidated.
+ * _tlbil_all() is called in every case. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int ia;
+ int esel, tlbsel;
+ gva_t ea;
+
+ ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+
+ ia = (ea >> 2) & 0x1;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (ea >> 3) & 0x1;
+
+ if (ia) {
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ } else {
+ /* Strip the control bits, leaving the page address. */
+ ea &= 0xfffff000;
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+ get_cur_pid(vcpu), -1);
+ if (esel >= 0)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+
+ _tlbil_all();
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbre instruction.
+ *
+ * The guest entry selected by the guest's MAS0 (and MAS2 for TLB0) is copied
+ * into the guest's MAS1, MAS2, MAS3 and MAS7, and the NV field of MAS0 is
+ * refreshed from guest_tlb_nv[]. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel, esel;
+ struct tlbe *gtlbe;
+
+ tlbsel = get_tlb_tlbsel(vcpu_e500);
+ esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ /* Replace only the NV field; TLBSEL/ESEL stay as the guest set them. */
+ vcpu_e500->mas0 &= ~MAS0_NV(~0);
+ vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas1 = gtlbe->mas1;
+ vcpu_e500->mas2 = gtlbe->mas2;
+ vcpu_e500->mas3 = gtlbe->mas3;
+ vcpu_e500->mas7 = gtlbe->mas7;
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbsx instruction.
+ *
+ * Searches guest TLB0 and then TLB1 for an entry translating gpr[rb], using
+ * the search PID and search address space held in the guest's MAS6.
+ *
+ * On a hit the guest's MAS0-MAS3 and MAS7 describe the entry found. On a
+ * miss they are preloaded for a subsequent tlbwe: MAS0 selects the default
+ * TLB from MAS4 and a victim entry, MAS1 is left invalid with the search PID
+ * as TID, TS set when the search address space (MAS6_SAS) is 1, and the
+ * default page size from MAS4.
+ *
+ * Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int as = !!get_cur_sas(vcpu_e500);
+	unsigned int pid = get_cur_spid(vcpu_e500);
+	int esel, tlbsel;
+	struct tlbe *gtlbe = NULL;
+	gva_t ea;
+
+	ea = vcpu->arch.gpr[rb];
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+		if (esel >= 0) {
+			gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+			break;
+		}
+	}
+
+	if (gtlbe) {
+		vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+			| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+		vcpu_e500->mas1 = gtlbe->mas1;
+		vcpu_e500->mas2 = gtlbe->mas2;
+		vcpu_e500->mas3 = gtlbe->mas3;
+		vcpu_e500->mas7 = gtlbe->mas7;
+	} else {
+		int victim;
+
+		/* since we only have two TLBs, only lower bit is used. */
+		tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+		victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+
+		vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+			| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+		/*
+		 * TS must mirror MAS6_SAS. The test has to be on the MAS6
+		 * bit itself; a ternary on the MAS6_SAS constant is always
+		 * true and would mask MAS6 with MAS1_TS instead.
+		 */
+		vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
+			| ((vcpu_e500->mas6 & MAS6_SAS) ? MAS1_TS : 0)
+			| (vcpu_e500->mas4 & MAS4_TSIZED(~0));
+		vcpu_e500->mas2 &= MAS2_EPN;
+		vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
+		vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+		vcpu_e500->mas7 = 0;
+	}
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the guest's tlbwe instruction.
+ *
+ * The guest entry selected by the guest's MAS0 (and MAS2 for TLB0) is
+ * overwritten with the guest's MAS1, MAS2, MAS3 and MAS7. If the entry being
+ * replaced was a valid TLB1 entry, its shadow mappings are invalidated
+ * first. If the new entry is safe to shadow, a 4KB shadow mapping is created
+ * and written to the host TLB. Always returns EMULATE_DONE.
+ */
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ u64 eaddr;
+ u64 raddr;
+ u32 tid;
+ struct tlbe *gtlbe;
+ int tlbsel, esel, stlbsel, sesel;
+
+ tlbsel = get_tlb_tlbsel(vcpu_e500);
+ esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLB1 entry. */
+ if (get_tlb_v(gtlbe) && tlbsel == 1) {
+ eaddr = get_tlb_eaddr(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+ kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
+ get_tlb_end(gtlbe), tid);
+ }
+
+ gtlbe->mas1 = vcpu_e500->mas1;
+ gtlbe->mas2 = vcpu_e500->mas2;
+ gtlbe->mas3 = vcpu_e500->mas3;
+ gtlbe->mas7 = vcpu_e500->mas7;
+
+ KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
+ gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
+ handler);
+
+ /* Shadow the new entry right away when it is safe to do so. */
+ if (tlbe_is_host_safe(vcpu, gtlbe)) {
+ switch (tlbsel) {
+ case 0:
+ /* TLB0 */
+ /* Guest TLB0 entries are always recorded as 4KB pages. */
+ gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+ gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+
+ stlbsel = 0;
+ sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+
+ break;
+
+ case 1:
+ /* TLB1 */
+ eaddr = get_tlb_eaddr(gtlbe);
+ raddr = get_tlb_raddr(gtlbe);
+
+ /* Create a 4KB mapping on the host.
+ * If the guest wanted a large page,
+ * only the first 4KB is mapped here and the rest
+ * are mapped on the fly. */
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
+ raddr >> PAGE_SHIFT, gtlbe);
+ break;
+
+ default:
+ BUG();
+ }
+ write_host_tlbe(vcpu_e500, stlbsel, sesel);
+ }
+
+ return EMULATE_DONE;
+}
+
+/*
+ * Look up @eaddr in the guest TLBs for an instruction fetch: the address
+ * space is taken from MSR_IS and the PID from the current guest PID.
+ * Returns the combined TLB index, or -1 when no entry matches.
+ */
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+/*
+ * Look up @eaddr in the guest TLBs for a data access: the address space is
+ * taken from MSR_DS and the PID from the current guest PID.
+ * Returns the combined TLB index, or -1 when no entry matches.
+ */
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+/*
+ * Preload the guest's MAS registers for an instruction TLB miss at the
+ * current guest PC, in the address space given by MSR_IS.
+ */
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+/*
+ * Preload the guest's MAS registers for a data TLB miss at the faulting
+ * address (arch.fault_dear), in the address space given by MSR_DS.
+ */
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+/*
+ * Translate guest effective address @eaddr to a guest physical address using
+ * the guest TLB entry identified by the combined @index (see index_of()).
+ *
+ * The result is the entry's real page address ORed with the offset of @eaddr
+ * inside the entry's page. The caller is expected to pass an index obtained
+ * from a successful lookup; no validity check is done here.
+ */
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe *gtlbe =
+ &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
+ u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+/*
+ * Tear down the vcpu's shadow MMU state: release the page recorded for every
+ * shadow slot of both TLBs, then call _tlbil_all() to discard the guest
+ * mappings.
+ *
+ * The walk is bounded by shadow_tlb_size[], which is what shadow_pages[] is
+ * sized by; the guest TLB size may differ for TLB1.
+ */
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel, i;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++)
+		for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++)
+			kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
+
+	/* discard all guest mapping */
+	_tlbil_all();
+}
+
+/*
+ * Make the guest mapping identified by the combined @index present in the
+ * host TLB for effective address @eaddr / guest physical address @gpaddr.
+ *
+ * For TLB0 the shadow slot with the same index is written to the host as it
+ * stands. For TLB1 a new 4KB shadow entry is built for the faulting page and
+ * then written to the host.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+ unsigned int index)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel = tlbsel_of(index);
+ int esel = esel_of(index);
+ int stlbsel, sesel;
+
+ switch (tlbsel) {
+ case 0:
+ /* TLB0 shadow slots are index-matched to guest entries. */
+ stlbsel = 0;
+ sesel = esel;
+ break;
+
+ case 1: {
+ gfn_t gfn = gpaddr >> PAGE_SHIFT;
+ struct tlbe *gtlbe
+ = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
+ break;
+ }
+
+ default:
+ BUG();
+ break;
+ }
+ write_host_tlbe(vcpu_e500, stlbsel, sesel);
+}
+
+/*
+ * Search guest TLB0 and then guest TLB1 for an entry translating @eaddr for
+ * @pid in address space @as.
+ *
+ * Returns index_of(tlbsel, esel) for the first hit, or -1 when neither TLB
+ * holds a matching entry.
+ */
+int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+		gva_t eaddr, unsigned int pid, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel = 0;
+
+	while (tlbsel < 2) {
+		int esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel,
+				pid, as);
+
+		if (esel >= 0)
+			return index_of(tlbsel, esel);
+		tlbsel++;
+	}
+
+	return -1;
+}
+
+/*
+ * Populate the guest's initial TLB1 contents:
+ *   entry 0 - a valid 256MB mapping with mas2 == 0 (effective address 0),
+ *             real page 0 and supervisor permissions;
+ *   entry 1 - a valid 4KB mapping of the page containing 0xe0004500 onto
+ *             itself, with the MAS2_I and MAS2_G attributes, for serial
+ *             output.
+ */
+void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct tlbe *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = &vcpu_e500->guest_tlb[1][0];
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
+ tlbe->mas7 = 0;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = &vcpu_e500->guest_tlb[1][1];
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+ tlbe->mas7 = 0;
+}
+
+/*
+ * Allocate the per-vcpu TLB bookkeeping: zeroed guest and shadow arrays for
+ * TLB0 and TLB1, plus one page-pointer array per shadow TLB.
+ *
+ * The hardware TLB1 entry count is read from TLB1CFG into tlb1_entry_num and
+ * sizes the shadow TLB1 arrays; the guest arrays use the fixed
+ * KVM_E500_TLB0_SIZE / KVM_E500_TLB1_SIZE.
+ *
+ * Returns 0 on success. On allocation failure everything allocated so far is
+ * freed and -1 is returned.
+ */
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
+
+	vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
+	vcpu_e500->guest_tlb[0] = kzalloc(
+		sizeof(*vcpu_e500->guest_tlb[0]) * KVM_E500_TLB0_SIZE,
+		GFP_KERNEL);
+	if (!vcpu_e500->guest_tlb[0])
+		goto err_out;
+
+	vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
+	vcpu_e500->shadow_tlb[0] = kzalloc(
+		sizeof(*vcpu_e500->shadow_tlb[0]) * KVM_E500_TLB0_SIZE,
+		GFP_KERNEL);
+	if (!vcpu_e500->shadow_tlb[0])
+		goto err_out_guest0;
+
+	vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
+	vcpu_e500->guest_tlb[1] = kzalloc(
+		sizeof(*vcpu_e500->guest_tlb[1]) * KVM_E500_TLB1_SIZE,
+		GFP_KERNEL);
+	if (!vcpu_e500->guest_tlb[1])
+		goto err_out_shadow0;
+
+	vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
+	vcpu_e500->shadow_tlb[1] = kzalloc(
+		sizeof(*vcpu_e500->shadow_tlb[1]) * tlb1_entry_num,
+		GFP_KERNEL);
+	if (!vcpu_e500->shadow_tlb[1])
+		goto err_out_guest1;
+
+	vcpu_e500->shadow_pages[0] = kzalloc(
+		sizeof(*vcpu_e500->shadow_pages[0]) * KVM_E500_TLB0_SIZE,
+		GFP_KERNEL);
+	if (!vcpu_e500->shadow_pages[0])
+		goto err_out_shadow1;
+
+	vcpu_e500->shadow_pages[1] = kzalloc(
+		sizeof(*vcpu_e500->shadow_pages[1]) * tlb1_entry_num,
+		GFP_KERNEL);
+	if (!vcpu_e500->shadow_pages[1])
+		goto err_out_page0;
+
+	return 0;
+
+	/* Unwind in reverse order of allocation. */
+err_out_page0:
+	kfree(vcpu_e500->shadow_pages[0]);
+err_out_shadow1:
+	kfree(vcpu_e500->shadow_tlb[1]);
+err_out_guest1:
+	kfree(vcpu_e500->guest_tlb[1]);
+err_out_shadow0:
+	kfree(vcpu_e500->shadow_tlb[0]);
+err_out_guest0:
+	kfree(vcpu_e500->guest_tlb[0]);
+err_out:
+	return -1;
+}
+
+/*
+ * Free the six arrays allocated by kvmppc_e500_tlb_init(). Page references
+ * recorded in shadow_pages[] are not released here.
+ */
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->shadow_pages[1]);
+ kfree(vcpu_e500->shadow_pages[0]);
+ kfree(vcpu_e500->shadow_tlb[1]);
+ kfree(vcpu_e500->guest_tlb[1]);
+ kfree(vcpu_e500->shadow_tlb[0]);
+ kfree(vcpu_e500->guest_tlb[0]);
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644
index 00000000000..45b064b7690
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __KVM_E500_TLB_H__
+#define __KVM_E500_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-fsl-booke.h>
+#include <asm/tlb.h>
+#include <asm/kvm_e500.h>
+
+/* Guest TLB0 geometry: 2^WAY_SIZE_BIT sets of 2^WAY_NUM_BIT ways each. */
+#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
+#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
+#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
+
+#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
+#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
+#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
+
+/* Total entry counts of the guest TLB0 and TLB1 arrays. */
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+/* Combined index: TLB selector in bits 16 and up, entry number in bits 0-15. */
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+/* MAS3 permission groups and the MAS2/MAS3 bits carried into shadow entries. */
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
+extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
+extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
+extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
+extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+
+/* TLB helper functions */
+/* Page-size code of the entry: the 4-bit field at bits 8-11 of mas1. */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 8) & 0xf;
+}
+
+/* First effective address of the entry: mas2 with the low 12 bits cleared. */
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+ return tlbe->mas2 & 0xfffff000;
+}
+
+/*
+ * Size in bytes of the page described by the entry: 1KB * 4^code, where code
+ * is the page-size field from get_tlb_size().
+ */
+static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1ULL << 10 << (pgsize << 1);
+}
+
+/* Last effective address (inclusive) covered by the entry. */
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+ u64 bytes = get_tlb_bytes(tlbe);
+ return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+/*
+ * Real (guest physical) page address of the entry: mas7 supplies the bits
+ * above bit 31, mas3 (low 12 bits cleared) the lower 32 bits.
+ */
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+ u64 rpn = tlbe->mas7;
+ return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+}
+
+/* TID of the entry: the 8-bit field at bits 16-23 of mas1. */
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 16) & 0xff;
+}
+
+/* Address space (TS) of the entry: bit 12 of mas1, returned as 0 or 1. */
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 12) & 0x1;
+}
+
+/* Valid bit of the entry: bit 31 of mas1, returned as 0 or 1. */
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 31) & 0x1;
+}
+
+/* Invalidation-protect (IPROT) bit: bit 30 of mas1, returned as 0 or 1. */
+static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 30) & 0x1;
+}
+
+/* Current guest PID: the low 8 bits of vcpu->arch.pid. */
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pid & 0xff;
+}
+
+/* Search PID for tlbsx: the 8-bit field at bits 16-23 of the guest's MAS6. */
+static inline unsigned int get_cur_spid(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return (vcpu_e500->mas6 >> 16) & 0xff;
+}
+
+/* Search address space for tlbsx: bit 0 of the guest's MAS6. */
+static inline unsigned int get_cur_sas(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return vcpu_e500->mas6 & 0x1;
+}
+
+/* TLB selected by the guest's MAS0: bit 28, returned as 0 or 1. */
+static inline unsigned int get_tlb_tlbsel(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ /*
+ * Manual says that tlbsel has 2 bits wide.
+ * Since we only have two TLBs, only lower bit is used.
+ */
+ return (vcpu_e500->mas0 >> 28) & 0x1;
+}
+
+/* Next-victim (NV) field of the guest's MAS0: its low 12 bits. */
+static inline unsigned int get_tlb_nv_bit(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return vcpu_e500->mas0 & 0xfff;
+}
+
+/* Raw entry-select (ESEL) field of the guest's MAS0: bits 16-27. */
+static inline unsigned int get_tlb_esel_bit(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return (vcpu_e500->mas0 >> 16) & 0xfff;
+}
+
+/*
+ * Index into guest_tlb[tlbsel][] addressed by the guest's MAS registers.
+ *
+ * For TLB0 the index is built from the way (low bits of MAS0's ESEL field)
+ * and the set, taken from the effective page number bits of MAS2 starting at
+ * bit 12: index = (set << KVM_E500_TLB0_WAY_NUM_BIT) | way.
+ * For TLB1 the ESEL field is reduced modulo KVM_E500_TLB1_SIZE.
+ */
+static inline unsigned int get_tlb_esel(
+ const struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel)
+{
+ unsigned int esel = get_tlb_esel_bit(vcpu_e500);
+
+ if (tlbsel == 0) {
+ esel &= KVM_E500_TLB0_WAY_NUM_MASK;
+ esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
+ << KVM_E500_TLB0_WAY_NUM_BIT;
+ } else {
+ esel &= KVM_E500_TLB1_SIZE - 1;
+ }
+
+ return esel;
+}
+
+/*
+ * Decide whether a guest TLB entry may be shadowed into the host TLB.
+ *
+ * Returns 1 only when the entry is valid, its address space matches the
+ * guest's current MSR_IS setting, and its real address falls inside a KVM
+ * memory slot; returns 0 otherwise.
+ */
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct tlbe *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93f..a561d6e8da1 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -30,6 +30,39 @@
#include <asm/disassemble.h>
#include "timing.h"
+/* Primary opcodes, as returned by get_op(). */
+#define OP_TRAP 3
+
+/* Extended opcodes under primary opcode 31, as returned by get_xop(). */
+#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_LBZX 87
+#define OP_31_XOP_STWX 151
+#define OP_31_XOP_STBX 215
+#define OP_31_XOP_STBUX 247
+#define OP_31_XOP_LHZX 279
+#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_STHX 407
+#define OP_31_XOP_STHUX 439
+#define OP_31_XOP_MTSPR 467
+#define OP_31_XOP_DCBI 470
+#define OP_31_XOP_LWBRX 534
+#define OP_31_XOP_TLBSYNC 566
+#define OP_31_XOP_STWBRX 662
+#define OP_31_XOP_LHBRX 790
+#define OP_31_XOP_STHBRX 918
+
+/* Primary opcodes of the load/store instructions, as returned by get_op(). */
+#define OP_LWZ 32
+#define OP_LWZU 33
+#define OP_LBZ 34
+#define OP_LBZU 35
+#define OP_STW 36
+#define OP_STWU 37
+#define OP_STB 38
+#define OP_STBU 39
+#define OP_LHZ 40
+#define OP_LHZU 41
+#define OP_STH 44
+#define OP_STHU 45
+
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
@@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
switch (get_op(inst)) {
- case 3: /* trap */
+ case OP_TRAP:
vcpu->arch.esr |= ESR_PTR;
kvmppc_core_queue_program(vcpu);
advance = 0;
@@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 31:
switch (get_xop(inst)) {
- case 23: /* lwzx */
+ case OP_31_XOP_LWZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 87: /* lbzx */
+ case OP_31_XOP_LBZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 151: /* stwx */
+ case OP_31_XOP_STWX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
4, 1);
break;
- case 215: /* stbx */
+ case OP_31_XOP_STBX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
1, 1);
break;
- case 247: /* stbux */
+ case OP_31_XOP_STBUX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rs] = ea;
break;
- case 279: /* lhzx */
+ case OP_31_XOP_LHZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
- case 311: /* lhzux */
+ case OP_31_XOP_LHZUX:
rt = get_rt(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 339: /* mfspr */
+ case OP_31_XOP_MFSPR:
sprn = get_sprn(inst);
rt = get_rt(inst);
@@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 407: /* sthx */
+ case OP_31_XOP_STHX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 1);
break;
- case 439: /* sthux */
+ case OP_31_XOP_STHUX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 467: /* mtspr */
+ case OP_31_XOP_MTSPR:
sprn = get_sprn(inst);
rs = get_rs(inst);
switch (sprn) {
@@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 470: /* dcbi */
+ case OP_31_XOP_DCBI:
/* Do nothing. The guest is performing dcbi because
* hardware DMA is not snooped by the dcache, but
* emulated DMA either goes through the dcache as
@@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
* coherence. */
break;
- case 534: /* lwbrx */
+ case OP_31_XOP_LWBRX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
break;
- case 566: /* tlbsync */
+ case OP_31_XOP_TLBSYNC:
break;
- case 662: /* stwbrx */
+ case OP_31_XOP_STWBRX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
- case 790: /* lhbrx */
+ case OP_31_XOP_LHBRX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
- case 918: /* sthbrx */
+ case OP_31_XOP_STHBRX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 32: /* lwz */
+ case OP_LWZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 33: /* lwzu */
+ case OP_LWZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 34: /* lbz */
+ case OP_LBZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 35: /* lbzu */
+ case OP_LBZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 36: /* stw */
+ case OP_STW:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
4, 1);
break;
- case 37: /* stwu */
+ case OP_STWU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 38: /* stb */
+ case OP_STB:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
1, 1);
break;
- case 39: /* stbu */
+ case OP_STBU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 40: /* lhz */
+ case OP_LHZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
- case 41: /* lhzu */
+ case OP_LHZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 44: /* sth */
+ case OP_STH:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
2, 1);
break;
- case 45: /* sthu */
+ case OP_STHU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5f81256287f..9057335fdc6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -216,46 +216,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- kvmppc_core_destroy_mmu(vcpu);
+ kvmppc_mmu_destroy(vcpu);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- if (vcpu->guest_debug.enabled)
- kvmppc_core_load_guest_debugstate(vcpu);
-
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_debug.enabled)
- kvmppc_core_load_host_debugstate(vcpu);
-
- /* Don't leave guest TLB entries resident when being de-scheduled. */
- /* XXX It would be nice to differentiate between heavyweight exit and
- * sched_out here, since we could avoid the TLB flush for heavyweight
- * exits. */
- _tlbil_all();
kvmppc_core_vcpu_put(vcpu);
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int i;
-
- vcpu->guest_debug.enabled = dbg->enabled;
- if (vcpu->guest_debug.enabled) {
- for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
- if (dbg->breakpoints[i].enabled)
- vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
- else
- vcpu->guest_debug.bp[i] = 0;
- }
- }
-
- return 0;
+ return -EINVAL;
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 28c04dab263..882e47080e7 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -237,8 +237,6 @@ extern int noirqdebug;
static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
{
- const unsigned int cpu = smp_processor_id();
-
spin_lock(&desc->lock);
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -254,7 +252,7 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
goto out_eoi;
}
- kstat_cpu(cpu).irqs[irq]++;
+ kstat_incr_irqs_this_cpu(irq, desc);
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 6a0ad196aeb..f085369301b 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -508,7 +508,7 @@ static void __spu_add_to_rq(struct spu_context *ctx)
list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
set_bit(ctx->prio, spu_prio->bitmap);
if (!spu_prio->nr_waiting++)
- __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
}
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6b0a3538dc6..2a8af5e1634 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -343,13 +343,6 @@ source "mm/Kconfig"
comment "I/O subsystem configuration"
-config MACHCHK_WARNING
- bool "Process warning machine checks"
- help
- Select this option if you want the machine check handler on IBM S/390 or
- zSeries to process warning machine checks (e.g. on power failures).
- If unsure, say "Y".
-
config QDIO
tristate "QDIO support"
---help---
@@ -521,7 +514,7 @@ config APPLDATA_OS
config APPLDATA_NET_SUM
tristate "Monitor overall network statistics"
- depends on APPLDATA_BASE
+ depends on APPLDATA_BASE && NET
help
This provides network related data to the Linux - VM Monitor Stream,
currently there is only a total sum of network I/O statistics, no
@@ -552,7 +545,7 @@ config KEXEC
but is independent of hardware/microcode support.
config ZFCPDUMP
- tristate "zfcpdump support"
+ bool "zfcpdump support"
select SMP
default n
help
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index eca724d229e..b49c00ce65e 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -201,8 +201,7 @@ out_free:
static void __exit prng_exit(void)
{
/* wipe me */
- memset(p->buf, 0, prng_chunk_size);
- kfree(p->buf);
+ kzfree(p->buf);
kfree(p);
misc_deregister(&prng_dev);
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 8e9243ae0c1..b30606f6d52 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -57,7 +57,7 @@
* with operation of the form "set_bit(bitnr, flags)".
*/
-/* bitmap tables from arch/S390/kernel/bitmap.S */
+/* bitmap tables from arch/s390/kernel/bitmap.c */
extern const char _oi_bitmap[];
extern const char _ni_bitmap[];
extern const char _zb_findmap[];
@@ -525,16 +525,16 @@ static inline unsigned long __ffs_word_loop(const unsigned long *addr,
static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
{
#ifdef __s390x__
- if (likely((word & 0xffffffff) == 0xffffffff)) {
+ if ((word & 0xffffffff) == 0xffffffff) {
word >>= 32;
nr += 32;
}
#endif
- if (likely((word & 0xffff) == 0xffff)) {
+ if ((word & 0xffff) == 0xffff) {
word >>= 16;
nr += 16;
}
- if (likely((word & 0xff) == 0xff)) {
+ if ((word & 0xff) == 0xff) {
word >>= 8;
nr += 8;
}
@@ -549,16 +549,16 @@ static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
static inline unsigned long __ffs_word(unsigned long nr, unsigned long word)
{
#ifdef __s390x__
- if (likely((word & 0xffffffff) == 0)) {
+ if ((word & 0xffffffff) == 0) {
word >>= 32;
nr += 32;
}
#endif
- if (likely((word & 0xffff) == 0)) {
+ if ((word & 0xffff) == 0) {
word >>= 16;
nr += 16;
}
- if (likely((word & 0xff) == 0)) {
+ if ((word & 0xff) == 0) {
word >>= 8;
nr += 8;
}
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
new file mode 100644
index 00000000000..2185a6d619d
--- /dev/null
+++ b/arch/s390/include/asm/crw.h
@@ -0,0 +1,68 @@
+/*
+ * Data definitions for channel report processing
+ * Copyright IBM Corp. 2000,2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CRW_H
+#define _ASM_S390_CRW_H
+
+#include <linux/types.h>
+
+/*
+ * Channel Report Word
+ */
+struct crw {
+ __u32 res1 : 1; /* reserved zero */
+ __u32 slct : 1; /* solicited */
+ __u32 oflw : 1; /* overflow */
+ __u32 chn : 1; /* chained */
+ __u32 rsc : 4; /* reporting source code */
+ __u32 anc : 1; /* ancillary report */
+ __u32 res2 : 1; /* reserved zero */
+ __u32 erc : 6; /* error-recovery code */
+ __u32 rsid : 16; /* reporting-source ID */
+} __attribute__ ((packed));
+
+typedef void (*crw_handler_t)(struct crw *, struct crw *, int);
+
+extern int crw_register_handler(int rsc, crw_handler_t handler);
+extern void crw_unregister_handler(int rsc);
+extern void crw_handle_channel_report(void);
+
+#define NR_RSCS 16
+
+#define CRW_RSC_MONITOR 0x2 /* monitoring facility */
+#define CRW_RSC_SCH 0x3 /* subchannel */
+#define CRW_RSC_CPATH 0x4 /* channel path */
+#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */
+#define CRW_RSC_CSS 0xB /* channel subsystem */
+
+#define CRW_ERC_EVENT 0x00 /* event information pending */
+#define CRW_ERC_AVAIL 0x01 /* available */
+#define CRW_ERC_INIT 0x02 /* initialized */
+#define CRW_ERC_TERROR 0x03 /* temporary error */
+#define CRW_ERC_IPARM 0x04 /* installed parm initialized */
+#define CRW_ERC_TERM 0x05 /* terminal */
+#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */
+#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD 0x08 /* installed parameters modified */
+
+static inline int stcrw(struct crw *pcrw)
+{
+ int ccode;
+
+ asm volatile(
+ " stcrw 0(%2)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (ccode), "=m" (*pcrw)
+ : "a" (pcrw)
+ : "cc" );
+ return ccode;
+}
+
+#endif /* _ASM_S390_CRW_H */
diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h
index e2db6f16d9c..218bce81ec7 100644
--- a/arch/s390/include/asm/dasd.h
+++ b/arch/s390/include/asm/dasd.h
@@ -162,15 +162,15 @@ typedef struct dasd_profile_info_t {
unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */
} dasd_profile_info_t;
-/*
+/*
* struct format_data_t
* represents all data necessary to format a dasd
*/
typedef struct format_data_t {
- int start_unit; /* from track */
- int stop_unit; /* to track */
- int blksize; /* sectorsize */
- int intensity;
+ unsigned int start_unit; /* from track */
+ unsigned int stop_unit; /* to track */
+ unsigned int blksize; /* sectorsize */
+ unsigned int intensity;
} format_data_t;
/*
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index e82c10efe65..aae276d0038 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -44,24 +44,18 @@ idal_is_needed(void *vaddr, unsigned int length)
/*
* Return the number of idal words needed for an address/length pair.
*/
-static inline unsigned int
-idal_nr_words(void *vaddr, unsigned int length)
+static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
{
-#ifdef __s390x__
- if (idal_is_needed(vaddr, length))
- return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
- (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
-#endif
- return 0;
+ return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
+ (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
}
/*
* Create the list of idal words for an address/length pair.
*/
-static inline unsigned long *
-idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length)
+static inline unsigned long *idal_create_words(unsigned long *idaws,
+ void *vaddr, unsigned int length)
{
-#ifdef __s390x__
unsigned long paddr;
unsigned int cidaw;
@@ -74,7 +68,6 @@ idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length)
paddr += IDA_BLOCK_SIZE;
*idaws++ = paddr;
}
-#endif
return idaws;
}
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3a..0b2f829f6d5 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,4 +42,11 @@ struct kvm_fpu {
__u64 fprs[16];
};
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3c55e4107dc..c6e674f5fca 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -21,9 +21,6 @@
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
-struct kvm_guest_debug {
-};
-
struct sca_entry {
atomic_t scn;
__u64 reserved;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index f3720defdd1..b349f1c7fdf 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -11,129 +11,118 @@
#ifndef _ASM_S390_LOWCORE_H
#define _ASM_S390_LOWCORE_H
-#ifndef __s390x__
-#define __LC_EXT_OLD_PSW 0x018
-#define __LC_SVC_OLD_PSW 0x020
-#define __LC_PGM_OLD_PSW 0x028
-#define __LC_MCK_OLD_PSW 0x030
-#define __LC_IO_OLD_PSW 0x038
-#define __LC_EXT_NEW_PSW 0x058
-#define __LC_SVC_NEW_PSW 0x060
-#define __LC_PGM_NEW_PSW 0x068
-#define __LC_MCK_NEW_PSW 0x070
-#define __LC_IO_NEW_PSW 0x078
-#else /* !__s390x__ */
-#define __LC_EXT_OLD_PSW 0x0130
-#define __LC_SVC_OLD_PSW 0x0140
-#define __LC_PGM_OLD_PSW 0x0150
-#define __LC_MCK_OLD_PSW 0x0160
-#define __LC_IO_OLD_PSW 0x0170
-#define __LC_EXT_NEW_PSW 0x01b0
-#define __LC_SVC_NEW_PSW 0x01c0
-#define __LC_PGM_NEW_PSW 0x01d0
-#define __LC_MCK_NEW_PSW 0x01e0
-#define __LC_IO_NEW_PSW 0x01f0
-#endif /* !__s390x__ */
-
-#define __LC_IPL_PARMBLOCK_PTR 0x014
-#define __LC_EXT_PARAMS 0x080
-#define __LC_CPU_ADDRESS 0x084
-#define __LC_EXT_INT_CODE 0x086
-
-#define __LC_SVC_ILC 0x088
-#define __LC_SVC_INT_CODE 0x08A
-#define __LC_PGM_ILC 0x08C
-#define __LC_PGM_INT_CODE 0x08E
+#define __LC_IPL_PARMBLOCK_PTR 0x0014
+#define __LC_EXT_PARAMS 0x0080
+#define __LC_CPU_ADDRESS 0x0084
+#define __LC_EXT_INT_CODE 0x0086
-#define __LC_PER_ATMID 0x096
-#define __LC_PER_ADDRESS 0x098
-#define __LC_PER_ACCESS_ID 0x0A1
-#define __LC_AR_MODE_ID 0x0A3
+#define __LC_SVC_ILC 0x0088
+#define __LC_SVC_INT_CODE 0x008a
+#define __LC_PGM_ILC 0x008c
+#define __LC_PGM_INT_CODE 0x008e
-#define __LC_SUBCHANNEL_ID 0x0B8
-#define __LC_SUBCHANNEL_NR 0x0BA
-#define __LC_IO_INT_PARM 0x0BC
-#define __LC_IO_INT_WORD 0x0C0
-#define __LC_MCCK_CODE 0x0E8
+#define __LC_PER_ATMID 0x0096
+#define __LC_PER_ADDRESS 0x0098
+#define __LC_PER_ACCESS_ID 0x00a1
+#define __LC_AR_MODE_ID 0x00a3
-#define __LC_LAST_BREAK 0x110
-
-#define __LC_RETURN_PSW 0x200
-
-#define __LC_SAVE_AREA 0xC00
-
-#ifndef __s390x__
-#define __LC_IRB 0x208
-#define __LC_SYNC_ENTER_TIMER 0x248
-#define __LC_ASYNC_ENTER_TIMER 0x250
-#define __LC_EXIT_TIMER 0x258
-#define __LC_USER_TIMER 0x260
-#define __LC_SYSTEM_TIMER 0x268
-#define __LC_STEAL_TIMER 0x270
-#define __LC_LAST_UPDATE_TIMER 0x278
-#define __LC_LAST_UPDATE_CLOCK 0x280
-#define __LC_RETURN_MCCK_PSW 0x288
-#define __LC_KERNEL_STACK 0xC40
-#define __LC_THREAD_INFO 0xC44
-#define __LC_ASYNC_STACK 0xC48
-#define __LC_KERNEL_ASCE 0xC4C
-#define __LC_USER_ASCE 0xC50
-#define __LC_PANIC_STACK 0xC54
-#define __LC_CPUID 0xC60
-#define __LC_CPUADDR 0xC68
-#define __LC_IPLDEV 0xC7C
-#define __LC_CURRENT 0xC90
-#define __LC_INT_CLOCK 0xC98
-#else /* __s390x__ */
-#define __LC_IRB 0x210
-#define __LC_SYNC_ENTER_TIMER 0x250
-#define __LC_ASYNC_ENTER_TIMER 0x258
-#define __LC_EXIT_TIMER 0x260
-#define __LC_USER_TIMER 0x268
-#define __LC_SYSTEM_TIMER 0x270
-#define __LC_STEAL_TIMER 0x278
-#define __LC_LAST_UPDATE_TIMER 0x280
-#define __LC_LAST_UPDATE_CLOCK 0x288
-#define __LC_RETURN_MCCK_PSW 0x290
-#define __LC_KERNEL_STACK 0xD40
-#define __LC_THREAD_INFO 0xD48
-#define __LC_ASYNC_STACK 0xD50
-#define __LC_KERNEL_ASCE 0xD58
-#define __LC_USER_ASCE 0xD60
-#define __LC_PANIC_STACK 0xD68
-#define __LC_CPUID 0xD80
-#define __LC_CPUADDR 0xD88
-#define __LC_IPLDEV 0xDB8
-#define __LC_CURRENT 0xDD8
-#define __LC_INT_CLOCK 0xDE8
-#define __LC_VDSO_PER_CPU 0xE38
-#endif /* __s390x__ */
+#define __LC_SUBCHANNEL_ID 0x00b8
+#define __LC_SUBCHANNEL_NR 0x00ba
+#define __LC_IO_INT_PARM 0x00bc
+#define __LC_IO_INT_WORD 0x00c0
+#define __LC_MCCK_CODE 0x00e8
-#define __LC_PASTE 0xE40
+#define __LC_DUMP_REIPL 0x0e00
-#define __LC_PANIC_MAGIC 0xE00
#ifndef __s390x__
-#define __LC_PFAULT_INTPARM 0x080
-#define __LC_CPU_TIMER_SAVE_AREA 0x0D8
-#define __LC_CLOCK_COMP_SAVE_AREA 0x0E0
-#define __LC_PSW_SAVE_AREA 0x100
-#define __LC_PREFIX_SAVE_AREA 0x108
-#define __LC_AREGS_SAVE_AREA 0x120
-#define __LC_FPREGS_SAVE_AREA 0x160
-#define __LC_GPREGS_SAVE_AREA 0x180
-#define __LC_CREGS_SAVE_AREA 0x1C0
+#define __LC_EXT_OLD_PSW 0x0018
+#define __LC_SVC_OLD_PSW 0x0020
+#define __LC_PGM_OLD_PSW 0x0028
+#define __LC_MCK_OLD_PSW 0x0030
+#define __LC_IO_OLD_PSW 0x0038
+#define __LC_EXT_NEW_PSW 0x0058
+#define __LC_SVC_NEW_PSW 0x0060
+#define __LC_PGM_NEW_PSW 0x0068
+#define __LC_MCK_NEW_PSW 0x0070
+#define __LC_IO_NEW_PSW 0x0078
+#define __LC_SAVE_AREA 0x0200
+#define __LC_RETURN_PSW 0x0240
+#define __LC_RETURN_MCCK_PSW 0x0248
+#define __LC_SYNC_ENTER_TIMER 0x0250
+#define __LC_ASYNC_ENTER_TIMER 0x0258
+#define __LC_EXIT_TIMER 0x0260
+#define __LC_USER_TIMER 0x0268
+#define __LC_SYSTEM_TIMER 0x0270
+#define __LC_STEAL_TIMER 0x0278
+#define __LC_LAST_UPDATE_TIMER 0x0280
+#define __LC_LAST_UPDATE_CLOCK 0x0288
+#define __LC_CURRENT 0x0290
+#define __LC_THREAD_INFO 0x0294
+#define __LC_KERNEL_STACK 0x0298
+#define __LC_ASYNC_STACK 0x029c
+#define __LC_PANIC_STACK 0x02a0
+#define __LC_KERNEL_ASCE 0x02a4
+#define __LC_USER_ASCE 0x02a8
+#define __LC_USER_EXEC_ASCE 0x02ac
+#define __LC_CPUID 0x02b0
+#define __LC_INT_CLOCK 0x02c8
+#define __LC_IRB 0x0300
+#define __LC_PFAULT_INTPARM 0x0080
+#define __LC_CPU_TIMER_SAVE_AREA 0x00d8
+#define __LC_CLOCK_COMP_SAVE_AREA 0x00e0
+#define __LC_PSW_SAVE_AREA 0x0100
+#define __LC_PREFIX_SAVE_AREA 0x0108
+#define __LC_AREGS_SAVE_AREA 0x0120
+#define __LC_FPREGS_SAVE_AREA 0x0160
+#define __LC_GPREGS_SAVE_AREA 0x0180
+#define __LC_CREGS_SAVE_AREA 0x01c0
#else /* __s390x__ */
-#define __LC_PFAULT_INTPARM 0x11B8
+#define __LC_LAST_BREAK 0x0110
+#define __LC_EXT_OLD_PSW 0x0130
+#define __LC_SVC_OLD_PSW 0x0140
+#define __LC_PGM_OLD_PSW 0x0150
+#define __LC_MCK_OLD_PSW 0x0160
+#define __LC_IO_OLD_PSW 0x0170
+#define __LC_EXT_NEW_PSW 0x01b0
+#define __LC_SVC_NEW_PSW 0x01c0
+#define __LC_PGM_NEW_PSW 0x01d0
+#define __LC_MCK_NEW_PSW 0x01e0
+#define __LC_IO_NEW_PSW 0x01f0
+#define __LC_SAVE_AREA 0x0200
+#define __LC_RETURN_PSW 0x0280
+#define __LC_RETURN_MCCK_PSW 0x0290
+#define __LC_SYNC_ENTER_TIMER 0x02a0
+#define __LC_ASYNC_ENTER_TIMER 0x02a8
+#define __LC_EXIT_TIMER 0x02b0
+#define __LC_USER_TIMER 0x02b8
+#define __LC_SYSTEM_TIMER 0x02c0
+#define __LC_STEAL_TIMER 0x02c8
+#define __LC_LAST_UPDATE_TIMER 0x02d0
+#define __LC_LAST_UPDATE_CLOCK 0x02d8
+#define __LC_CURRENT 0x02e0
+#define __LC_THREAD_INFO 0x02e8
+#define __LC_KERNEL_STACK 0x02f0
+#define __LC_ASYNC_STACK 0x02f8
+#define __LC_PANIC_STACK 0x0300
+#define __LC_KERNEL_ASCE 0x0308
+#define __LC_USER_ASCE 0x0310
+#define __LC_USER_EXEC_ASCE 0x0318
+#define __LC_CPUID 0x0320
+#define __LC_INT_CLOCK 0x0340
+#define __LC_VDSO_PER_CPU 0x0350
+#define __LC_IRB 0x0380
+#define __LC_PASTE 0x03c0
+#define __LC_PFAULT_INTPARM 0x11b8
#define __LC_FPREGS_SAVE_AREA 0x1200
-#define __LC_GPREGS_SAVE_AREA 0x1280
+#define __LC_GPREGS_SAVE_AREA 0x1280
#define __LC_PSW_SAVE_AREA 0x1300
#define __LC_PREFIX_SAVE_AREA 0x1318
-#define __LC_FP_CREG_SAVE_AREA 0x131C
+#define __LC_FP_CREG_SAVE_AREA 0x131c
#define __LC_TODREG_SAVE_AREA 0x1324
-#define __LC_CPU_TIMER_SAVE_AREA 0x1328
+#define __LC_CPU_TIMER_SAVE_AREA 0x1328
#define __LC_CLOCK_COMP_SAVE_AREA 0x1331
-#define __LC_AREGS_SAVE_AREA 0x1340
-#define __LC_CREGS_SAVE_AREA 0x1380
+#define __LC_AREGS_SAVE_AREA 0x1340
+#define __LC_CREGS_SAVE_AREA 0x1380
#endif /* __s390x__ */
#ifndef __ASSEMBLY__
@@ -198,222 +187,240 @@ union save_area {
struct _lowcore
{
#ifndef __s390x__
- /* prefix area: defined by architecture */
- psw_t restart_psw; /* 0x000 */
- __u32 ccw2[4]; /* 0x008 */
- psw_t external_old_psw; /* 0x018 */
- psw_t svc_old_psw; /* 0x020 */
- psw_t program_old_psw; /* 0x028 */
- psw_t mcck_old_psw; /* 0x030 */
- psw_t io_old_psw; /* 0x038 */
- __u8 pad1[0x58-0x40]; /* 0x040 */
- psw_t external_new_psw; /* 0x058 */
- psw_t svc_new_psw; /* 0x060 */
- psw_t program_new_psw; /* 0x068 */
- psw_t mcck_new_psw; /* 0x070 */
- psw_t io_new_psw; /* 0x078 */
- __u32 ext_params; /* 0x080 */
- __u16 cpu_addr; /* 0x084 */
- __u16 ext_int_code; /* 0x086 */
- __u16 svc_ilc; /* 0x088 */
- __u16 svc_code; /* 0x08a */
- __u16 pgm_ilc; /* 0x08c */
- __u16 pgm_code; /* 0x08e */
- __u32 trans_exc_code; /* 0x090 */
- __u16 mon_class_num; /* 0x094 */
- __u16 per_perc_atmid; /* 0x096 */
- __u32 per_address; /* 0x098 */
- __u32 monitor_code; /* 0x09c */
- __u8 exc_access_id; /* 0x0a0 */
- __u8 per_access_id; /* 0x0a1 */
- __u8 pad2[0xB8-0xA2]; /* 0x0a2 */
- __u16 subchannel_id; /* 0x0b8 */
- __u16 subchannel_nr; /* 0x0ba */
- __u32 io_int_parm; /* 0x0bc */
- __u32 io_int_word; /* 0x0c0 */
- __u8 pad3[0xc8-0xc4]; /* 0x0c4 */
- __u32 stfl_fac_list; /* 0x0c8 */
- __u8 pad4[0xd4-0xcc]; /* 0x0cc */
- __u32 extended_save_area_addr; /* 0x0d4 */
- __u32 cpu_timer_save_area[2]; /* 0x0d8 */
- __u32 clock_comp_save_area[2]; /* 0x0e0 */
- __u32 mcck_interruption_code[2]; /* 0x0e8 */
- __u8 pad5[0xf4-0xf0]; /* 0x0f0 */
- __u32 external_damage_code; /* 0x0f4 */
- __u32 failing_storage_address; /* 0x0f8 */
- __u8 pad6[0x100-0xfc]; /* 0x0fc */
- __u32 st_status_fixed_logout[4];/* 0x100 */
- __u8 pad7[0x120-0x110]; /* 0x110 */
- __u32 access_regs_save_area[16];/* 0x120 */
- __u32 floating_pt_save_area[8]; /* 0x160 */
- __u32 gpregs_save_area[16]; /* 0x180 */
- __u32 cregs_save_area[16]; /* 0x1c0 */
-
- psw_t return_psw; /* 0x200 */
- __u8 irb[64]; /* 0x208 */
- __u64 sync_enter_timer; /* 0x248 */
- __u64 async_enter_timer; /* 0x250 */
- __u64 exit_timer; /* 0x258 */
- __u64 user_timer; /* 0x260 */
- __u64 system_timer; /* 0x268 */
- __u64 steal_timer; /* 0x270 */
- __u64 last_update_timer; /* 0x278 */
- __u64 last_update_clock; /* 0x280 */
- psw_t return_mcck_psw; /* 0x288 */
- __u8 pad8[0xc00-0x290]; /* 0x290 */
-
- /* System info area */
- __u32 save_area[16]; /* 0xc00 */
- __u32 kernel_stack; /* 0xc40 */
- __u32 thread_info; /* 0xc44 */
- __u32 async_stack; /* 0xc48 */
- __u32 kernel_asce; /* 0xc4c */
- __u32 user_asce; /* 0xc50 */
- __u32 panic_stack; /* 0xc54 */
- __u32 user_exec_asce; /* 0xc58 */
- __u8 pad10[0xc60-0xc5c]; /* 0xc5c */
- /* entry.S sensitive area start */
- struct cpuinfo_S390 cpu_data; /* 0xc60 */
- __u32 ipl_device; /* 0xc7c */
- /* entry.S sensitive area end */
-
- /* SMP info area: defined by DJB */
- __u64 clock_comparator; /* 0xc80 */
- __u32 ext_call_fast; /* 0xc88 */
- __u32 percpu_offset; /* 0xc8c */
- __u32 current_task; /* 0xc90 */
- __u32 softirq_pending; /* 0xc94 */
- __u64 int_clock; /* 0xc98 */
- __u8 pad11[0xe00-0xca0]; /* 0xca0 */
-
- /* 0xe00 is used as indicator for dump tools */
- /* whether the kernel died with panic() or not */
- __u32 panic_magic; /* 0xe00 */
-
- /* Align to the top 1k of prefix area */
- __u8 pad12[0x1000-0xe04]; /* 0xe04 */
+ /* 0x0000 - 0x01ff: defined by architecture */
+ psw_t restart_psw; /* 0x0000 */
+ __u32 ccw2[4]; /* 0x0008 */
+ psw_t external_old_psw; /* 0x0018 */
+ psw_t svc_old_psw; /* 0x0020 */
+ psw_t program_old_psw; /* 0x0028 */
+ psw_t mcck_old_psw; /* 0x0030 */
+ psw_t io_old_psw; /* 0x0038 */
+ __u8 pad_0x0040[0x0058-0x0040]; /* 0x0040 */
+ psw_t external_new_psw; /* 0x0058 */
+ psw_t svc_new_psw; /* 0x0060 */
+ psw_t program_new_psw; /* 0x0068 */
+ psw_t mcck_new_psw; /* 0x0070 */
+ psw_t io_new_psw; /* 0x0078 */
+ __u32 ext_params; /* 0x0080 */
+ __u16 cpu_addr; /* 0x0084 */
+ __u16 ext_int_code; /* 0x0086 */
+ __u16 svc_ilc; /* 0x0088 */
+ __u16 svc_code; /* 0x008a */
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ __u32 trans_exc_code; /* 0x0090 */
+ __u16 mon_class_num; /* 0x0094 */
+ __u16 per_perc_atmid; /* 0x0096 */
+ __u32 per_address; /* 0x0098 */
+ __u32 monitor_code; /* 0x009c */
+ __u8 exc_access_id; /* 0x00a0 */
+ __u8 per_access_id; /* 0x00a1 */
+ __u8 pad_0x00a2[0x00b8-0x00a2]; /* 0x00a2 */
+ __u16 subchannel_id; /* 0x00b8 */
+ __u16 subchannel_nr; /* 0x00ba */
+ __u32 io_int_parm; /* 0x00bc */
+ __u32 io_int_word; /* 0x00c0 */
+ __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
+ __u32 stfl_fac_list; /* 0x00c8 */
+ __u8 pad_0x00cc[0x00d4-0x00cc]; /* 0x00cc */
+ __u32 extended_save_area_addr; /* 0x00d4 */
+ __u32 cpu_timer_save_area[2]; /* 0x00d8 */
+ __u32 clock_comp_save_area[2]; /* 0x00e0 */
+ __u32 mcck_interruption_code[2]; /* 0x00e8 */
+ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
+ __u32 external_damage_code; /* 0x00f4 */
+ __u32 failing_storage_address; /* 0x00f8 */
+ __u8 pad_0x00fc[0x0100-0x00fc]; /* 0x00fc */
+ __u32 st_status_fixed_logout[4]; /* 0x0100 */
+ __u8 pad_0x0110[0x0120-0x0110]; /* 0x0110 */
+
+ /* CPU register save area: defined by architecture */
+ __u32 access_regs_save_area[16]; /* 0x0120 */
+ __u32 floating_pt_save_area[8]; /* 0x0160 */
+ __u32 gpregs_save_area[16]; /* 0x0180 */
+ __u32 cregs_save_area[16]; /* 0x01c0 */
+
+ /* Entry/exit save area & return psws. */
+ __u32 save_area[16]; /* 0x0200 */
+ psw_t return_psw; /* 0x0240 */
+ psw_t return_mcck_psw; /* 0x0248 */
+
+ /* CPU time accounting values */
+ __u64 sync_enter_timer; /* 0x0250 */
+ __u64 async_enter_timer; /* 0x0258 */
+ __u64 exit_timer; /* 0x0260 */
+ __u64 user_timer; /* 0x0268 */
+ __u64 system_timer; /* 0x0270 */
+ __u64 steal_timer; /* 0x0278 */
+ __u64 last_update_timer; /* 0x0280 */
+ __u64 last_update_clock; /* 0x0288 */
+
+ /* Current process. */
+ __u32 current_task; /* 0x0290 */
+ __u32 thread_info; /* 0x0294 */
+ __u32 kernel_stack; /* 0x0298 */
+
+ /* Interrupt and panic stack. */
+ __u32 async_stack; /* 0x029c */
+ __u32 panic_stack; /* 0x02a0 */
+
+ /* Address space pointer. */
+ __u32 kernel_asce; /* 0x02a4 */
+ __u32 user_asce; /* 0x02a8 */
+ __u32 user_exec_asce; /* 0x02ac */
+
+ /* SMP info area */
+ cpuid_t cpu_id; /* 0x02b0 */
+ __u32 cpu_nr; /* 0x02b8 */
+ __u32 softirq_pending; /* 0x02bc */
+ __u32 percpu_offset; /* 0x02c0 */
+ __u32 ext_call_fast; /* 0x02c4 */
+ __u64 int_clock; /* 0x02c8 */
+ __u64 clock_comparator; /* 0x02d0 */
+ __u8 pad_0x02d8[0x0300-0x02d8]; /* 0x02d8 */
+
+ /* Interrupt response block */
+ __u8 irb[64]; /* 0x0300 */
+
+ __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+ * block. Dump tools need IPIB for IPL after dump.
+ * Note: do not change the position of any fields in 0x0e00-0x0f00
+ */
+ __u32 ipib; /* 0x0e00 */
+ __u32 ipib_checksum; /* 0x0e04 */
+
+ /* Align to the top 1k of prefix area */
+ __u8 pad_0x0e08[0x1000-0x0e08]; /* 0x0e08 */
#else /* __s390x__ */
- /* prefix area: defined by architecture */
- __u32 ccw1[2]; /* 0x000 */
- __u32 ccw2[4]; /* 0x008 */
- __u8 pad1[0x80-0x18]; /* 0x018 */
- __u32 ext_params; /* 0x080 */
- __u16 cpu_addr; /* 0x084 */
- __u16 ext_int_code; /* 0x086 */
- __u16 svc_ilc; /* 0x088 */
- __u16 svc_code; /* 0x08a */
- __u16 pgm_ilc; /* 0x08c */
- __u16 pgm_code; /* 0x08e */
- __u32 data_exc_code; /* 0x090 */
- __u16 mon_class_num; /* 0x094 */
- __u16 per_perc_atmid; /* 0x096 */
- addr_t per_address; /* 0x098 */
- __u8 exc_access_id; /* 0x0a0 */
- __u8 per_access_id; /* 0x0a1 */
- __u8 op_access_id; /* 0x0a2 */
- __u8 ar_access_id; /* 0x0a3 */
- __u8 pad2[0xA8-0xA4]; /* 0x0a4 */
- addr_t trans_exc_code; /* 0x0A0 */
- addr_t monitor_code; /* 0x09c */
- __u16 subchannel_id; /* 0x0b8 */
- __u16 subchannel_nr; /* 0x0ba */
- __u32 io_int_parm; /* 0x0bc */
- __u32 io_int_word; /* 0x0c0 */
- __u8 pad3[0xc8-0xc4]; /* 0x0c4 */
- __u32 stfl_fac_list; /* 0x0c8 */
- __u8 pad4[0xe8-0xcc]; /* 0x0cc */
- __u32 mcck_interruption_code[2]; /* 0x0e8 */
- __u8 pad5[0xf4-0xf0]; /* 0x0f0 */
- __u32 external_damage_code; /* 0x0f4 */
- addr_t failing_storage_address; /* 0x0f8 */
- __u8 pad6[0x120-0x100]; /* 0x100 */
- psw_t restart_old_psw; /* 0x120 */
- psw_t external_old_psw; /* 0x130 */
- psw_t svc_old_psw; /* 0x140 */
- psw_t program_old_psw; /* 0x150 */
- psw_t mcck_old_psw; /* 0x160 */
- psw_t io_old_psw; /* 0x170 */
- __u8 pad7[0x1a0-0x180]; /* 0x180 */
- psw_t restart_psw; /* 0x1a0 */
- psw_t external_new_psw; /* 0x1b0 */
- psw_t svc_new_psw; /* 0x1c0 */
- psw_t program_new_psw; /* 0x1d0 */
- psw_t mcck_new_psw; /* 0x1e0 */
- psw_t io_new_psw; /* 0x1f0 */
- psw_t return_psw; /* 0x200 */
- __u8 irb[64]; /* 0x210 */
- __u64 sync_enter_timer; /* 0x250 */
- __u64 async_enter_timer; /* 0x258 */
- __u64 exit_timer; /* 0x260 */
- __u64 user_timer; /* 0x268 */
- __u64 system_timer; /* 0x270 */
- __u64 steal_timer; /* 0x278 */
- __u64 last_update_timer; /* 0x280 */
- __u64 last_update_clock; /* 0x288 */
- psw_t return_mcck_psw; /* 0x290 */
- __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
- /* System info area */
- __u64 save_area[16]; /* 0xc00 */
- __u8 pad9[0xd40-0xc80]; /* 0xc80 */
- __u64 kernel_stack; /* 0xd40 */
- __u64 thread_info; /* 0xd48 */
- __u64 async_stack; /* 0xd50 */
- __u64 kernel_asce; /* 0xd58 */
- __u64 user_asce; /* 0xd60 */
- __u64 panic_stack; /* 0xd68 */
- __u64 user_exec_asce; /* 0xd70 */
- __u8 pad10[0xd80-0xd78]; /* 0xd78 */
- /* entry.S sensitive area start */
- struct cpuinfo_S390 cpu_data; /* 0xd80 */
- __u32 ipl_device; /* 0xdb8 */
- __u32 pad11; /* 0xdbc */
- /* entry.S sensitive area end */
-
- /* SMP info area: defined by DJB */
- __u64 clock_comparator; /* 0xdc0 */
- __u64 ext_call_fast; /* 0xdc8 */
- __u64 percpu_offset; /* 0xdd0 */
- __u64 current_task; /* 0xdd8 */
- __u32 softirq_pending; /* 0xde0 */
- __u32 pad_0x0de4; /* 0xde4 */
- __u64 int_clock; /* 0xde8 */
- __u8 pad12[0xe00-0xdf0]; /* 0xdf0 */
-
- /* 0xe00 is used as indicator for dump tools */
- /* whether the kernel died with panic() or not */
- __u32 panic_magic; /* 0xe00 */
+ /* 0x0000 - 0x01ff: defined by architecture */
+ __u32 ccw1[2]; /* 0x0000 */
+ __u32 ccw2[4]; /* 0x0008 */
+ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */
+ __u32 ext_params; /* 0x0080 */
+ __u16 cpu_addr; /* 0x0084 */
+ __u16 ext_int_code; /* 0x0086 */
+ __u16 svc_ilc; /* 0x0088 */
+ __u16 svc_code; /* 0x008a */
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ __u32 data_exc_code; /* 0x0090 */
+ __u16 mon_class_num; /* 0x0094 */
+ __u16 per_perc_atmid; /* 0x0096 */
+ addr_t per_address; /* 0x0098 */
+ __u8 exc_access_id; /* 0x00a0 */
+ __u8 per_access_id; /* 0x00a1 */
+ __u8 op_access_id; /* 0x00a2 */
+ __u8 ar_access_id; /* 0x00a3 */
+ __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
+ addr_t trans_exc_code; /* 0x00a8 */
+ addr_t monitor_code; /* 0x00b0 */
+ __u16 subchannel_id; /* 0x00b8 */
+ __u16 subchannel_nr; /* 0x00ba */
+ __u32 io_int_parm; /* 0x00bc */
+ __u32 io_int_word; /* 0x00c0 */
+ __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
+ __u32 stfl_fac_list; /* 0x00c8 */
+ __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */
+ __u32 mcck_interruption_code[2]; /* 0x00e8 */
+ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
+ __u32 external_damage_code; /* 0x00f4 */
+ addr_t failing_storage_address; /* 0x00f8 */
+ __u8 pad_0x0100[0x0120-0x0100]; /* 0x0100 */
+ psw_t restart_old_psw; /* 0x0120 */
+ psw_t external_old_psw; /* 0x0130 */
+ psw_t svc_old_psw; /* 0x0140 */
+ psw_t program_old_psw; /* 0x0150 */
+ psw_t mcck_old_psw; /* 0x0160 */
+ psw_t io_old_psw; /* 0x0170 */
+ __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */
+ psw_t restart_psw; /* 0x01a0 */
+ psw_t external_new_psw; /* 0x01b0 */
+ psw_t svc_new_psw; /* 0x01c0 */
+ psw_t program_new_psw; /* 0x01d0 */
+ psw_t mcck_new_psw; /* 0x01e0 */
+ psw_t io_new_psw; /* 0x01f0 */
+
+ /* Entry/exit save area & return psws. */
+ __u64 save_area[16]; /* 0x0200 */
+ psw_t return_psw; /* 0x0280 */
+ psw_t return_mcck_psw; /* 0x0290 */
+
+ /* CPU accounting and timing values. */
+ __u64 sync_enter_timer; /* 0x02a0 */
+ __u64 async_enter_timer; /* 0x02a8 */
+ __u64 exit_timer; /* 0x02b0 */
+ __u64 user_timer; /* 0x02b8 */
+ __u64 system_timer; /* 0x02c0 */
+ __u64 steal_timer; /* 0x02c8 */
+ __u64 last_update_timer; /* 0x02d0 */
+ __u64 last_update_clock; /* 0x02d8 */
+
+ /* Current process. */
+ __u64 current_task; /* 0x02e0 */
+ __u64 thread_info; /* 0x02e8 */
+ __u64 kernel_stack; /* 0x02f0 */
+
+ /* Interrupt and panic stack. */
+ __u64 async_stack; /* 0x02f8 */
+ __u64 panic_stack; /* 0x0300 */
+
+ /* Address space pointer. */
+ __u64 kernel_asce; /* 0x0308 */
+ __u64 user_asce; /* 0x0310 */
+ __u64 user_exec_asce; /* 0x0318 */
+
+ /* SMP info area */
+ cpuid_t cpu_id; /* 0x0320 */
+ __u32 cpu_nr; /* 0x0328 */
+ __u32 softirq_pending; /* 0x032c */
+ __u64 percpu_offset; /* 0x0330 */
+ __u64 ext_call_fast; /* 0x0338 */
+ __u64 int_clock; /* 0x0340 */
+ __u64 clock_comparator; /* 0x0348 */
+ __u64 vdso_per_cpu_data; /* 0x0350 */
+ __u8 pad_0x0358[0x0380-0x0358]; /* 0x0358 */
+
+ /* Interrupt response block. */
+ __u8 irb[64]; /* 0x0380 */
/* Per cpu primary space access list */
- __u8 pad_0xe04[0xe38-0xe04]; /* 0xe04 */
- __u64 vdso_per_cpu_data; /* 0xe38 */
- __u32 paste[16]; /* 0xe40 */
-
- __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
-
- /* 64 bit extparam used for pfault, diag 250 etc */
- __u64 ext_params2; /* 0x11B8 */
-
- __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */
-
- /* System info area */
-
- __u64 floating_pt_save_area[16]; /* 0x1200 */
- __u64 gpregs_save_area[16]; /* 0x1280 */
- __u32 st_status_fixed_logout[4]; /* 0x1300 */
- __u8 pad15[0x1318-0x1310]; /* 0x1310 */
- __u32 prefixreg_save_area; /* 0x1318 */
- __u32 fpt_creg_save_area; /* 0x131c */
- __u8 pad16[0x1324-0x1320]; /* 0x1320 */
- __u32 tod_progreg_save_area; /* 0x1324 */
- __u32 cpu_timer_save_area[2]; /* 0x1328 */
- __u32 clock_comp_save_area[2]; /* 0x1330 */
- __u8 pad17[0x1340-0x1338]; /* 0x1338 */
- __u32 access_regs_save_area[16]; /* 0x1340 */
- __u64 cregs_save_area[16]; /* 0x1380 */
+ __u32 paste[16]; /* 0x03c0 */
+
+ __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+ * block. Dump tools need IPIB for IPL after dump.
+ * Note: do not change the position of any fields in 0x0e00-0x0f00
+ */
+ __u64 ipib; /* 0x0e00 */
+ __u32 ipib_checksum; /* 0x0e08 */
+ __u8 pad_0x0e0c[0x11b8-0x0e0c]; /* 0x0e0c */
+
+ /* 64 bit extparam used for pfault/diag 250: defined by architecture */
+ __u64 ext_params2; /* 0x11b8 */
+ __u8 pad_0x11c0[0x1200-0x11C0]; /* 0x11c0 */
+
+ /* CPU register save area: defined by architecture */
+ __u64 floating_pt_save_area[16]; /* 0x1200 */
+ __u64 gpregs_save_area[16]; /* 0x1280 */
+ __u32 st_status_fixed_logout[4]; /* 0x1300 */
+ __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */
+ __u32 prefixreg_save_area; /* 0x1318 */
+ __u32 fpt_creg_save_area; /* 0x131c */
+ __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */
+ __u32 tod_progreg_save_area; /* 0x1324 */
+ __u32 cpu_timer_save_area[2]; /* 0x1328 */
+ __u32 clock_comp_save_area[2]; /* 0x1330 */
+ __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */
+ __u32 access_regs_save_area[16]; /* 0x1340 */
+ __u64 cregs_save_area[16]; /* 0x1380 */
/* align to the top of the prefix area */
-
- __u8 pad18[0x2000-0x1400]; /* 0x1400 */
+ __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */
#endif /* !__s390x__ */
} __attribute__((packed)); /* End structure*/
@@ -433,8 +440,6 @@ static inline __u32 store_prefix(void)
return address;
}
-#define __PANIC_MAGIC 0xDEADC0DE
-
#endif
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 28ec870655a..fc7edd6f41b 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -74,7 +74,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- cpu_set(smp_processor_id(), next->cpu_vm_mask);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
update_mm(next, tsk);
}
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
new file mode 100644
index 00000000000..f4b60441adc
--- /dev/null
+++ b/arch/s390/include/asm/nmi.h
@@ -0,0 +1,66 @@
+/*
+ * Machine check handler definitions
+ *
+ * Copyright IBM Corp. 2000,2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_NMI_H
+#define _ASM_S390_NMI_H
+
+#include <linux/types.h>
+
+struct mci {
+ __u32 sd : 1; /* 00 system damage */
+ __u32 pd : 1; /* 01 instruction-processing damage */
+ __u32 sr : 1; /* 02 system recovery */
+ __u32 : 1; /* 03 */
+ __u32 cd : 1; /* 04 timing-facility damage */
+ __u32 ed : 1; /* 05 external damage */
+ __u32 : 1; /* 06 */
+ __u32 dg : 1; /* 07 degradation */
+ __u32 w : 1; /* 08 warning pending */
+ __u32 cp : 1; /* 09 channel-report pending */
+ __u32 sp : 1; /* 10 service-processor damage */
+ __u32 ck : 1; /* 11 channel-subsystem damage */
+ __u32 : 2; /* 12-13 */
+ __u32 b : 1; /* 14 backed up */
+ __u32 : 1; /* 15 */
+ __u32 se : 1; /* 16 storage error uncorrected */
+ __u32 sc : 1; /* 17 storage error corrected */
+ __u32 ke : 1; /* 18 storage-key error uncorrected */
+ __u32 ds : 1; /* 19 storage degradation */
+ __u32 wp : 1; /* 20 psw mwp validity */
+ __u32 ms : 1; /* 21 psw mask and key validity */
+ __u32 pm : 1; /* 22 psw program mask and cc validity */
+ __u32 ia : 1; /* 23 psw instruction address validity */
+ __u32 fa : 1; /* 24 failing storage address validity */
+ __u32 : 1; /* 25 */
+ __u32 ec : 1; /* 26 external damage code validity */
+ __u32 fp : 1; /* 27 floating point register validity */
+ __u32 gr : 1; /* 28 general register validity */
+ __u32 cr : 1; /* 29 control register validity */
+ __u32 : 1; /* 30 */
+ __u32 st : 1; /* 31 storage logical validity */
+ __u32 ie : 1; /* 32 indirect storage error */
+ __u32 ar : 1; /* 33 access register validity */
+ __u32 da : 1; /* 34 delayed access exception */
+ __u32 : 7; /* 35-41 */
+ __u32 pr : 1; /* 42 tod programmable register validity */
+ __u32 fc : 1; /* 43 fp control register validity */
+ __u32 ap : 1; /* 44 ancillary report */
+ __u32 : 1; /* 45 */
+ __u32 ct : 1; /* 46 cpu timer validity */
+ __u32 cc : 1; /* 47 clock comparator validity */
+ __u32 : 16; /* 48-63 */
+};
+
+struct pt_regs;
+
+extern void s390_handle_mcck(void);
+extern void s390_do_machine_check(struct pt_regs *regs);
+
+#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index db4523fe38a..61862b3ac79 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -42,22 +42,8 @@ static inline void get_cpu_id(cpuid_t *ptr)
asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr));
}
-struct cpuinfo_S390
-{
- cpuid_t cpu_id;
- __u16 cpu_addr;
- __u16 cpu_nr;
- unsigned long loops_per_jiffy;
- unsigned long *pgd_quick;
-#ifdef __s390x__
- unsigned long *pmd_quick;
-#endif /* __s390x__ */
- unsigned long *pte_quick;
- unsigned long pgtable_cache_sz;
-};
-
extern void s390_adjust_jiffies(void);
-extern void print_cpu_info(struct cpuinfo_S390 *);
+extern void print_cpu_info(void);
extern int get_cpu_capability(unsigned int *);
/*
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 8920025c3c0..f1b051630c5 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -172,6 +172,8 @@
#define NUM_CRS 16
#define NUM_ACRS 16
+#define NUM_CR_WORDS 3
+
#define FPR_SIZE 8
#define FPC_SIZE 4
#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
@@ -334,7 +336,7 @@ struct pt_regs
*/
typedef struct
{
- unsigned long cr[3];
+ unsigned long cr[NUM_CR_WORDS];
} per_cr_words;
#define PER_EM_MASK 0xE8000000UL
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 27fc1746de1..402d6dcf0d2 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -314,6 +314,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
int, int, unsigned long);
/* qdio errors reported to the upper-layer program */
+#define QDIO_ERROR_SIGA_TARGET 0x02
#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10
#define QDIO_ERROR_SIGA_BUSY 0x20
#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 024b91e0623..2009158a450 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -50,12 +50,7 @@ extern void machine_power_off_smp(void);
#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */
-#define raw_smp_processor_id() (S390_lowcore.cpu_data.cpu_nr)
-
-static inline __u16 hard_smp_processor_id(void)
-{
- return stap();
-}
+#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
/*
* returns 1 if cpu is in stopped/check stopped state or not operational
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index c786ab623b2..02330c50241 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -62,4 +62,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index d074673a6d9..cd0241db5a4 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -100,6 +100,7 @@ static inline char *strcat(char *dst, const char *src)
static inline char *strcpy(char *dst, const char *src)
{
+#if __GNUC__ < 4
register int r0 asm("0") = 0;
char *ret = dst;
@@ -109,10 +110,14 @@ static inline char *strcpy(char *dst, const char *src)
: "+&a" (dst), "+&a" (src) : "d" (r0)
: "cc", "memory");
return ret;
+#else
+ return __builtin_strcpy(dst, src);
+#endif
}
static inline size_t strlen(const char *s)
{
+#if __GNUC__ < 4
register unsigned long r0 asm("0") = 0;
const char *tmp = s;
@@ -121,6 +126,9 @@ static inline size_t strlen(const char *s)
" jo 0b"
: "+d" (r0), "+a" (tmp) : : "cc");
return r0 - (unsigned long) s;
+#else
+ return __builtin_strlen(s);
+#endif
}
static inline size_t strnlen(const char * s, size_t n)
@@ -135,7 +143,13 @@ static inline size_t strnlen(const char * s, size_t n)
: "+a" (end), "+a" (tmp) : "d" (r0) : "cc");
return end - s;
}
-
+#else /* IN_ARCH_STRING_C */
+void *memchr(const void * s, int c, size_t n);
+void *memscan(void *s, int c, size_t n);
+char *strcat(char *dst, const char *src);
+char *strcpy(char *dst, const char *src);
+size_t strlen(const char *s);
+size_t strnlen(const char * s, size_t n);
#endif /* !IN_ARCH_STRING_C */
#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index ad93212d9e1..9d70057d828 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -100,6 +100,7 @@ struct sysinfo_3_2_2 {
char reserved_1[24];
} vm[8];
+ char reserved_544[3552];
};
static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index d60394b9745..304cffa623e 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -51,7 +51,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
* If the process only ran on the local cpu, do a local flush.
*/
local_cpumask = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
+ if (cpumask_equal(mm_cpumask(mm), &local_cpumask))
__tlb_flush_local();
else
__tlb_flush_global();
@@ -73,7 +73,7 @@ static inline void __tlb_flush_idte(unsigned long asce)
static inline void __tlb_flush_mm(struct mm_struct * mm)
{
- if (unlikely(cpus_empty(mm->cpu_vm_mask)))
+ if (unlikely(cpumask_empty(mm_cpumask(mm))))
return;
/*
* If the machine has IDTE we prefer to do a per mm flush
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c979c3b56ab..5e0ad618dc4 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -5,7 +5,6 @@
#define mc_capable() (1)
-cpumask_t cpu_coregroup_map(unsigned int cpu);
const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
extern cpumask_t cpu_core_map[NR_CPUS];
diff --git a/arch/s390/include/asm/vtoc.h b/arch/s390/include/asm/vtoc.h
index 3a5267d90d2..8406a2b3157 100644
--- a/arch/s390/include/asm/vtoc.h
+++ b/arch/s390/include/asm/vtoc.h
@@ -39,7 +39,7 @@ struct vtoc_labeldate
__u16 day;
} __attribute__ ((packed));
-struct vtoc_volume_label
+struct vtoc_volume_label_cdl
{
char volkey[4]; /* volume key = volume label */
char vollbl[4]; /* volume label */
@@ -56,6 +56,14 @@ struct vtoc_volume_label
char res3[29]; /* reserved */
} __attribute__ ((packed));
+struct vtoc_volume_label_ldl {
+ char vollbl[4]; /* volume label */
+ char volid[6]; /* volume identifier */
+ char res3[69]; /* reserved */
+ char ldl_version; /* version number, valid for ldl format */
+ __u64 formatted_blocks; /* valid when ldl_version >= f2 */
+} __attribute__ ((packed));
+
struct vtoc_extent
{
__u8 typeind; /* extent type indicator */
@@ -140,7 +148,11 @@ struct vtoc_format4_label
char res2[10]; /* reserved */
__u8 DS4EFLVL; /* extended free-space management level */
struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */
- char res3[9]; /* reserved */
+ char res3; /* reserved */
+ __u32 DS4DCYL; /* number of logical cyls */
+ char res4[2]; /* reserved */
+ __u8 DS4DEVF2; /* device flags */
+ char res5; /* reserved */
} __attribute__ ((packed));
struct vtoc_ds5ext
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3edc6c6f258..228e3105ded 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -17,10 +17,12 @@ CFLAGS_smp.o := -Wno-nonnull
#
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
+
obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \
processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
- vdso.o vtime.o
+ vdso.o vtime.o sysinfo.o nmi.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/bitmap.S b/arch/s390/kernel/bitmap.S
deleted file mode 100644
index dfb41f946e2..00000000000
--- a/arch/s390/kernel/bitmap.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * arch/s390/kernel/bitmap.S
- * Bitmaps for set_bit, clear_bit, test_and_set_bit, ...
- * See include/asm-s390/{bitops.h|posix_types.h} for details
- *
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- */
-
- .globl _oi_bitmap
-_oi_bitmap:
- .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
-
- .globl _ni_bitmap
-_ni_bitmap:
- .byte 0xFE,0xFD,0xFB,0xF7,0xEF,0xDF,0xBF,0x7F
-
- .globl _zb_findmap
-_zb_findmap:
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
-
- .globl _sb_findmap
-_sb_findmap:
- .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-
diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c
new file mode 100644
index 00000000000..3ae4757b006
--- /dev/null
+++ b/arch/s390/kernel/bitmap.c
@@ -0,0 +1,54 @@
+/*
+ * Bitmaps for set_bit, clear_bit, test_and_set_bit, ...
+ * See include/asm/{bitops.h|posix_types.h} for details
+ *
+ * Copyright IBM Corp. 1999,2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+
+const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
+EXPORT_SYMBOL(_oi_bitmap);
+
+const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f };
+EXPORT_SYMBOL(_ni_bitmap);
+
+const char _zb_findmap[] = {
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
+ 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 };
+EXPORT_SYMBOL(_zb_findmap);
+
+const char _sb_findmap[] = {
+ 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+ 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 };
+EXPORT_SYMBOL(_sb_findmap);
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
index a2be3a978d5..123dd660d7f 100644
--- a/arch/s390/kernel/compat_ptrace.h
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -1,10 +1,11 @@
#ifndef _PTRACE32_H
#define _PTRACE32_H
+#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
#include "compat_linux.h" /* needed for psw_compat_t */
typedef struct {
- __u32 cr[3];
+ __u32 cr[NUM_CR_WORDS];
} per_cr_words32;
typedef struct {
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 62c706eb0de..87cf5a79a35 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -252,7 +252,7 @@ sys32_chroot_wrapper:
sys32_ustat_wrapper:
llgfr %r2,%r2 # dev_t
llgtr %r3,%r3 # struct ustat *
- jg sys_ustat
+ jg compat_sys_ustat
.globl sys32_dup2_wrapper
sys32_dup2_wrapper:
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index ba03fc0a3a5..be8bceaf37d 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -603,7 +603,7 @@ debug_input(struct file *file, const char __user *user_buf, size_t length,
static int
debug_open(struct inode *inode, struct file *file)
{
- int i = 0, rc = 0;
+ int i, rc = 0;
file_private_info_t *p_info;
debug_info_t *debug_info, *debug_info_snapshot;
@@ -642,8 +642,7 @@ found:
p_info = kmalloc(sizeof(file_private_info_t),
GFP_KERNEL);
if(!p_info){
- if(debug_info_snapshot)
- debug_info_free(debug_info_snapshot);
+ debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
@@ -698,8 +697,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
if ((uid != 0) || (gid != 0))
pr_warning("Root becomes the owner of all s390dbf files "
"in sysfs\n");
- if (!initialized)
- BUG();
+ BUG_ON(!initialized);
mutex_lock(&debug_mutex);
/* create new debug_info */
@@ -1156,7 +1154,6 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view)
else {
debugfs_remove(id->debugfs_entries[i]);
id->views[i] = NULL;
- rc = 0;
}
spin_unlock_irqrestore(&id->lock, flags);
out:
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 2a2ca268b1d..4d221c81c84 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -6,6 +6,7 @@
* Heiko Carstens <heiko.carstens@de.ibm.com>
*/
+#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -20,6 +21,7 @@
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/sclp.h>
#include "entry.h"
@@ -173,19 +175,21 @@ static noinline __init void init_kernel_storage_key(void)
page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY);
}
+static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE);
+
static noinline __init void detect_machine_type(void)
{
- struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data;
-
- get_cpu_id(&S390_lowcore.cpu_data.cpu_id);
-
- /* Running under z/VM ? */
- if (cpuinfo->cpu_id.version == 0xff)
- machine_flags |= MACHINE_FLAG_VM;
+ /* No VM information? Looks like LPAR */
+ if (stsi(&vmms, 3, 2, 2) == -ENOSYS)
+ return;
+ if (!vmms.count)
+ return;
- /* Running under KVM ? */
- if (cpuinfo->cpu_id.version == 0xfe)
+ /* Running under KVM? If not we assume z/VM */
+ if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3))
machine_flags |= MACHINE_FLAG_KVM;
+ else
+ machine_flags |= MACHINE_FLAG_VM;
}
static __init void early_pgm_check_handler(void)
@@ -348,7 +352,6 @@ static void __init setup_boot_command_line(void)
/* copy arch command line */
strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
- boot_command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0;
/* append IPL PARM data to the boot command line */
if (MACHINE_IS_VM) {
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index ec7e35f6055..1046c2c9f8d 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -469,6 +469,8 @@ start:
.org 0x10000
startup:basr %r13,0 # get base
.LPG0:
+ xc 0x200(256),0x200 # partially clear lowcore
+ xc 0x300(256),0x300
#ifndef CONFIG_MARCH_G5
# check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10}
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index db476d114ca..2ced846065b 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -20,7 +20,6 @@ startup_continue:
lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
- mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12)
#
# Setup stack
#
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index f9f70aa1524..65667b2e65c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -86,7 +86,6 @@ startup_continue:
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
- mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
lghi %r0,__LC_PASTE
stg %r0,__LC_VDSO_PER_CPU
#
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 2dcf590faba..6f3711a0eaa 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -23,7 +23,7 @@
#include <asm/ebcdic.h>
#include <asm/reset.h>
#include <asm/sclp.h>
-#include <asm/setup.h>
+#include <asm/checksum.h>
#define IPL_PARM_BLOCK_VERSION 0
@@ -56,13 +56,14 @@ struct shutdown_trigger {
};
/*
- * Five shutdown action types are supported:
+ * The following shutdown action types are supported:
*/
#define SHUTDOWN_ACTION_IPL_STR "ipl"
#define SHUTDOWN_ACTION_REIPL_STR "reipl"
#define SHUTDOWN_ACTION_DUMP_STR "dump"
#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd"
#define SHUTDOWN_ACTION_STOP_STR "stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl"
struct shutdown_action {
char *name;
@@ -146,6 +147,7 @@ static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
static struct ipl_parameter_block *reipl_block_fcp;
static struct ipl_parameter_block *reipl_block_ccw;
static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
@@ -835,6 +837,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_CCW_VM;
else
reipl_method = REIPL_METHOD_CCW_CIO;
+ reipl_block_actual = reipl_block_ccw;
break;
case IPL_TYPE_FCP:
if (diag308_set_works)
@@ -843,6 +846,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_FCP_RO_VM;
else
reipl_method = REIPL_METHOD_FCP_RO_DIAG;
+ reipl_block_actual = reipl_block_fcp;
break;
case IPL_TYPE_FCP_DUMP:
reipl_method = REIPL_METHOD_FCP_DUMP;
@@ -852,6 +856,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_NSS_DIAG;
else
reipl_method = REIPL_METHOD_NSS;
+ reipl_block_actual = reipl_block_nss;
break;
case IPL_TYPE_UNKNOWN:
reipl_method = REIPL_METHOD_DEFAULT;
@@ -960,7 +965,6 @@ static void reipl_run(struct shutdown_trigger *trigger)
diag308(DIAG308_IPL, NULL);
break;
case REIPL_METHOD_FCP_DUMP:
- default:
break;
}
disabled_wait((unsigned long) __builtin_return_address(0));
@@ -1069,10 +1073,12 @@ static int __init reipl_fcp_init(void)
{
int rc;
- if ((!diag308_set_works) && (ipl_info.type != IPL_TYPE_FCP))
- return 0;
- if ((!diag308_set_works) && (ipl_info.type == IPL_TYPE_FCP))
- make_attrs_ro(reipl_fcp_attrs);
+ if (!diag308_set_works) {
+ if (ipl_info.type == IPL_TYPE_FCP)
+ make_attrs_ro(reipl_fcp_attrs);
+ else
+ return 0;
+ }
reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_fcp)
@@ -1253,7 +1259,6 @@ static void dump_run(struct shutdown_trigger *trigger)
diag308(DIAG308_DUMP, NULL);
break;
case DUMP_METHOD_NONE:
- default:
return;
}
printk(KERN_EMERG "Dump failed!\n");
@@ -1332,6 +1337,49 @@ static struct shutdown_action __refdata dump_action = {
.init = dump_init,
};
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+ preempt_disable();
+ /*
+ * Bypass dynamic address translation (DAT) when storing IPL parameter
+ * information block address and checksum into the prefix area
+ * (corresponding to absolute addresses 0-8191).
+ * When enhanced DAT applies and the STE format control is one,
+ * the absolute address is formed without prefixing. In this case a
+ * normal store (stg/st) into the prefix area would no longer match
+ * absolute addresses 0-8191.
+ */
+#ifdef CONFIG_64BIT
+ asm volatile("sturg %0,%1"
+ :: "a" ((unsigned long) reipl_block_actual),
+ "a" (&lowcore_ptr[smp_processor_id()]->ipib));
+#else
+ asm volatile("stura %0,%1"
+ :: "a" ((unsigned long) reipl_block_actual),
+ "a" (&lowcore_ptr[smp_processor_id()]->ipib));
+#endif
+ asm volatile("stura %0,%1"
+ :: "a" (csum_partial(reipl_block_actual,
+ reipl_block_actual->hdr.len, 0)),
+ "a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum));
+ preempt_enable();
+ dump_run(trigger);
+}
+
+static int __init dump_reipl_init(void)
+{
+ if (!diag308_set_works)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+}
+
+static struct shutdown_action __refdata dump_reipl_action = {
+ .name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
+ .fn = dump_reipl_run,
+ .init = dump_reipl_init,
+};
+
/*
* vmcmd shutdown action: Trigger vm command on shutdown.
*/
@@ -1421,7 +1469,8 @@ static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
/* action list */
static struct shutdown_action *shutdown_actions_list[] = {
- &ipl_action, &reipl_action, &dump_action, &vmcmd_action, &stop_action};
+ &ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+ &vmcmd_action, &stop_action};
#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
/*
@@ -1434,11 +1483,11 @@ static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
size_t len)
{
int i;
+
for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
if (!shutdown_actions_list[i])
continue;
- if (strncmp(buf, shutdown_actions_list[i]->name,
- strlen(shutdown_actions_list[i]->name)) == 0) {
+ if (sysfs_streq(buf, shutdown_actions_list[i]->name)) {
trigger->action = shutdown_actions_list[i];
return len;
}
@@ -1672,7 +1721,7 @@ static int on_panic_notify(struct notifier_block *self,
static struct notifier_block on_panic_nb = {
.notifier_call = on_panic_notify,
- .priority = 0,
+ .priority = INT_MIN,
};
void __init setup_ipl(void)
@@ -1696,7 +1745,6 @@ void __init setup_ipl(void)
sizeof(ipl_info.data.nss.name));
break;
case IPL_TYPE_UNKNOWN:
- default:
/* We have no info to copy */
break;
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 59b4e796680..eed4a00cb67 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -310,15 +310,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
- r_type == R_390_PLTOFF32
- || r_type == R_390_PLTOFF64
- )
+ r_type == R_390_PLTOFF32 ||
+ r_type == R_390_PLTOFF64)
val = me->arch.plt_offset - me->arch.got_offset +
info->plt_offset + rela->r_addend;
- else
- val = (Elf_Addr) me->module_core +
- me->arch.plt_offset + info->plt_offset +
- rela->r_addend - loc;
+ else {
+ if (!((r_type == R_390_PLT16DBL &&
+ val - loc + 0xffffUL < 0x1ffffeUL) ||
+ (r_type == R_390_PLT32DBL &&
+ val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+ val = (Elf_Addr) me->module_core +
+ me->arch.plt_offset +
+ info->plt_offset;
+ val += rela->r_addend - loc;
+ }
if (r_type == R_390_PLT16DBL)
*(unsigned short *) loc = val >> 1;
else if (r_type == R_390_PLTOFF16)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 00000000000..4bfdc421d7e
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,376 @@
+/*
+ * Machine check handler
+ *
+ * Copyright IBM Corp. 2000,2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/etr.h>
+#include <asm/cpu.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+
+struct mcck_struct {
+ int kill_task;
+ int channel_report;
+ int warning;
+ unsigned long long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
+static NORET_TYPE void s390_handle_damage(char *msg)
+{
+ smp_send_stop();
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ while (1);
+}
+
+/*
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
+ */
+void s390_handle_mcck(void)
+{
+ unsigned long flags;
+ struct mcck_struct mcck;
+
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = __get_cpu_var(cpu_mcck);
+ memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+ clear_thread_flag(TIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
+
+ if (mcck.channel_report)
+ crw_handle_channel_report();
+ /*
+ * A warning may remain for a prolonged period on the bare iron.
+ * (actually until the machine is powered off, or the problem is gone)
+ * So we just stop listening for the WARNING MCH and avoid continuously
+ * being interrupted. One caveat is however, that we must do this per
+ * processor and cannot use the smp version of ctl_clear_bit().
+ * On VM we only get one interrupt per virtually presented machine check.
+ * Though one suffices, we may get one interrupt per (virtual) cpu.
+ */
+ if (mcck.warning) { /* WARNING pending ? */
+ static int mchchk_wng_posted = 0;
+
+ /* Use single cpu clear, as we cannot handle smp here. */
+ __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ if (xchg(&mchchk_wng_posted, 1) == 0)
+ kill_cad_pid(SIGPWR, 1);
+ }
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+EXPORT_SYMBOL_GPL(s390_handle_mcck);
+
+/*
+ * returns 0 if all registers could be validated
+ * returns 1 otherwise
+ */
+static int notrace s390_revalidate_registers(struct mci *mci)
+{
+ int kill_task;
+ u64 tmpclock;
+ u64 zero;
+ void *fpt_save_area, *fpt_creg_save_area;
+
+ kill_task = 0;
+ zero = 0;
+
+ if (!mci->gr) {
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ if (!mci->fp) {
+ /*
+ * Floating point registers can't be restored and
+ * therefore the process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+#ifndef CONFIG_64BIT
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 2,8(%0)\n"
+ " ld 4,16(%0)\n"
+ " ld 6,24(%0)"
+ : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+ if (MACHINE_HAS_IEEE) {
+#ifdef CONFIG_64BIT
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
+ fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+ fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+ fpt_creg_save_area = fpt_save_area + 128;
+#endif
+ if (!mci->fc) {
+ /*
+ * Floating point control register can't be restored.
+ * Task will be terminated.
+ */
+ asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+ kill_task = 1;
+
+ } else
+ asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
+
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 1,8(%0)\n"
+ " ld 2,16(%0)\n"
+ " ld 3,24(%0)\n"
+ " ld 4,32(%0)\n"
+ " ld 5,40(%0)\n"
+ " ld 6,48(%0)\n"
+ " ld 7,56(%0)\n"
+ " ld 8,64(%0)\n"
+ " ld 9,72(%0)\n"
+ " ld 10,80(%0)\n"
+ " ld 11,88(%0)\n"
+ " ld 12,96(%0)\n"
+ " ld 13,104(%0)\n"
+ " ld 14,112(%0)\n"
+ " ld 15,120(%0)\n"
+ : : "a" (fpt_save_area));
+ }
+ /* Revalidate access registers */
+ asm volatile(
+ " lam 0,15,0(%0)"
+ : : "a" (&S390_lowcore.access_regs_save_area));
+ if (!mci->ar) {
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+ }
+ /* Revalidate control registers */
+ if (!mci->cr) {
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage("invalid control registers.");
+ } else {
+#ifdef CONFIG_64BIT
+ asm volatile(
+ " lctlg 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#else
+ asm volatile(
+ " lctl 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+ }
+ /*
+ * We don't even try to revalidate the TOD register, since we simply
+ * can't write something sensible into that register.
+ */
+#ifdef CONFIG_64BIT
+ /*
+ * See if we can revalidate the TOD programmable register with its
+ * old contents (should be zero) otherwise set it to zero.
+ */
+ if (!mci->pr)
+ asm volatile(
+ " sr 0,0\n"
+ " sckpf"
+ : : : "0", "cc");
+ else
+ asm volatile(
+ " l 0,0(%0)\n"
+ " sckpf"
+ : : "a" (&S390_lowcore.tod_progreg_save_area)
+ : "0", "cc");
+#endif
+ /* Revalidate clock comparator register */
+ asm volatile(
+ " stck 0(%1)\n"
+ " sckc 0(%1)"
+ : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+
+ /* Check if old PSW is valid */
+ if (!mci->wp)
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage("old psw invalid.");
+
+ if (!mci->ms || !mci->pm || !mci->ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+
+#define MAX_IPD_COUNT 29
+#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND 6 /* External damage STP island check */
+#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define ED_ETR_SYNC 12 /* External damage ETR sync check */
+#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
+
+/*
+ * machine check handler.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+ static int ipd_count;
+ static DEFINE_SPINLOCK(ipd_lock);
+ static unsigned long long last_ipd;
+ struct mcck_struct *mcck;
+ unsigned long long tmp;
+ struct mci *mci;
+ int umode;
+
+ lockdep_off();
+ s390_idle_check();
+
+ mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mcck = &__get_cpu_var(cpu_mcck);
+ umode = user_mode(regs);
+
+ if (mci->sd) {
+ /* System damage -> stopping machine */
+ s390_handle_damage("received system damage machine check.");
+ }
+ if (mci->pd) {
+ if (mci->b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
+#ifdef CONFIG_64BIT
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+#else
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+ 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+ t_mcic = *(u64 *)mci;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage("processing backup machine "
+ "check with damage.");
+ }
+
+ /*
+ * Nullifying exigent condition, therefore we might
+ * retry this instruction.
+ */
+ spin_lock(&ipd_lock);
+ tmp = get_clock();
+ if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
+ ipd_count++;
+ else
+ ipd_count = 1;
+ last_ipd = tmp;
+ if (ipd_count == MAX_IPD_COUNT)
+ s390_handle_damage("too many ipd retries.");
+ spin_unlock(&ipd_lock);
+ } else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage("received instruction processing "
+ "damage machine check.");
+ }
+ }
+ if (s390_revalidate_registers(mci)) {
+ if (umode) {
+ /*
+ * Couldn't restore all register contents while in
+ * user mode -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ set_thread_flag(TIF_MCCK_PENDING);
+ } else {
+ /*
+ * Couldn't restore all register contents while in
+ * kernel mode -> stopping machine.
+ */
+ s390_handle_damage("unable to revalidate registers.");
+ }
+ }
+ if (mci->cd) {
+ /* Timing facility damage */
+ s390_handle_damage("TOD clock damaged");
+ }
+ if (mci->ed && mci->ec) {
+ /* External damage */
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
+ etr_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
+ etr_switch_to_local();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ stp_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ stp_island_check();
+ }
+ if (mci->se)
+ /* Storage error uncorrected */
+ s390_handle_damage("received storage error uncorrected "
+ "machine check.");
+ if (mci->ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage("received storage key-error uncorrected "
+ "machine check.");
+ if (mci->ds && mci->fa)
+ /* Storage degradation */
+ s390_handle_damage("received storage degradation machine "
+ "check.");
+ if (mci->cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+ if (mci->w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+ lockdep_on();
+}
+
+static int __init machine_check_init(void)
+{
+ ctl_set_bit(14, 25); /* enable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_set_bit(14, 24); /* enable warning MCH */
+ return 0;
+}
+arch_initcall(machine_check_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 5cd38a90e64..b48e961a38f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -1,18 +1,10 @@
/*
- * arch/s390/kernel/process.c
+ * This file handles the architecture dependent parts of process handling.
*
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Hartmut Penner (hp@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- *
- * Derived from "arch/i386/kernel/process.c"
- * Copyright (C) 1995, Linus Torvalds
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
+ * Copyright IBM Corp. 1999,2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Hartmut Penner <hp@de.ibm.com>,
+ * Denis Joseph Barrow
*/
#include <linux/compiler.h>
@@ -47,6 +39,7 @@
#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/timer.h>
+#include <asm/nmi.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -76,7 +69,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-extern void s390_handle_mcck(void);
/*
* The idle loop on a S390...
*/
@@ -149,6 +141,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
0, &regs, 0, NULL, NULL);
}
+EXPORT_SYMBOL(kernel_thread);
/*
* Free current thread data structures etc..
@@ -168,34 +161,35 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
- unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
+ unsigned long unused,
+ struct task_struct *p, struct pt_regs *regs)
{
- struct fake_frame
- {
- struct stack_frame sf;
- struct pt_regs childregs;
- } *frame;
-
- frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
- p->thread.ksp = (unsigned long) frame;
+ struct thread_info *ti;
+ struct fake_frame
+ {
+ struct stack_frame sf;
+ struct pt_regs childregs;
+ } *frame;
+
+ frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+ p->thread.ksp = (unsigned long) frame;
/* Store access registers to kernel stack of new process. */
- frame->childregs = *regs;
+ frame->childregs = *regs;
frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
- frame->childregs.gprs[15] = new_stackp;
- frame->sf.back_chain = 0;
+ frame->childregs.gprs[15] = new_stackp;
+ frame->sf.back_chain = 0;
- /* new return point is ret_from_fork */
- frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ /* new return point is ret_from_fork */
+ frame->sf.gprs[8] = (unsigned long) ret_from_fork;
- /* fake return stack for resume(), don't go back to schedule */
- frame->sf.gprs[9] = (unsigned long) frame;
+ /* fake return stack for resume(), don't go back to schedule */
+ frame->sf.gprs[9] = (unsigned long) frame;
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
#ifndef CONFIG_64BIT
- /*
+ /*
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the child.
*/
@@ -220,10 +214,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
#endif /* CONFIG_64BIT */
/* start new process with ar4 pointing to the correct address space */
p->thread.mm_segment = get_fs();
- /* Don't copy debug registers */
- memset(&p->thread.per_info,0,sizeof(p->thread.per_info));
-
- return 0;
+ /* Don't copy debug registers */
+ memset(&p->thread.per_info, 0, sizeof(p->thread.per_info));
+ /* Initialize per thread user and system timer values */
+ ti = task_thread_info(p);
+ ti->user_timer = 0;
+ ti->system_timer = 0;
+ return 0;
}
SYSCALL_DEFINE0(fork)
@@ -311,7 +308,7 @@ out:
int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
{
#ifndef CONFIG_64BIT
- /*
+ /*
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the dump.
*/
@@ -322,6 +319,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
#endif /* CONFIG_64BIT */
return 1;
}
+EXPORT_SYMBOL(dump_fpu);
unsigned long get_wchan(struct task_struct *p)
{
@@ -346,4 +344,3 @@ unsigned long get_wchan(struct task_struct *p)
}
return 0;
}
-
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 82c1872cfe8..802c8ab247f 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,10 +18,11 @@
#include <asm/lowcore.h>
#include <asm/param.h>
-void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
+void __cpuinit print_cpu_info(void)
{
pr_info("Processor %d started, address %d, identification %06X\n",
- cpuinfo->cpu_nr, cpuinfo->cpu_addr, cpuinfo->cpu_id.ident);
+ S390_lowcore.cpu_nr, S390_lowcore.cpu_addr,
+ S390_lowcore.cpu_id.ident);
}
/*
@@ -30,48 +31,46 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
static int show_cpuinfo(struct seq_file *m, void *v)
{
- static const char *hwcap_str[8] = {
+ static const char *hwcap_str[9] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat"
+ "edat", "etf3eh"
};
- struct cpuinfo_S390 *cpuinfo;
- unsigned long n = (unsigned long) v - 1;
- int i;
+ struct _lowcore *lc;
+ unsigned long n = (unsigned long) v - 1;
+ int i;
- s390_adjust_jiffies();
- preempt_disable();
- if (!n) {
- seq_printf(m, "vendor_id : IBM/S390\n"
- "# processors : %i\n"
- "bogomips per cpu: %lu.%02lu\n",
- num_online_cpus(), loops_per_jiffy/(500000/HZ),
- (loops_per_jiffy/(5000/HZ))%100);
- seq_puts(m, "features\t: ");
- for (i = 0; i < 8; i++)
- if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
- seq_printf(m, "%s ", hwcap_str[i]);
- seq_puts(m, "\n");
- }
+ s390_adjust_jiffies();
+ preempt_disable();
+ if (!n) {
+ seq_printf(m, "vendor_id : IBM/S390\n"
+ "# processors : %i\n"
+ "bogomips per cpu: %lu.%02lu\n",
+ num_online_cpus(), loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ))%100);
+ seq_puts(m, "features\t: ");
+ for (i = 0; i < 9; i++)
+ if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", hwcap_str[i]);
+ seq_puts(m, "\n");
+ }
- if (cpu_online(n)) {
+ if (cpu_online(n)) {
#ifdef CONFIG_SMP
- if (smp_processor_id() == n)
- cpuinfo = &S390_lowcore.cpu_data;
- else
- cpuinfo = &lowcore_ptr[n]->cpu_data;
+ lc = (smp_processor_id() == n) ?
+ &S390_lowcore : lowcore_ptr[n];
#else
- cpuinfo = &S390_lowcore.cpu_data;
+ lc = &S390_lowcore;
#endif
- seq_printf(m, "processor %li: "
- "version = %02X, "
- "identification = %06X, "
- "machine = %04X\n",
- n, cpuinfo->cpu_id.version,
- cpuinfo->cpu_id.ident,
- cpuinfo->cpu_id.machine);
- }
- preempt_enable();
- return 0;
+ seq_printf(m, "processor %li: "
+ "version = %02X, "
+ "identification = %06X, "
+ "machine = %04X\n",
+ n, lc->cpu_id.version,
+ lc->cpu_id.ident,
+ lc->cpu_id.machine);
+ }
+ preempt_enable();
+ return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index c41930499a5..774147824c3 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -1,10 +1,7 @@
/*
- * arch/s390/kernel/reipl.S
- *
- * S390 version
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com)
- Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * Copyright IBM Corp 2000,2009
+ * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Denis Joseph Barrow
*/
#include <asm/lowcore.h>
@@ -30,7 +27,7 @@ do_reipl_asm: basr %r13,0
mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10)
stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1)
stckc .Lclkcmp-.Lpg0(%r13)
- mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13)
+ mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13)
stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 46b90cb0370..656fcbb9bd8 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,49 +1,5 @@
-/*
- * arch/s390/kernel/s390_ksyms.c
- *
- * S390 version
- */
-#include <linux/highuid.h>
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/syscalls.h>
-#include <linux/interrupt.h>
-#include <asm/checksum.h>
-#include <asm/cpcmd.h>
-#include <asm/delay.h>
-#include <asm/pgalloc.h>
-#include <asm/setup.h>
#include <asm/ftrace.h>
-#ifdef CONFIG_IP_MULTICAST
-#include <net/arp.h>
-#endif
-
-/*
- * memory management
- */
-EXPORT_SYMBOL(_oi_bitmap);
-EXPORT_SYMBOL(_ni_bitmap);
-EXPORT_SYMBOL(_zb_findmap);
-EXPORT_SYMBOL(_sb_findmap);
-
-/*
- * binfmt_elf loader
- */
-extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs);
-EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(empty_zero_page);
-
-/*
- * misc.
- */
-EXPORT_SYMBOL(machine_flags);
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(csum_fold);
-EXPORT_SYMBOL(console_mode);
-EXPORT_SYMBOL(console_devno);
-EXPORT_SYMBOL(console_irq);
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c5cfb6185ea..06201b93cbb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -74,9 +74,17 @@ EXPORT_SYMBOL(uaccess);
* Machine setup..
*/
unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
unsigned int console_irq = -1;
+EXPORT_SYMBOL(console_irq);
+
unsigned long machine_flags;
+EXPORT_SYMBOL(machine_flags);
+
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
@@ -86,6 +94,10 @@ volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
int __initdata memory_end_set;
unsigned long __initdata memory_end;
+/* An array with a pointer to the lowcore of every CPU. */
+struct _lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
+
/*
* This is set up by the setup-routine at boot-time
* for S390 need to find out, what we have to setup
@@ -109,13 +121,10 @@ static struct resource data_resource = {
*/
void __cpuinit cpu_init(void)
{
- int addr = hard_smp_processor_id();
-
/*
* Store processor id in lowcore (used e.g. in timer_interrupt)
*/
- get_cpu_id(&S390_lowcore.cpu_data.cpu_id);
- S390_lowcore.cpu_data.cpu_addr = addr;
+ get_cpu_id(&S390_lowcore.cpu_id);
/*
* Force FPU initialization:
@@ -125,8 +134,7 @@ void __cpuinit cpu_init(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
enter_lazy_tlb(&init_mm, current);
}
@@ -217,7 +225,7 @@ static void __init conmode_default(void)
}
}
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
+#ifdef CONFIG_ZFCPDUMP
static void __init setup_zfcpdump(unsigned int console_devno)
{
static char str[41];
@@ -289,11 +297,7 @@ static int __init early_parse_mem(char *p)
early_param("mem", early_parse_mem);
#ifdef CONFIG_S390_SWITCH_AMODE
-#ifdef CONFIG_PGSTE
-unsigned int switch_amode = 1;
-#else
unsigned int switch_amode = 0;
-#endif
EXPORT_SYMBOL_GPL(switch_amode);
static int set_amode_and_uaccess(unsigned long user_amode,
@@ -414,7 +418,6 @@ setup_lowcore(void)
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
lc->io_new_psw.mask = psw_kernel_bits;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
- lc->ipl_device = S390_lowcore.ipl_device;
lc->clock_comparator = -1ULL;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
@@ -434,6 +437,7 @@ setup_lowcore(void)
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
set_prefix((u32)(unsigned long) lc);
+ lowcore_ptr[0] = lc;
}
static void __init
@@ -510,7 +514,7 @@ static void __init setup_memory_end(void)
unsigned long max_mem;
int i;
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
+#ifdef CONFIG_ZFCPDUMP
if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
memory_end = ZFCPDUMP_HSA_SIZE;
memory_end_set = 1;
@@ -677,7 +681,6 @@ setup_memory(void)
static void __init setup_hwcaps(void)
{
static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
- struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data;
unsigned long long facility_list_extended;
unsigned int facility_list;
int i;
@@ -693,15 +696,22 @@ static void __init setup_hwcaps(void)
* Bit 17: the message-security assist is installed
* Bit 19: the long-displacement facility is installed
* Bit 21: the extended-immediate facility is installed
+ * Bit 22: extended-translation facility 3 is installed
+ * Bit 30: extended-translation facility 3 enhancement facility
* These get translated to:
* HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
* HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
- * HWCAP_S390_LDISP bit 4, and HWCAP_S390_EIMM bit 5.
+ * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
+ * HWCAP_S390_ETF3EH bit 8 (22 && 30).
*/
for (i = 0; i < 6; i++)
if (facility_list & (1UL << (31 - stfl_bits[i])))
elf_hwcap |= 1UL << i;
+ if ((facility_list & (1UL << (31 - 22)))
+ && (facility_list & (1UL << (31 - 30))))
+ elf_hwcap |= 1UL << 8;
+
/*
* Check for additional facilities with store-facility-list-extended.
* stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
@@ -710,20 +720,22 @@ static void __init setup_hwcaps(void)
* How many facility words are stored depends on the number of
* doublewords passed to the instruction. The additional facilites
* are:
- * Bit 43: decimal floating point facility is installed
+ * Bit 42: decimal floating point facility is installed
+ * Bit 44: perform floating point operation facility is installed
* translated to:
- * HWCAP_S390_DFP bit 6.
+ * HWCAP_S390_DFP bit 6 (42 && 44).
*/
if ((elf_hwcap & (1UL << 2)) &&
__stfle(&facility_list_extended, 1) > 0) {
- if (facility_list_extended & (1ULL << (64 - 43)))
+ if ((facility_list_extended & (1ULL << (63 - 42)))
+ && (facility_list_extended & (1ULL << (63 - 44))))
elf_hwcap |= 1UL << 6;
}
if (MACHINE_HAS_HPAGE)
elf_hwcap |= 1UL << 7;
- switch (cpuinfo->cpu_id.machine) {
+ switch (S390_lowcore.cpu_id.machine) {
case 0x9672:
#if !defined(CONFIG_64BIT)
default: /* Use "g5" as default for 31 bit kernels. */
@@ -816,7 +828,7 @@ setup_arch(char **cmdline_p)
setup_lowcore();
cpu_init();
- __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+ __cpu_logical_map[0] = stap();
s390_init_cpu_topology();
/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2d337cbb932..006ed5016eb 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -32,6 +32,7 @@
#include <linux/delay.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/irqflags.h>
#include <linux/cpu.h>
#include <linux/timex.h>
#include <linux/bootmem.h>
@@ -50,12 +51,6 @@
#include <asm/vdso.h>
#include "entry.h"
-/*
- * An array with a pointer the lowcore of every CPU.
- */
-struct _lowcore *lowcore_ptr[NR_CPUS];
-EXPORT_SYMBOL(lowcore_ptr);
-
static struct task_struct *current_set[NR_CPUS];
static u8 smp_cpu_type;
@@ -81,9 +76,7 @@ void smp_send_stop(void)
/* Disable all interrupts/machine checks */
__load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
-
- /* write magic number to zero page (absolute 0) */
- lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC;
+ trace_hardirqs_off();
/* stop all processors */
for_each_online_cpu(cpu) {
@@ -233,7 +226,7 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
*/
#define CPU_INIT_NO 1
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
+#ifdef CONFIG_ZFCPDUMP
/*
* zfcpdump_prefix_array holds prefix registers for the following scenario:
@@ -274,7 +267,7 @@ EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
-#endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */
+#endif /* CONFIG_ZFCPDUMP */
static int cpu_stopped(int cpu)
{
@@ -304,8 +297,8 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
{
int cpu_id, logical_cpu;
- logical_cpu = first_cpu(avail);
- if (logical_cpu == NR_CPUS)
+ logical_cpu = cpumask_first(&avail);
+ if (logical_cpu >= nr_cpu_ids)
return 0;
for (cpu_id = 0; cpu_id <= 65535; cpu_id++) {
if (cpu_known(cpu_id))
@@ -316,8 +309,8 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
continue;
cpu_set(logical_cpu, cpu_present_map);
smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = next_cpu(logical_cpu, avail);
- if (logical_cpu == NR_CPUS)
+ logical_cpu = cpumask_next(logical_cpu, &avail);
+ if (logical_cpu >= nr_cpu_ids)
break;
}
return 0;
@@ -329,8 +322,8 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
int cpu_id, logical_cpu, cpu;
int rc;
- logical_cpu = first_cpu(avail);
- if (logical_cpu == NR_CPUS)
+ logical_cpu = cpumask_first(&avail);
+ if (logical_cpu >= nr_cpu_ids)
return 0;
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
@@ -351,8 +344,8 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
else
smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = next_cpu(logical_cpu, avail);
- if (logical_cpu == NR_CPUS)
+ logical_cpu = cpumask_next(logical_cpu, &avail);
+ if (logical_cpu >= nr_cpu_ids)
break;
}
out:
@@ -379,7 +372,7 @@ static void __init smp_detect_cpus(void)
c_cpus = 1;
s_cpus = 0;
- boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
+ boot_cpu_addr = __cpu_logical_map[0];
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
@@ -453,7 +446,7 @@ int __cpuinit start_secondary(void *cpuvoid)
/* Switch on interrupts */
local_irq_enable();
/* Print info about this processor */
- print_cpu_info(&S390_lowcore.cpu_data);
+ print_cpu_info();
/* cpu_idle will call schedule for us */
cpu_idle();
return 0;
@@ -515,7 +508,6 @@ out:
return -ENOMEM;
}
-#ifdef CONFIG_HOTPLUG_CPU
static void smp_free_lowcore(int cpu)
{
struct _lowcore *lowcore;
@@ -534,7 +526,6 @@ static void smp_free_lowcore(int cpu)
free_pages((unsigned long) lowcore, lc_order);
lowcore_ptr[cpu] = NULL;
}
-#endif /* CONFIG_HOTPLUG_CPU */
/* Upping and downing of CPUs */
int __cpuinit __cpu_up(unsigned int cpu)
@@ -543,16 +534,23 @@ int __cpuinit __cpu_up(unsigned int cpu)
struct _lowcore *cpu_lowcore;
struct stack_frame *sf;
sigp_ccode ccode;
+ u32 lowcore;
if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
return -EIO;
if (smp_alloc_lowcore(cpu))
return -ENOMEM;
-
- ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
- cpu, sigp_set_prefix);
- if (ccode)
- return -EIO;
+ do {
+ ccode = signal_processor(cpu, sigp_initial_cpu_reset);
+ if (ccode == sigp_busy)
+ udelay(10);
+ if (ccode == sigp_not_operational)
+ goto err_out;
+ } while (ccode == sigp_busy);
+
+ lowcore = (u32)(unsigned long)lowcore_ptr[cpu];
+ while (signal_processor_p(lowcore, cpu, sigp_set_prefix) == sigp_busy)
+ udelay(10);
idle = current_set[cpu];
cpu_lowcore = lowcore_ptr[cpu];
@@ -571,9 +569,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
: : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
cpu_lowcore->current_task = (unsigned long) idle;
- cpu_lowcore->cpu_data.cpu_nr = cpu;
+ cpu_lowcore->cpu_nr = cpu;
cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
- cpu_lowcore->ipl_device = S390_lowcore.ipl_device;
eieio();
while (signal_processor(cpu, sigp_restart) == sigp_busy)
@@ -582,6 +579,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
while (!cpu_online(cpu))
cpu_relax();
return 0;
+
+err_out:
+ smp_free_lowcore(cpu);
+ return -EIO;
}
static int __init setup_possible_cpus(char *s)
@@ -589,9 +590,8 @@ static int __init setup_possible_cpus(char *s)
int pcpus, cpu;
pcpus = simple_strtoul(s, NULL, 0);
- cpu_possible_map = cpumask_of_cpu(0);
- for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++)
- cpu_set(cpu, cpu_possible_map);
+ for (cpu = 0; cpu < pcpus && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
return 0;
}
early_param("possible_cpus", setup_possible_cpus);
@@ -663,7 +663,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
/* request the 0x1201 emergency signal external interrupt */
if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
panic("Couldn't request external interrupt 0x1201");
- print_cpu_info(&S390_lowcore.cpu_data);
+ print_cpu_info();
/* Reallocate current lowcore, but keep its contents. */
lc_order = sizeof(long) == 8 ? 1 : 0;
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 00000000000..b5e75e1061c
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright IBM Corp. 2001, 2009
+ * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+
+/* Sigh, math-emu. Don't ask. */
+#include <asm/sfp-util.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+static inline int stsi_0(void)
+{
+ int rc = stsi(NULL, 0, 0, 0);
+ return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
+}
+
+static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
+{
+ if (stsi(info, 1, 1, 1) == -ENOSYS)
+ return len;
+
+ EBCASC(info->manufacturer, sizeof(info->manufacturer));
+ EBCASC(info->type, sizeof(info->type));
+ EBCASC(info->model, sizeof(info->model));
+ EBCASC(info->sequence, sizeof(info->sequence));
+ EBCASC(info->plant, sizeof(info->plant));
+ EBCASC(info->model_capacity, sizeof(info->model_capacity));
+ EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+ EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ len += sprintf(page + len, "Manufacturer: %-16.16s\n",
+ info->manufacturer);
+ len += sprintf(page + len, "Type: %-4.4s\n",
+ info->type);
+ if (info->model[0] != '\0')
+ /*
+ * Sigh: the model field has been renamed with System z9
+ * to model_capacity and a new model field has been added
+ * after the plant field. To avoid confusing older programs,
+ * the "Model:" line prints "model_capacity model" or just
+ * "model_capacity" if the model string is empty.
+ */
+ len += sprintf(page + len,
+ "Model: %-16.16s %-16.16s\n",
+ info->model_capacity, info->model);
+ else
+ len += sprintf(page + len, "Model: %-16.16s\n",
+ info->model_capacity);
+ len += sprintf(page + len, "Sequence Code: %-16.16s\n",
+ info->sequence);
+ len += sprintf(page + len, "Plant: %-4.4s\n",
+ info->plant);
+ len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n",
+ info->model_capacity, *(u32 *) info->model_cap_rating);
+ if (info->model_perm_cap[0] != '\0')
+ len += sprintf(page + len,
+ "Model Perm. Capacity: %-16.16s %08u\n",
+ info->model_perm_cap,
+ *(u32 *) info->model_perm_cap_rating);
+ if (info->model_temp_cap[0] != '\0')
+ len += sprintf(page + len,
+ "Model Temp. Capacity: %-16.16s %08u\n",
+ info->model_temp_cap,
+ *(u32 *) info->model_temp_cap_rating);
+ return len;
+}
+
+static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len)
+{
+ struct sysinfo_1_2_2_extension *ext;
+ int i;
+
+ if (stsi(info, 1, 2, 2) == -ENOSYS)
+ return len;
+ ext = (struct sysinfo_1_2_2_extension *)
+ ((unsigned long) info + info->acc_offset);
+
+ len += sprintf(page + len, "\n");
+ len += sprintf(page + len, "CPUs Total: %d\n",
+ info->cpus_total);
+ len += sprintf(page + len, "CPUs Configured: %d\n",
+ info->cpus_configured);
+ len += sprintf(page + len, "CPUs Standby: %d\n",
+ info->cpus_standby);
+ len += sprintf(page + len, "CPUs Reserved: %d\n",
+ info->cpus_reserved);
+
+ if (info->format == 1) {
+ /*
+ * Sigh 2. According to the specification the alternate
+ * capability field is a 32 bit floating point number
+ * if the higher order 8 bits are not zero. Printing
+ * a floating point number in the kernel is a no-no, so
+ * always print the number as a 32 bit unsigned integer.
+ * The user-space needs to know about the strange
+ * encoding of the alternate cpu capability.
+ */
+ len += sprintf(page + len, "Capability: %u %u\n",
+ info->capability, ext->alt_capability);
+ for (i = 2; i <= info->cpus_total; i++)
+ len += sprintf(page + len,
+ "Adjustment %02d-way: %u %u\n",
+ i, info->adjustment[i-2],
+ ext->alt_adjustment[i-2]);
+
+ } else {
+ len += sprintf(page + len, "Capability: %u\n",
+ info->capability);
+ for (i = 2; i <= info->cpus_total; i++)
+ len += sprintf(page + len,
+ "Adjustment %02d-way: %u\n",
+ i, info->adjustment[i-2]);
+ }
+
+ if (info->secondary_capability != 0)
+ len += sprintf(page + len, "Secondary Capability: %d\n",
+ info->secondary_capability);
+ return len;
+}
+
+static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len)
+{
+ if (stsi(info, 2, 2, 2) == -ENOSYS)
+ return len;
+
+ EBCASC(info->name, sizeof(info->name));
+
+ len += sprintf(page + len, "\n");
+ len += sprintf(page + len, "LPAR Number: %d\n",
+ info->lpar_number);
+
+ len += sprintf(page + len, "LPAR Characteristics: ");
+ if (info->characteristics & LPAR_CHAR_DEDICATED)
+ len += sprintf(page + len, "Dedicated ");
+ if (info->characteristics & LPAR_CHAR_SHARED)
+ len += sprintf(page + len, "Shared ");
+ if (info->characteristics & LPAR_CHAR_LIMITED)
+ len += sprintf(page + len, "Limited ");
+ len += sprintf(page + len, "\n");
+
+ len += sprintf(page + len, "LPAR Name: %-8.8s\n",
+ info->name);
+
+ len += sprintf(page + len, "LPAR Adjustment: %d\n",
+ info->caf);
+
+ len += sprintf(page + len, "LPAR CPUs Total: %d\n",
+ info->cpus_total);
+ len += sprintf(page + len, "LPAR CPUs Configured: %d\n",
+ info->cpus_configured);
+ len += sprintf(page + len, "LPAR CPUs Standby: %d\n",
+ info->cpus_standby);
+ len += sprintf(page + len, "LPAR CPUs Reserved: %d\n",
+ info->cpus_reserved);
+ len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n",
+ info->cpus_dedicated);
+ len += sprintf(page + len, "LPAR CPUs Shared: %d\n",
+ info->cpus_shared);
+ return len;
+}
+
+static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len)
+{
+ int i;
+
+ if (stsi(info, 3, 2, 2) == -ENOSYS)
+ return len;
+ for (i = 0; i < info->count; i++) {
+ EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+ EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+ len += sprintf(page + len, "\n");
+ len += sprintf(page + len, "VM%02d Name: %-8.8s\n",
+ i, info->vm[i].name);
+ len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n",
+ i, info->vm[i].cpi);
+
+ len += sprintf(page + len, "VM%02d Adjustment: %d\n",
+ i, info->vm[i].caf);
+
+ len += sprintf(page + len, "VM%02d CPUs Total: %d\n",
+ i, info->vm[i].cpus_total);
+ len += sprintf(page + len, "VM%02d CPUs Configured: %d\n",
+ i, info->vm[i].cpus_configured);
+ len += sprintf(page + len, "VM%02d CPUs Standby: %d\n",
+ i, info->vm[i].cpus_standby);
+ len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n",
+ i, info->vm[i].cpus_reserved);
+ }
+ return len;
+}
+
+static int proc_read_sysinfo(char *page, char **start,
+ off_t off, int count,
+ int *eof, void *data)
+{
+ unsigned long info = get_zeroed_page(GFP_KERNEL);
+ int level, len;
+
+ if (!info)
+ return 0;
+
+ len = 0;
+ level = stsi_0();
+ if (level >= 1)
+ len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len);
+
+ if (level >= 1)
+ len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len);
+
+ if (level >= 2)
+ len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len);
+
+ if (level >= 3)
+ len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len);
+
+ free_page(info);
+ return len;
+}
+
+static __init int create_proc_sysinfo(void)
+{
+ create_proc_read_entry("sysinfo", 0444, NULL,
+ proc_read_sysinfo, NULL);
+ return 0;
+}
+device_initcall(create_proc_sysinfo);
+
+/*
+ * Service levels interface.
+ */
+
+static DECLARE_RWSEM(service_level_sem);
+static LIST_HEAD(service_level_list);
+
+int register_service_level(struct service_level *slr)
+{
+ struct service_level *ptr;
+
+ down_write(&service_level_sem);
+ list_for_each_entry(ptr, &service_level_list, list)
+ if (ptr == slr) {
+ up_write(&service_level_sem);
+ return -EEXIST;
+ }
+ list_add_tail(&slr->list, &service_level_list);
+ up_write(&service_level_sem);
+ return 0;
+}
+EXPORT_SYMBOL(register_service_level);
+
+int unregister_service_level(struct service_level *slr)
+{
+ struct service_level *ptr, *next;
+ int rc = -ENOENT;
+
+ down_write(&service_level_sem);
+ list_for_each_entry_safe(ptr, next, &service_level_list, list) {
+ if (ptr != slr)
+ continue;
+ list_del(&ptr->list);
+ rc = 0;
+ break;
+ }
+ up_write(&service_level_sem);
+ return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+ down_read(&service_level_sem);
+ return seq_list_start(&service_level_list, *pos);
+}
+
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &service_level_list, pos);
+}
+
+static void service_level_stop(struct seq_file *m, void *p)
+{
+ up_read(&service_level_sem);
+}
+
+static int service_level_show(struct seq_file *m, void *p)
+{
+ struct service_level *slr;
+
+ slr = list_entry(p, struct service_level, list);
+ slr->seq_print(m, slr);
+ return 0;
+}
+
+static const struct seq_operations service_level_seq_ops = {
+ .start = service_level_start,
+ .next = service_level_next,
+ .stop = service_level_stop,
+ .show = service_level_show
+};
+
+static int service_level_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &service_level_seq_ops);
+}
+
+static const struct file_operations service_level_ops = {
+ .open = service_level_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static void service_level_vm_print(struct seq_file *m,
+ struct service_level *slr)
+{
+ char *query_buffer, *str;
+
+ query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+ if (!query_buffer)
+ return;
+ cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
+ str = strchr(query_buffer, '\n');
+ if (str)
+ *str = 0;
+ seq_printf(m, "VM: %s\n", query_buffer);
+ kfree(query_buffer);
+}
+
+static struct service_level service_level_vm = {
+ .seq_print = service_level_vm_print
+};
+
+static __init int create_proc_service_level(void)
+{
+ proc_create("service_levels", 0, NULL, &service_level_ops);
+ if (MACHINE_IS_VM)
+ register_service_level(&service_level_vm);
+ return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * Bogomips calculation based on cpu capability.
+ */
+int get_cpu_capability(unsigned int *capability)
+{
+ struct sysinfo_1_2_2 *info;
+ int rc;
+
+ info = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ rc = stsi(info, 1, 2, 2);
+ if (rc == -ENOSYS)
+ goto out;
+ rc = 0;
+ *capability = info->capability;
+out:
+ free_page((unsigned long) info);
+ return rc;
+}
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ */
+void s390_adjust_jiffies(void)
+{
+ struct sysinfo_1_2_2 *info;
+ const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */
+ FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+ FP_DECL_EX;
+ unsigned int capability;
+
+ info = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return;
+
+ if (stsi(info, 1, 2, 2) != -ENOSYS) {
+ /*
+ * Major sigh. The cpu capability encoding is "special".
+ * If the first 9 bits of info->capability are 0 then it
+ * is a 32 bit unsigned integer in the range 0 .. 2^23.
+ * If the first 9 bits are != 0 then it is a 32 bit float.
+ * In addition a lower value indicates a proportionally
+ * higher cpu capacity. Bogomips are the other way round.
+ * To get to a halfway suitable number we divide 1e7
+ * by the cpu capability number. Yes, that means a floating
+ * point division .. math-emu here we come :-)
+ */
+ FP_UNPACK_SP(SA, &fmil);
+ if ((info->capability >> 23) == 0)
+ FP_FROM_INT_S(SB, info->capability, 32, int);
+ else
+ FP_UNPACK_SP(SB, &info->capability);
+ FP_DIV_S(SR, SA, SB);
+ FP_TO_INT_S(capability, SR, 32, 0);
+ } else
+ /*
+ * Really old machine without stsi block for basic
+ * cpu information. Report 42.0 bogomips.
+ */
+ capability = 42;
+ loops_per_jiffy = capability * (500000/HZ);
+ free_page((unsigned long) info);
+}
+
+/*
+ * calibrate the delay loop
+ */
+void __cpuinit calibrate_delay(void)
+{
+ s390_adjust_jiffies();
+ /* Print the good old Bogomips line .. */
+ printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+ "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fc468cae446..f72d41068dc 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -331,6 +331,7 @@ static unsigned long long adjust_time(unsigned long long old,
}
static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(clock_sync_mutex);
static unsigned long clock_sync_flags;
#define CLOCK_SYNC_HAS_ETR 0
@@ -394,6 +395,20 @@ static void enable_sync_clock(void)
atomic_set_mask(0x80000000, sw_ptr);
}
+/*
+ * Check if the clock is in sync: returns 1 when bit 0x80000000 is set in
+ * this CPU's clock_sync_word (the bit enable_sync_clock sets), else 0. */
+static inline int check_sync_clock(void)
+{
+ atomic_t *sw_ptr;
+ int rc;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
+ put_cpu_var(clock_sync_word); /* release the same variable taken above */
+ return rc;
+}
+
/* Single threaded workqueue used for etr and stp sync events */
static struct workqueue_struct *time_sync_wq;
@@ -485,6 +500,8 @@ static void etr_reset(void)
if (etr_setr(&etr_eacr) == 0) {
etr_tolec = get_clock();
set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
} else if (etr_port0_online || etr_port1_online) {
pr_warning("The real or virtual hardware system does "
"not provide an ETR interface\n");
@@ -533,8 +550,7 @@ void etr_switch_to_local(void)
{
if (!etr_eacr.sl)
return;
- if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- disable_sync_clock(NULL);
+ disable_sync_clock(NULL);
set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
queue_work(time_sync_wq, &etr_work);
}
@@ -549,8 +565,7 @@ void etr_sync_check(void)
{
if (!etr_eacr.es)
return;
- if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- disable_sync_clock(NULL);
+ disable_sync_clock(NULL);
set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
queue_work(time_sync_wq, &etr_work);
}
@@ -914,7 +929,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
* Do not try to get the alternate port aib if the clock
* is not in sync yet.
*/
- if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags) && !eacr.es)
+ if (!check_sync_clock())
return eacr;
/*
@@ -997,7 +1012,6 @@ static void etr_work_fn(struct work_struct *work)
on_each_cpu(disable_sync_clock, NULL, 1);
del_timer_sync(&etr_timer);
etr_update_eacr(eacr);
- clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
goto out_unlock;
}
@@ -1071,18 +1085,13 @@ static void etr_work_fn(struct work_struct *work)
/* Both ports not usable. */
eacr.es = eacr.sl = 0;
sync_port = -1;
- clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
}
- if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- eacr.es = 0;
-
/*
* If the clock is in sync just update the eacr and return.
* If there is no valid sync port wait for a port update.
*/
- if (test_bit(CLOCK_SYNC_STP, &clock_sync_flags) ||
- eacr.es || sync_port < 0) {
+ if (check_sync_clock() || sync_port < 0) {
etr_update_eacr(eacr);
etr_set_tolec_timeout(now);
goto out_unlock;
@@ -1103,13 +1112,11 @@ static void etr_work_fn(struct work_struct *work)
* and set up a timer to try again after 0.5 seconds
*/
etr_update_eacr(eacr);
- set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
if (now < etr_tolec + (1600000 << 12) ||
etr_sync_clock_stop(&aib, sync_port) != 0) {
/* Sync failed. Try again in 1/2 second. */
eacr.es = 0;
etr_update_eacr(eacr);
- clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
etr_set_sync_timeout();
} else
etr_set_tolec_timeout(now);
@@ -1191,19 +1198,30 @@ static ssize_t etr_online_store(struct sys_device *dev,
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
if (dev == &etr_port0_dev) {
if (etr_port0_online == value)
- return count; /* Nothing to do. */
+ goto out; /* Nothing to do. */
etr_port0_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
queue_work(time_sync_wq, &etr_work);
} else {
if (etr_port1_online == value)
- return count; /* Nothing to do. */
+ goto out; /* Nothing to do. */
etr_port1_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
queue_work(time_sync_wq, &etr_work);
}
+out:
+ mutex_unlock(&clock_sync_mutex);
return count;
}
@@ -1471,8 +1489,6 @@ static void stp_timing_alert(struct stp_irq_parm *intparm)
*/
void stp_sync_check(void)
{
- if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
- return;
disable_sync_clock(NULL);
queue_work(time_sync_wq, &stp_work);
}
@@ -1485,8 +1501,6 @@ void stp_sync_check(void)
*/
void stp_island_check(void)
{
- if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
- return;
disable_sync_clock(NULL);
queue_work(time_sync_wq, &stp_work);
}
@@ -1513,10 +1527,6 @@ static int stp_sync_clock(void *data)
enable_sync_clock();
- set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
- if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- queue_work(time_sync_wq, &etr_work);
-
rc = 0;
if (stp_info.todoff[0] || stp_info.todoff[1] ||
stp_info.todoff[2] || stp_info.todoff[3] ||
@@ -1535,9 +1545,6 @@ static int stp_sync_clock(void *data)
if (rc) {
disable_sync_clock(NULL);
stp_sync->in_sync = -EAGAIN;
- clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
- if (etr_port0_online || etr_port1_online)
- queue_work(time_sync_wq, &etr_work);
} else
stp_sync->in_sync = 1;
xchg(&first, 0);
@@ -1569,6 +1576,10 @@ static void stp_work_fn(struct work_struct *work)
if (rc || stp_info.c == 0)
goto out_unlock;
+ /* Skip synchronization if the clock is already in sync. */
+ if (check_sync_clock())
+ goto out_unlock;
+
memset(&stp_sync, 0, sizeof(stp_sync));
get_online_cpus();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
@@ -1684,8 +1695,14 @@ static ssize_t stp_online_store(struct sysdev_class *class,
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
stp_online = value;
+ if (stp_online)
+ set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
queue_work(time_sync_wq, &stp_work);
+ mutex_unlock(&clock_sync_mutex);
return count;
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index cc362c9ea8f..3c72c9cf22b 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(topology_lock);
cpumask_t cpu_core_map[NR_CPUS];
-cpumask_t cpu_coregroup_map(unsigned int cpu)
+static cpumask_t cpu_coregroup_map(unsigned int cpu)
{
struct core_info *core = &core_info;
unsigned long flags;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4584d81984c..c2e42cc65ce 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -61,9 +61,11 @@ extern pgm_check_handler_t do_asce_exception;
#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
#ifndef CONFIG_64BIT
+#define LONG "%08lx "
#define FOURLONG "%08lx %08lx %08lx %08lx\n"
static int kstack_depth_to_print = 12;
#else /* CONFIG_64BIT */
+#define LONG "%016lx "
#define FOURLONG "%016lx %016lx %016lx %016lx\n"
static int kstack_depth_to_print = 20;
#endif /* CONFIG_64BIT */
@@ -155,7 +157,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
break;
if (i && ((i * sizeof (long) % 32) == 0))
printk("\n ");
- printk("%p ", (void *)*stack++);
+ printk(LONG, *stack++);
}
printk("\n");
show_trace(task, sp);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 690e1781968..89b2e7f1b7a 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -144,7 +144,6 @@ out:
return -ENOMEM;
}
-#ifdef CONFIG_HOTPLUG_CPU
void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
@@ -163,7 +162,6 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
free_page(page_table);
free_pages(segment_table, SEGMENT_ORDER);
}
-#endif /* CONFIG_HOTPLUG_CPU */
static void __vdso_init_cr5(void *dummy)
{
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index d796d05c9c0..7a2063eb88f 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -108,6 +108,8 @@ SECTIONS
EXIT_TEXT
}
+ /* early.c uses stsi, which requires page aligned data. */
+ . = ALIGN(PAGE_SIZE);
.init.data : {
INIT_DATA
}
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e..3e260b7e37b 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -4,6 +4,9 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+
menuconfig VIRTUALIZATION
bool "Virtualization"
default y
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 61236102203..9d19803111b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -103,7 +103,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
static intercept_handler_t instruction_handlers[256] = {
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
- [0xb2] = kvm_s390_handle_priv,
+ [0xb2] = kvm_s390_handle_b2,
[0xb7] = handle_lctl,
[0xeb] = handle_lctlg,
};
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4fe28a2521..0189356fe20 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -555,9 +555,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
s390int->parm);
break;
+ case KVM_S390_SIGP_SET_PREFIX:
+ inti->prefix.address = s390int->parm;
+ inti->type = s390int->type;
+ VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+ s390int->parm);
+ break;
case KVM_S390_SIGP_STOP:
case KVM_S390_RESTART:
- case KVM_S390_SIGP_SET_PREFIX:
case KVM_S390_INT_EMERGENCY:
VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
inti->type = s390int->type;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d33893e1e8..f4d56e9939c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -23,7 +23,7 @@
#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
-
+#include <asm/nmi.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -286,7 +286,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
(unsigned long) vcpu);
get_cpu_id(&vcpu->arch.cpu_id);
- vcpu->arch.cpu_id.version = 0xfe;
+ vcpu->arch.cpu_id.version = 0xff;
return 0;
}
@@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
return -EINVAL; /* not implemented yet */
}
@@ -440,8 +440,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-extern void s390_handle_mcck(void);
-
static void __vcpu_run(struct kvm_vcpu *vcpu)
{
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3893cf12eac..00bbe69b78d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -50,7 +50,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
/* implemented in priv.c */
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3605df45dd4..4b88834b8dd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -304,12 +304,24 @@ static intercept_handler_t priv_handlers[256] = {
[0xb1] = handle_stfl,
};
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
+ /*
+ * A lot of B2 instructions are privileged. We first check for
+ * the privileged ones that we can handle in the kernel. If the
+ * kernel can handle this instruction, we check for the problem
+ * state bit and (a) handle the instruction or (b) send a code 2
+ * program check.
+ * Anything else goes to userspace.*/
handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler)
- return handler(vcpu);
+ if (handler) {
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu,
+ PGM_PRIVILEGED_OPERATION);
+ else
+ return handler(vcpu);
+ }
return -ENOTSUPP;
}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 2a01b9e0280..f27dbedf086 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -153,8 +153,6 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
switch (parameter & 0xff) {
case 0:
- printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
- " not supported");
rc = 3; /* not operational */
break;
case 1:
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 6ccb9fab055..3f5f680726e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/timex.h>
+#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/interrupt.h>
@@ -92,6 +93,7 @@ out:
local_irq_restore(flags);
preempt_enable();
}
+EXPORT_SYMBOL(__udelay);
/*
* Simple udelay variant. To be used on startup and reboot
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index ae5cf5d03d4..4143b7c1909 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -44,7 +44,11 @@ static inline char *__strnend(const char *s, size_t n)
*/
size_t strlen(const char *s)
{
+#if __GNUC__ < 4
return __strend(s) - s;
+#else
+ return __builtin_strlen(s);
+#endif
}
EXPORT_SYMBOL(strlen);
@@ -70,6 +74,7 @@ EXPORT_SYMBOL(strnlen);
*/
char *strcpy(char *dest, const char *src)
{
+#if __GNUC__ < 4
register int r0 asm("0") = 0;
char *ret = dest;
@@ -78,6 +83,9 @@ char *strcpy(char *dest, const char *src)
: "+&a" (dest), "+&a" (src) : "d" (r0)
: "cc", "memory" );
return ret;
+#else
+ return __builtin_strcpy(dest, src);
+#endif
}
EXPORT_SYMBOL(strcpy);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4d537205e83..833e8366c35 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -200,29 +200,6 @@ static void do_low_address(struct pt_regs *regs, unsigned long error_code)
do_no_context(regs, error_code, 0);
}
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
-
- up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- return 1;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (regs->psw.mask & PSW_MASK_PSTATE)
- do_group_exit(SIGKILL);
- do_no_context(regs, error_code, address);
- return 0;
-}
-
static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
@@ -367,7 +344,6 @@ good_area:
goto bad_area;
}
-survive:
if (is_vm_hugetlb_page(vma))
address &= HPAGE_MASK;
/*
@@ -378,8 +354,8 @@ survive:
fault = handle_mm_fault(mm, vma, address, write);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM) {
- if (do_out_of_memory(regs, error_code, address))
- goto survive;
+ up_read(&mm->mmap_sem);
+ pagefault_out_of_memory();
return;
} else if (fault & VM_FAULT_SIGBUS) {
do_sigbus(regs, error_code, address);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f0258ca3b17..c634dfbe92e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -40,7 +40,9 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
+
char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+EXPORT_SYMBOL(empty_zero_page);
/*
* paging_init() sets up the page tables
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6b6ddc4ea02..be6c1cf4ad5 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -258,6 +258,10 @@ int s390_enable_sie(void)
struct task_struct *tsk = current;
struct mm_struct *mm, *old_mm;
+ /* Do we have switched amode? If no, we cannot do sie */
+ if (!switch_amode)
+ return -EINVAL;
+
/* Do we have pgstes? if yes, we are done */
if (tsk->mm->context.has_pgste)
return 0;
@@ -292,7 +296,7 @@ int s390_enable_sie(void)
tsk->mm = tsk->active_mm = mm;
preempt_disable();
update_mm(mm, tsk);
- cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
preempt_enable();
task_unlock(tsk);
mmput(old_mm);
diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h
index 6d4bf651295..345653b9682 100644
--- a/arch/sh/include/asm/socket.h
+++ b/arch/sh/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* __ASM_SH_SOCKET_H */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 90d63aefd27..3f1372eb009 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v)
goto unlock;
seq_printf(p, "%3d: ",i);
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
seq_printf(p, " %14s", irq_desc[i].chip->name);
seq_printf(p, "-%-8s", irq_desc[i].name);
seq_printf(p, " %s", action->name);
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index bf50d0c2d58..982a12f959f 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -50,6 +50,9 @@
#define SO_MARK 0x0022
+#define SO_TIMESTAMPING 0x0023
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index ec81cdedef2..ee38e731bfa 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -57,6 +57,8 @@ static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned i
static inline void tlb_flush_mmu(struct mmu_gather *mp)
{
+ if (!mp->fullmm)
+ flush_tlb_pending();
if (mp->need_flush) {
free_pages_and_swap_cache(mp->pages, mp->pages_nr);
mp->pages_nr = 0;
@@ -78,8 +80,6 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
if (mp->fullmm)
mp->fullmm = 0;
- else
- flush_tlb_pending();
/* keep the page table cache within bounds */
check_pgt_cache();
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 1c378d8e90c..8ba064f08a6 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %9s", irq_desc[i].chip->typename);
seq_printf(p, " %s", action->name);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 6cd1a5b6506..79457f682b5 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1031,7 +1031,7 @@ void smp_fetch_global_regs(void)
* If the address space is non-shared (ie. mm->count == 1) we avoid
* cross calls when we want to flush the currently running process's
* tlb state. This is done by clearing all cpu bits except the current
- * processor's in current->active_mm->cpu_vm_mask and performing the
+ * processor's in current->mm->cpu_vm_mask and performing the
* flush locally only. This will force any subsequent cpus which run
* this task to flush the context from the local tlb if the process
* migrates to another cpu (again).
@@ -1074,7 +1074,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
u32 ctx = CTX_HWBITS(mm->context);
int cpu = get_cpu();
- if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
+ if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
else
smp_cross_call_masked(&xcall_flush_tlb_pending,
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f93c42a2b52..a8000b1cda7 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -51,7 +51,7 @@ sys_call_table32:
/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
/*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys32_getdomainname, sys32_setdomainname, sys_nis_syscall
- .word sys_quotactl, sys_set_tid_address, compat_sys_mount, sys_ustat, sys32_setxattr
+ .word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys32_setxattr
/*170*/ .word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents
.word sys_setsid, sys_fchdir, sys32_fgetxattr, sys_listxattr, sys_llistxattr
/*180*/ .word sys32_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 2db3c2229b9..4ee2e48c4b3 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -36,10 +36,10 @@
#include <linux/clocksource.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/irq.h>
#include <asm/oplib.h>
#include <asm/timer.h>
-#include <asm/irq.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/starfire.h>
@@ -724,12 +724,14 @@ void timer_interrupt(int irq, struct pt_regs *regs)
unsigned long tick_mask = tick_ops->softint_mask;
int cpu = smp_processor_id();
struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
+ struct irq_desc *desc;
clear_softint(tick_mask);
irq_enter();
- kstat_this_cpu.irqs[0]++;
+ desc = irq_to_desc(0);
+ kstat_incr_irqs_this_cpu(0, desc);
if (unlikely(!evt->event_handler)) {
printk(KERN_WARNING
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index fde510b664d..434224e2229 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -86,7 +86,7 @@ static int uml_net_rx(struct net_device *dev)
drop_skb->dev = dev;
/* Read a packet into drop_skb and don't do anything with it. */
(*lp->read)(lp->fd, drop_skb, lp);
- lp->stats.rx_dropped++;
+ dev->stats.rx_dropped++;
return 0;
}
@@ -99,8 +99,8 @@ static int uml_net_rx(struct net_device *dev)
skb_trim(skb, pkt_len);
skb->protocol = (*lp->protocol)(skb);
- lp->stats.rx_bytes += skb->len;
- lp->stats.rx_packets++;
+ dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_packets++;
netif_rx(skb);
return pkt_len;
}
@@ -224,8 +224,8 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
len = (*lp->write)(lp->fd, skb, lp);
if (len == skb->len) {
- lp->stats.tx_packets++;
- lp->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
dev->trans_start = jiffies;
netif_start_queue(dev);
@@ -234,7 +234,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
else if (len == 0) {
netif_start_queue(dev);
- lp->stats.tx_dropped++;
+ dev->stats.tx_dropped++;
}
else {
netif_start_queue(dev);
@@ -248,12 +248,6 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-static struct net_device_stats *uml_net_get_stats(struct net_device *dev)
-{
- struct uml_net_private *lp = netdev_priv(dev);
- return &lp->stats;
-}
-
static void uml_net_set_multicast_list(struct net_device *dev)
{
return;
@@ -377,6 +371,18 @@ static void net_device_release(struct device *dev)
free_netdev(netdev);
}
+static const struct net_device_ops uml_netdev_ops = { /* UML netdev callbacks */
+ .ndo_open = uml_net_open,
+ .ndo_stop = uml_net_close,
+ .ndo_start_xmit = uml_net_start_xmit,
+ .ndo_set_multicast_list = uml_net_set_multicast_list,
+ .ndo_tx_timeout = uml_net_tx_timeout,
+ .ndo_change_mtu = uml_net_change_mtu,
+ /* Listed once only: a second .ndo_set_mac_address entry would override. */
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
/*
* Ensures that platform_driver_register is called only once by
* eth_configure. Will be set in an initcall.
@@ -473,14 +479,7 @@ static void eth_configure(int n, void *init, char *mac,
set_ether_mac(dev, device->mac);
dev->mtu = transport->user->mtu;
- dev->open = uml_net_open;
- dev->hard_start_xmit = uml_net_start_xmit;
- dev->stop = uml_net_close;
- dev->get_stats = uml_net_get_stats;
- dev->set_multicast_list = uml_net_set_multicast_list;
- dev->tx_timeout = uml_net_tx_timeout;
- dev->set_mac_address = uml_net_set_mac;
- dev->change_mtu = uml_net_change_mtu;
+ dev->netdev_ops = &uml_netdev_ops;
dev->ethtool_ops = &uml_net_ethtool_ops;
dev->watchdog_timeo = (HZ >> 1);
dev->irq = UM_ETH_IRQ;
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index d843c7924a7..5c367f22595 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -26,7 +26,7 @@ struct uml_net_private {
spinlock_t lock;
struct net_device *dev;
struct timer_list tl;
- struct net_device_stats stats;
+
struct work_struct work;
int fd;
unsigned char mac[ETH_ALEN];
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3d7aad09b17..336b6156907 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -42,7 +42,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %s", action->name);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bc2fbadff9f..3a330a437c6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -165,6 +165,9 @@ config GENERIC_HARDIRQS
bool
default y
+config GENERIC_HARDIRQS_NO__DO_IRQ
+ def_bool y
+
config GENERIC_IRQ_PROBE
bool
default y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 5a0d76dc56a..8ef8876666b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -557,7 +557,7 @@ ia32_sys_call_table:
.quad sys32_olduname
.quad sys_umask /* 60 */
.quad sys_chroot
- .quad sys32_ustat
+ .quad compat_sys_ustat
.quad sys_dup2
.quad sys_getppid
.quad sys_getpgrp /* 65 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 6c0d7f6231a..efac92fd1ef 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name)
return err ? -EFAULT : 0;
}
-long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
-{
- struct ustat u;
- mm_segment_t seg;
- int ret;
-
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = sys_ustat(dev, (struct ustat __user *)&u);
- set_fs(seg);
- if (ret < 0)
- return ret;
-
- if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
- __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
- __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
- __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
- __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
- ret = -EFAULT;
- return ret;
-}
-
asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
compat_uptr_t __user *envp, struct pt_regs *regs)
{
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 50ca486fd88..1f7e6251728 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,13 +129,6 @@ typedef struct compat_siginfo {
} _sifields;
} compat_siginfo_t;
-struct ustat32 {
- __u32 f_tfree;
- compat_ino_t f_tinode;
- char f_fname[6];
- char f_fpack[6];
-};
-
#define IA32_STACK_TOP IA32_PAGE_OFFSET
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec4..dc3f6cf1170 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
__s64 count_load_time;
};
+struct kvm_debug_exit_arch {
+ __u32 exception;
+ __u32 pad;
+ __u64 pc;
+ __u64 dr6;
+ __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+#define KVM_GUESTDBG_INJECT_DB 0x00040000
+#define KVM_GUESTDBG_INJECT_BP 0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u64 debugreg[8];
+};
+
struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
+
+struct kvm_reinject_control {
+ __u8 pit_reinject;
+ __u8 reserved[31];
+};
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2f..f0faf58044f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
+#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
#define KVM_NR_MEM_OBJS 40
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
+#define KVM_NR_DB_REGS 4
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_FIXED_1 0xffff0ff0
+#define DR6_VOLATILE 0x0000e00f
+
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_GE (1 << 9)
+#define DR7_GD (1 << 13)
+#define DR7_FIXED_1 0x00000400
+#define DR7_VOLATILE 0xffff23ff
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bit 16 - direct mapping of virtual to physical mapping at gfn
+ * used for real mode and two-dimensional paging
* bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
unsigned level:4;
unsigned quadrant:2;
unsigned pad_for_nice_hex_output:6;
- unsigned metaphysical:1;
+ unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
+ unsigned cr4_pge:1;
};
};
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
char buf[512] __aligned(sizeof(long));
};
+struct kvm_pio_request {
+ unsigned long count;
+ int cur_count;
+ gva_t guest_gva;
+ int in;
+ int port;
+ int size;
+ int string;
+ int down;
+ int rep;
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+ union kvm_mmu_page_role base_role;
u64 *pae_root;
};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
+ u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
+
+ int switch_db_regs;
+ unsigned long host_db[KVM_NR_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+ unsigned long db[KVM_NR_DB_REGS];
+ unsigned long dr6;
+ unsigned long dr7;
+ unsigned long eff_db[KVM_NR_DB_REGS];
};
struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+ u64 vm_init_tsc;
};
struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg);
- void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void fx_init(struct kvm_vcpu *vcpu);
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
TASK_SWITCH_GATE = 3,
};
+#define HF_GIF_MASK (1 << 0)
+#define HF_HIF_MASK (1 << 1)
+#define HF_VINTR_MASK (1 << 2)
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae0..f4e505f286b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
+#define _EFER_SVME 12 /* Enable virtualization */
+#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
+#define EFER_SVME (1<<_EFER_SVME)
+#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
@@ -360,4 +364,9 @@
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index a8894647dd9..3ac5032fae0 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,8 +6,4 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
-#ifdef CONFIG_X86_64
-extern long sys_arch_prctl(int, unsigned long);
-#endif /* CONFIG_X86_64 */
-
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a..c2308f5250f 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -1,27 +1,12 @@
#ifndef _ASM_X86_SETUP_H
#define _ASM_X86_SETUP_H
+#ifdef __KERNEL__
+
#define COMMAND_LINE_SIZE 2048
#ifndef __ASSEMBLY__
-/* Interrupt control for vSMPowered x86_64 systems */
-void vsmp_init(void);
-
-
-void setup_bios_corruption_check(void);
-
-
-#ifdef CONFIG_X86_VISWS
-extern void visws_early_detect(void);
-extern int is_visws_box(void);
-#else
-static inline void visws_early_detect(void) { }
-static inline int is_visws_box(void) { return 0; }
-#endif
-
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
/*
* Any setup quirks to be performed?
*/
@@ -48,16 +33,8 @@ struct x86_quirks {
int (*update_genapic)(void);
};
-extern struct x86_quirks *x86_quirks;
-extern unsigned long saved_video_mode;
-
-#ifndef CONFIG_PARAVIRT
-#define paravirt_post_allocator_init() do {} while (0)
-#endif
#endif /* __ASSEMBLY__ */
-#ifdef __KERNEL__
-
#ifdef __i386__
#include <linux/pfn.h>
@@ -78,6 +55,28 @@ extern unsigned long saved_video_mode;
#ifndef __ASSEMBLY__
#include <asm/bootparam.h>
+/* Interrupt control for vSMPowered x86_64 systems */
+void vsmp_init(void);
+
+void setup_bios_corruption_check(void);
+
+#ifdef CONFIG_X86_VISWS
+extern void visws_early_detect(void);
+extern int is_visws_box(void);
+#else
+static inline void visws_early_detect(void) { }
+static inline int is_visws_box(void) { return 0; }
+#endif
+
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
+extern struct x86_quirks *x86_quirks;
+extern unsigned long saved_video_mode;
+
+#ifndef CONFIG_PARAVIRT
+#define paravirt_post_allocator_init() do {} while (0)
+#endif
+
#ifndef _SETUP
/*
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index 8ab9cc8b2ec..ca8bf2cd0ba 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_X86_SOCKET_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e86..82ada75f3eb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index ffb08be2a53..72a6dcd1299 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -70,8 +70,6 @@ struct old_utsname;
asmlinkage long sys32_olduname(struct oldold_utsname __user *);
long sys32_uname(struct old_utsname __user *);
-long sys32_ustat(unsigned, struct ustat32 __user *);
-
asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
compat_uptr_t __user *, struct pt_regs *);
asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index c0b0bda754e..e26d34b0bc7 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -74,6 +74,7 @@ asmlinkage long sys_vfork(struct pt_regs *);
asmlinkage long sys_execve(char __user *, char __user * __user *,
char __user * __user *,
struct pt_regs *);
+long sys_arch_prctl(int, unsigned long);
/* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 59363627523..e0f9aa16358 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d..498f944010b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
/* segment AR */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ad..5fff00c70de 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/smp.h>
+#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/module.h>
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
/*
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
- * with P/T states and does not stop in deep C-states
+ * with P/T states and does not stop in deep C-states.
+ *
+ * It is also reliable across cores and sockets. (but not across
+ * cabinets - we turn it off in that case explicitly.)
*/
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+ sched_clock_stable = 1;
}
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a00545fe5cd..648b3a2a3a4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void)
*/
static int boot_hpet_disable;
int hpet_force_user;
+static int hpet_verbose;
static int __init hpet_setup(char *str)
{
@@ -88,6 +89,8 @@ static int __init hpet_setup(char *str)
boot_hpet_disable = 1;
if (!strncmp("force", str, 5))
hpet_force_user = 1;
+ if (!strncmp("verbose", str, 7))
+ hpet_verbose = 1;
}
return 1;
}
@@ -119,6 +122,43 @@ int is_hpet_enabled(void)
}
EXPORT_SYMBOL_GPL(is_hpet_enabled);
+static void _hpet_print_config(const char *function, int line)
+{
+ u32 i, timers, l, h;
+ printk(KERN_INFO "hpet: %s(%d):\n", function, line);
+ l = hpet_readl(HPET_ID);
+ h = hpet_readl(HPET_PERIOD);
+ timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+ printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
+ l = hpet_readl(HPET_CFG);
+ h = hpet_readl(HPET_STATUS);
+ printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+ l = hpet_readl(HPET_COUNTER);
+ h = hpet_readl(HPET_COUNTER+4);
+ printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+ for (i = 0; i < timers; i++) {
+ l = hpet_readl(HPET_Tn_CFG(i));
+ h = hpet_readl(HPET_Tn_CFG(i)+4);
+ printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
+ i, l, h);
+ l = hpet_readl(HPET_Tn_CMP(i));
+ h = hpet_readl(HPET_Tn_CMP(i)+4);
+ printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
+ i, l, h);
+ l = hpet_readl(HPET_Tn_ROUTE(i));
+ h = hpet_readl(HPET_Tn_ROUTE(i)+4);
+ printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
+ i, l, h);
+ }
+}
+
+#define hpet_print_config() \
+do { \
+ if (hpet_verbose) \
+ _hpet_print_config(__FUNCTION__, __LINE__); \
+} while (0)
+
/*
* When the hpet driver (/dev/hpet) is enabled, we need to reserve
* timer 0 and timer 1 in case of RTC emulation.
@@ -191,27 +231,37 @@ static struct clock_event_device hpet_clockevent = {
.rating = 50,
};
-static void hpet_start_counter(void)
+static void hpet_stop_counter(void)
{
unsigned long cfg = hpet_readl(HPET_CFG);
-
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
hpet_writel(0, HPET_COUNTER);
hpet_writel(0, HPET_COUNTER + 4);
+}
+
+static void hpet_start_counter(void)
+{
+ unsigned long cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
+static void hpet_restart_counter(void)
+{
+ hpet_stop_counter();
+ hpet_start_counter();
+}
+
static void hpet_resume_device(void)
{
force_hpet_resume();
}
-static void hpet_restart_counter(void)
+static void hpet_resume_counter(void)
{
hpet_resume_device();
- hpet_start_counter();
+ hpet_restart_counter();
}
static void hpet_enable_legacy_int(void)
@@ -259,29 +309,23 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg, cmp, now;
+ unsigned long cfg;
uint64_t delta;
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
- now = hpet_readl(HPET_COUNTER);
- cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
- /*
- * The first write after writing TN_SETVAL to the
- * config register sets the counter value, the second
- * write sets the period.
- */
- hpet_writel(cmp, HPET_Tn_CMP(timer));
- udelay(1);
hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+ hpet_start_counter();
+ hpet_print_config();
break;
case CLOCK_EVT_MODE_ONESHOT:
@@ -308,6 +352,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
+ hpet_print_config();
break;
}
}
@@ -526,6 +571,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
num_timers++; /* Value read out starts from 0 */
+ hpet_print_config();
hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
if (!hpet_devs)
@@ -695,7 +741,7 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.shift = HPET_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .resume = hpet_restart_counter,
+ .resume = hpet_resume_counter,
#ifdef CONFIG_X86_64
.vread = vread_hpet,
#endif
@@ -707,7 +753,7 @@ static int hpet_clocksource_register(void)
cycle_t t1;
/* Start the counter */
- hpet_start_counter();
+ hpet_restart_counter();
/* Verify whether hpet counter works */
t1 = read_hpet();
@@ -793,6 +839,7 @@ int __init hpet_enable(void)
* information and the number of channels
*/
id = hpet_readl(HPET_ID);
+ hpet_print_config();
#ifdef CONFIG_HPET_EMULATE_RTC
/*
@@ -845,6 +892,7 @@ static __init int hpet_late_init(void)
return -ENODEV;
hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ hpet_print_config();
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 309949e9e1c..697d1b78cfb 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -172,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
ich_force_enable_hpet);
-
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */
+ ich_force_enable_hpet);
static struct pci_dev *cached_dev;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b8e7aaf7ef7..08afa1579e6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -17,20 +17,21 @@
#include <asm/delay.h>
#include <asm/hypervisor.h>
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
-unsigned int tsc_khz;
+
+unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);
/*
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
-static int tsc_unstable;
+static int __read_mostly tsc_unstable;
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
erroneous rdtsc usage on !cpu_has_tsc processors */
-static int tsc_disabled = -1;
+static int __read_mostly tsc_disabled = -1;
static int tsc_clocksource_reliable;
/*
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bde..0a303c3ed11 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5..c13bb92d315 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+ if (!pt->reinject)
+ atomic_set(&pt->pending, 1);
+
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
pit->pit_state.irq_ack = 1;
}
+static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+ struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+ if (!mask) {
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.irq_ack = 1;
+ }
+}
+
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
{
struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
if (!pit)
return NULL;
- mutex_lock(&kvm->lock);
pit->irq_source_id = kvm_request_irq_source_id(kvm);
- mutex_unlock(&kvm->lock);
if (pit->irq_source_id < 0) {
kfree(pit);
return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+ pit_state->pit_timer.reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
+ pit->mask_notifier.func = pit_mask_notifer;
+ kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
return pit;
}
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
struct hrtimer *timer;
if (kvm->arch.vpit) {
+ kvm_unregister_irq_mask_notifier(kvm, 0,
+ &kvm->arch.vpit->mask_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97a..6acbe4b505d 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
+ bool reinject;
};
struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
struct kvm *kvm;
struct kvm_kpit_state pit_state;
int irq_source_id;
+ struct kvm_irq_mask_notifier mask_notifier;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103f..1ccb50c74f1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
#include <linux/kvm_host.h>
static void pic_lock(struct kvm_pic *s)
+ __acquires(&s->lock)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
+ __releases(&s->lock)
{
struct kvm *kvm = s->kvm;
unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
spin_unlock(&s->lock);
while (acks) {
- kvm_notify_acked_irq(kvm, __ffs(acks));
+ kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+ __ffs(acks));
acks &= acks - 1;
}
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
{
- int mask;
+ int mask, ret = 1;
mask = 1 << irq;
if (s->elcr & mask) /* level triggered */
if (level) {
+ ret = !(s->irr & mask);
s->irr |= mask;
s->last_irr |= mask;
} else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
}
else /* edge triggered */
if (level) {
- if ((s->last_irr & mask) == 0)
+ if ((s->last_irr & mask) == 0) {
+ ret = !(s->irr & mask);
s->irr |= mask;
+ }
s->last_irr |= mask;
} else
s->last_irr &= ~mask;
+
+ return (s->imr & mask) ? -1 : ret;
}
/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
pic_unlock(s);
}
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
+ int ret = -1;
pic_lock(s);
if (irq >= 0 && irq < PIC_NUM_PINS) {
- pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+ ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
pic_unlock(s);
+
+ return ret;
}
/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
}
pic_update_irq(s);
pic_unlock(s);
- kvm_notify_acked_irq(kvm, irq);
+ kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
return intno;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d..9f593188129 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
#include "lapic.h"
#define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+ ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
struct kvm;
struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f..ed66e4c078d 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
struct kvm_vcpu;
@@ -29,18 +28,23 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
- unsigned long db_regs[NUM_DB_REGS];
-
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
u32 *msrpm;
+ struct vmcb *hsave;
+ u64 hsave_msr;
+
+ u64 nested_vmcb;
+
+ /* These are the merged vectors */
+ u32 *nested_msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 nested_vmcb_msrpm;
};
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c7147..2a36f7f7c4c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
-struct kvm_shadow_walk {
- int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
- u64 addr, u64 *spte, int level);
+struct kvm_shadow_walk_iterator {
+ u64 addr;
+ hpa_t shadow_addr;
+ int level;
+ u64 *sptep;
+ unsigned index;
};
+#define for_each_shadow_entry(_vcpu, _addr, _walker) \
+ for (shadow_walk_init(&(_walker), _vcpu, _addr); \
+ shadow_walk_okay(&(_walker)); \
+ shadow_walk_next(&(_walker)))
+
+
struct kvm_unsync_walk {
int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
BUG_ON(!mc->nobjs);
p = mc->objects[--mc->nobjs];
- memset(p, 0, size);
return p;
}
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&sp->oos_link);
- ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
- sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
- int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
int i;
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
- int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+ struct mmu_page_path *parents,
+ int i)
{
int n;
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
return n;
}
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct kvm_mmu_page *sp;
unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
unsigned level,
- int metaphysical,
+ int direct,
unsigned access,
u64 *parent_pte)
{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp;
struct hlist_node *node, *tmp;
- role.word = 0;
- role.glevels = vcpu->arch.mmu.root_level;
+ role = vcpu->arch.mmu.base_role;
role.level = level;
- role.metaphysical = metaphysical;
+ role.direct = direct;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
+ sp->global = role.cr4_pge;
hlist_add_head(&sp->hash_link, bucket);
- if (!metaphysical) {
+ if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
return sp;
}
-static int walk_shadow(struct kvm_shadow_walk *walker,
- struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, u64 addr)
{
- hpa_t shadow_addr;
- int level;
- int r;
- u64 *sptep;
- unsigned index;
-
- shadow_addr = vcpu->arch.mmu.root_hpa;
- level = vcpu->arch.mmu.shadow_root_level;
- if (level == PT32E_ROOT_LEVEL) {
- shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
- shadow_addr &= PT64_BASE_ADDR_MASK;
- if (!shadow_addr)
- return 1;
- --level;
+ iterator->addr = addr;
+ iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+ iterator->level = vcpu->arch.mmu.shadow_root_level;
+ if (iterator->level == PT32E_ROOT_LEVEL) {
+ iterator->shadow_addr
+ = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+ --iterator->level;
+ if (!iterator->shadow_addr)
+ iterator->level = 0;
}
+}
- while (level >= PT_PAGE_TABLE_LEVEL) {
- index = SHADOW_PT_INDEX(addr, level);
- sptep = ((u64 *)__va(shadow_addr)) + index;
- r = walker->entry(walker, vcpu, addr, sptep, level);
- if (r)
- return r;
- shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
- --level;
- }
- return 0;
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+ if (iterator->level < PT_PAGE_TABLE_LEVEL)
+ return false;
+ iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+ iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+ return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+ iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+ --iterator->level;
}
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
- if (!sp->role.invalid && !sp->role.metaphysical)
+ if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
+ unsigned index;
+ struct hlist_head *bucket;
struct kvm_mmu_page *sp;
+ struct hlist_node *node, *nn;
- while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
- pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ index = kvm_page_table_hashfn(gfn);
+ bucket = &kvm->arch.mmu_page_hash[index];
+ hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+ if (sp->gfn == gfn && !sp->role.direct
+ && !sp->role.invalid) {
+ pgprintk("%s: zap %lx %x\n",
+ __func__, gfn, sp->role.word);
+ kvm_mmu_zap_page(kvm, sp);
+ }
}
}
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
/* don't unsync if pagetable is shadowed with multiple roles */
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.metaphysical)
+ if (s->gfn != sp->gfn || s->role.direct)
continue;
if (s->role.word != sp->role.word)
return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
- if (!(vcpu->arch.cr4 & X86_CR4_PGE))
- global = 0;
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
- } else {
- if (largepage)
- was_rmapped = is_large_pte(*shadow_pte);
- else
- was_rmapped = 1;
- }
+ } else
+ was_rmapped = 1;
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-struct direct_shadow_walk {
- struct kvm_shadow_walk walker;
- pfn_t pfn;
- int write;
- int largepage;
- int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
- struct kvm_vcpu *vcpu,
- u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ int largepage, gfn_t gfn, pfn_t pfn)
{
- struct direct_shadow_walk *walk =
- container_of(_walk, struct direct_shadow_walk, walker);
+ struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+ int pt_write = 0;
gfn_t pseudo_gfn;
- gfn_t gfn = addr >> PAGE_SHIFT;
-
- if (level == PT_PAGE_TABLE_LEVEL
- || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
- 0, walk->write, 1, &walk->pt_write,
- walk->largepage, 0, gfn, walk->pfn, false);
- ++vcpu->stat.pf_fixed;
- return 1;
- }
- if (*sptep == shadow_trap_nonpresent_pte) {
- pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
- 1, ACC_ALL, sptep);
- if (!sp) {
- pgprintk("nonpaging_map: ENOMEM\n");
- kvm_release_pfn_clean(walk->pfn);
- return -ENOMEM;
+ for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+ if (iterator.level == PT_PAGE_TABLE_LEVEL
+ || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write,
+ largepage, 0, gfn, pfn, false);
+ ++vcpu->stat.pf_fixed;
+ break;
}
- set_shadow_pte(sptep,
- __pa(sp->spt)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask);
- }
- return 0;
-}
+ if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+ pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+ iterator.level - 1,
+ 1, ACC_ALL, iterator.sptep);
+ if (!sp) {
+ pgprintk("nonpaging_map: ENOMEM\n");
+ kvm_release_pfn_clean(pfn);
+ return -ENOMEM;
+ }
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
- int largepage, gfn_t gfn, pfn_t pfn)
-{
- int r;
- struct direct_shadow_walk walker = {
- .walker = { .entry = direct_map_entry, },
- .pfn = pfn,
- .largepage = largepage,
- .write = write,
- .pt_write = 0,
- };
-
- r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
- if (r < 0)
- return r;
- return walker.pt_write;
+ set_shadow_pte(iterator.sptep,
+ __pa(sp->spt)
+ | PT_PRESENT_MASK | PT_WRITABLE_MASK
+ | shadow_user_mask | shadow_x_mask);
+ }
+ }
+ return pt_write;
}
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int metaphysical = 0;
+ int direct = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, metaphysical,
+ PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}
- metaphysical = !is_paging(vcpu);
+ direct = !is_paging(vcpu);
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, metaphysical,
+ PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
+ int r;
+
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- return nonpaging_init_context(vcpu);
+ r = nonpaging_init_context(vcpu);
else if (is_long_mode(vcpu))
- return paging64_init_context(vcpu);
+ r = paging64_init_context(vcpu);
else if (is_pae(vcpu))
- return paging32E_init_context(vcpu);
+ r = paging32E_init_context(vcpu);
else
- return paging32_init_context(vcpu);
+ r = paging32_init_context(vcpu);
+
+ vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+
+ return r;
}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
- if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+ if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
continue;
pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
gfn_t gfn;
list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- if (sp->role.metaphysical)
+ if (sp->role.direct)
continue;
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298..eaab2145f62 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- return vcpu->arch.shadow_efer & EFER_LME;
+ return vcpu->arch.shadow_efer & EFER_LMA;
#else
return 0;
#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17a..6bd70206c56 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
- #define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
- #define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
u32 error_code;
};
-struct shadow_walker {
- struct kvm_shadow_walk walker;
- struct guest_walker *guest_walker;
- int user_fault;
- int write_fault;
- int largepage;
- int *ptwrite;
- pfn_t pfn;
- u64 *sptep;
- gpa_t pte_gpa;
-};
-
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+ struct guest_walker *gw,
+ int user_fault, int write_fault, int largepage,
+ int *ptwrite, pfn_t pfn)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
- struct guest_walker *gw = sw->guest_walker;
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
- u64 spte;
- int metaphysical;
+ u64 spte, *sptep;
+ int direct;
gfn_t table_gfn;
int r;
+ int level;
pt_element_t curr_pte;
+ struct kvm_shadow_walk_iterator iterator;
- if (level == PT_PAGE_TABLE_LEVEL
- || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
- sw->user_fault, sw->write_fault,
- gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage,
- gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
- gw->gfn, sw->pfn, false);
- sw->sptep = sptep;
- return 1;
- }
+ if (!is_present_pte(gw->ptes[gw->level - 1]))
+ return NULL;
- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
- return 0;
+ for_each_shadow_entry(vcpu, addr, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
+ if (level == PT_PAGE_TABLE_LEVEL
+ || (largepage && level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, sptep, access,
+ gw->pte_access & access,
+ user_fault, write_fault,
+ gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+ ptwrite, largepage,
+ gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+ gw->gfn, pfn, false);
+ break;
+ }
- if (is_large_pte(*sptep)) {
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- kvm_flush_remote_tlbs(vcpu->kvm);
- rmap_remove(vcpu->kvm, sptep);
- }
+ if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+ continue;
- if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
- metaphysical = 1;
- if (!is_dirty_pte(gw->ptes[level - 1]))
- access &= ~ACC_WRITE_MASK;
- table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
- } else {
- metaphysical = 0;
- table_gfn = gw->table_gfn[level - 2];
- }
- shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
- metaphysical, access, sptep);
- if (!metaphysical) {
- r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
- &curr_pte, sizeof(curr_pte));
- if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(shadow_page, sptep);
- kvm_release_pfn_clean(sw->pfn);
- sw->sptep = NULL;
- return 1;
+ if (is_large_pte(*sptep)) {
+ rmap_remove(vcpu->kvm, sptep);
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
}
- }
- spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
- *sptep = spte;
- return 0;
-}
-
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
- struct guest_walker *guest_walker,
- int user_fault, int write_fault, int largepage,
- int *ptwrite, pfn_t pfn)
-{
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_walk_entry), },
- .guest_walker = guest_walker,
- .user_fault = user_fault,
- .write_fault = write_fault,
- .largepage = largepage,
- .ptwrite = ptwrite,
- .pfn = pfn,
- };
-
- if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
- return NULL;
+ if (level == PT_DIRECTORY_LEVEL
+ && gw->level == PT_DIRECTORY_LEVEL) {
+ direct = 1;
+ if (!is_dirty_pte(gw->ptes[level - 1]))
+ access &= ~ACC_WRITE_MASK;
+ table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+ } else {
+ direct = 0;
+ table_gfn = gw->table_gfn[level - 2];
+ }
+ shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ direct, access, sptep);
+ if (!direct) {
+ r = kvm_read_guest_atomic(vcpu->kvm,
+ gw->pte_gpa[level - 2],
+ &curr_pte, sizeof(curr_pte));
+ if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
+ kvm_release_pfn_clean(pfn);
+ sptep = NULL;
+ break;
+ }
+ }
- walk_shadow(&walker.walker, vcpu, addr);
+ spte = __pa(shadow_page->spt)
+ | PT_PRESENT_MASK | PT_ACCESSED_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
+ *sptep = spte;
+ }
- return walker.sptep;
+ return sptep;
}
/*
@@ -465,54 +439,56 @@ out_unlock:
return 0;
}
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
-
- /* FIXME: properly handle invlpg on large guest pages */
- if (level == PT_PAGE_TABLE_LEVEL ||
- ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
+ struct kvm_shadow_walk_iterator iterator;
+ pt_element_t gpte;
+ gpa_t pte_gpa = -1;
+ int level;
+ u64 *sptep;
+ int need_flush = 0;
- sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
- sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+ spin_lock(&vcpu->kvm->mmu_lock);
- if (is_shadow_present_pte(*sptep)) {
- rmap_remove(vcpu->kvm, sptep);
- if (is_large_pte(*sptep))
- --vcpu->kvm->stat.lpages;
+ for_each_shadow_entry(vcpu, gva, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
+
+ /* FIXME: properly handle invlpg on large guest pages */
+ if (level == PT_PAGE_TABLE_LEVEL ||
+ ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ pte_gpa = (sp->gfn << PAGE_SHIFT);
+ pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+ if (is_shadow_present_pte(*sptep)) {
+ rmap_remove(vcpu->kvm, sptep);
+ if (is_large_pte(*sptep))
+ --vcpu->kvm->stat.lpages;
+ need_flush = 1;
+ }
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ break;
}
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- return 1;
- }
- if (!is_shadow_present_pte(*sptep))
- return 1;
- return 0;
-}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
- pt_element_t gpte;
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_invlpg_entry), },
- .pte_gpa = -1,
- };
+ if (!is_shadow_present_pte(*sptep))
+ break;
+ }
- spin_lock(&vcpu->kvm->mmu_lock);
- walk_shadow(&walker.walker, vcpu, gva);
+ if (need_flush)
+ kvm_flush_remote_tlbs(vcpu->kvm);
spin_unlock(&vcpu->kvm->mmu_lock);
- if (walker.pte_gpa == -1)
+
+ if (pte_gpa == -1)
return;
- if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+ if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
sizeof(pt_element_t)))
return;
if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
if (mmu_topup_memory_caches(vcpu))
return;
- kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+ kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
sizeof(pt_element_t), 0);
}
}
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
pt_element_t pt[256 / sizeof(pt_element_t)];
gpa_t pte_gpa;
- if (sp->role.metaphysical
+ if (sp->role.direct
|| (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
nonpaging_prefetch_page(vcpu, sp);
return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef pt_element_t
#undef guest_walker
-#undef shadow_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e25..1821c207819 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
-#define DR7_GD_MASK (1 << 13)
-#define DR6_BD_MASK (1 << 13)
-
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+/* Turn on to get debugging output*/
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
+static int nested = 0;
+module_param(nested, int, S_IRUGO);
+
static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_vmexit(struct vcpu_svm *svm);
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque);
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+ return svm->nested_vmcb;
+}
+
static unsigned long iopm_base;
struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
asm volatile ("mov %0, %%cr2" :: "r" (val));
}
-static inline unsigned long read_dr6(void)
-{
- unsigned long dr6;
-
- asm volatile ("mov %%dr6, %0" : "=r" (dr6));
- return dr6;
-}
-
-static inline void write_dr6(unsigned long val)
-{
- asm volatile ("mov %0, %%dr6" :: "r" (val));
-}
-
-static inline unsigned long read_dr7(void)
-{
- unsigned long dr7;
-
- asm volatile ("mov %%dr7, %0" : "=r" (dr7));
- return dr7;
-}
-
-static inline void write_dr7(unsigned long val)
-{
- asm volatile ("mov %0, %%dr7" :: "r" (val));
-}
-
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
- to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
vcpu->arch.shadow_efer = efer;
}
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /* If we are within a nested VM we'd better #VMEXIT and let the
+ guest handle the exception */
+ if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+ return;
+
svm->vmcb->control.event_inj = nr
| SVM_EVTINJ_VALID
| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
- vcpu->arch.interrupt_window_open = 1;
+ vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
const char *msg;
if (!cpu_has_svm(&msg)) {
- printk(KERN_INFO "has_svn: %s\n", msg);
+ printk(KERN_INFO "has_svm: %s\n", msg);
return 0;
}
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer | EFER_SVME);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME);
+ }
+
for_each_online_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = MSR_EFER_SVME_MASK;
+ save->efer = EFER_SVME;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
+
+ svm->nested_vmcb = 0;
+ svm->vcpu.arch.hflags = HF_GIF_MASK;
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct vcpu_svm *svm;
struct page *page;
struct page *msrpm_pages;
+ struct page *hsave_page;
+ struct page *nested_msrpm_pages;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
goto uninit;
+
+ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ if (!nested_msrpm_pages)
+ goto uninit;
+
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
+ hsave_page = alloc_page(GFP_KERNEL);
+ if (!hsave_page)
+ goto uninit;
+ svm->hsave = page_address(hsave_page);
+
+ svm->nested_msrpm = page_address(nested_msrpm_pages);
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- memset(svm->db_regs, 0, sizeof(svm->db_regs));
init_vmcb(svm);
fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->hsave));
+ __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
- /*
- * SVM always stores 0 for the 'G' bit in the CS selector in
- * the VMCB on a VMEXIT. This hurts cross-vendor migration:
- * Intel's VMENTRY has a check on the 'G' bit.
- */
- if (seg == VCPU_SREG_CS)
+ switch (seg) {
+ case VCPU_SREG_CS:
+ /*
+ * SVM always stores 0 for the 'G' bit in the CS selector in
+ * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+ * Intel's VMENTRY has a check on the 'G' bit.
+ */
var->g = s->limit > 0xfffff;
-
- /*
- * Work around a bug where the busy flag in the tr selector
- * isn't exposed
- */
- if (seg == VCPU_SREG_TR)
+ break;
+ case VCPU_SREG_TR:
+ /*
+ * Work around a bug where the busy flag in the tr selector
+ * isn't exposed
+ */
var->type |= 0x2;
+ break;
+ case VCPU_SREG_DS:
+ case VCPU_SREG_ES:
+ case VCPU_SREG_FS:
+ case VCPU_SREG_GS:
+ /*
+ * The accessed bit must always be set in the segment
+ * descriptor cache, although it can be cleared in the
+ * descriptor, the cached bit always remains at 1. Since
+ * Intel has a check on this, set it here to support
+ * cross-vendor migration.
+ */
+ if (!var->unusable)
+ var->type |= 0x1;
+ break;
+ }
var->unusable = !var->present;
}
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- return -EOPNOTSUPP;
+ int old_debug = vcpu->guest_debug;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ vcpu->guest_debug = dbg->control;
+
+ svm->vmcb->control.intercept_exceptions &=
+ ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << BP_VECTOR;
+ } else
+ vcpu->guest_debug = 0;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
+ else
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+ return 0;
}
static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
- unsigned long val = to_svm(vcpu)->db_regs[dr];
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long val;
+
+ switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
+ case 6:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr6;
+ else
+ val = svm->vmcb->save.dr6;
+ break;
+ case 7:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr7;
+ else
+ val = svm->vmcb->save.dr7;
+ break;
+ default:
+ val = 0;
+ }
+
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
return val;
}
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
struct vcpu_svm *svm = to_svm(vcpu);
- *exception = 0;
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
- if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
- svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
- svm->vmcb->save.dr6 |= DR6_BD_MASK;
- *exception = DB_VECTOR;
- return;
- }
+ *exception = 0;
switch (dr) {
case 0 ... 3:
- svm->db_regs[dr] = value;
+ vcpu->arch.db[dr] = value;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = value;
return;
case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE) {
+ if (vcpu->arch.cr4 & X86_CR4_DE)
*exception = UD_VECTOR;
+ return;
+ case 6:
+ if (value & 0xffffffff00000000ULL) {
+ *exception = GP_VECTOR;
return;
}
- case 7: {
- if (value & ~((1ULL << 32) - 1)) {
+ vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
+ return;
+ case 7:
+ if (value & 0xffffffff00000000ULL) {
*exception = GP_VECTOR;
return;
}
- svm->vmcb->save.dr7 = value;
+ vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+ vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
+ }
return;
- }
default:
+ /* FIXME: Possible case? */
printk(KERN_DEBUG "%s: unexpected dr %u\n",
__func__, dr);
*exception = UD_VECTOR;
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (!(svm->vcpu.guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = BP_VECTOR;
+ return 0;
+}
+
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, down, in, string, rep;
+ int size, in, string;
unsigned port;
++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
- rep = (io_info & SVM_IOIO_REP_MASK) != 0;
- down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
+/*
+ * Check whether the guest may execute an SVM instruction right now.
+ *
+ * Queues #UD when EFER.SVME is clear in the shadow EFER or paging is off,
+ * injects #GP(0) when the VMCB reports a non-zero CPL.  Returns 1 if an
+ * exception was raised, 0 if the instruction may proceed.
+ */
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	bool svm_usable = (vcpu->arch.shadow_efer & EFER_SVME) &&
+			  is_paging(vcpu);
+
+	if (!svm_usable) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (svm->vmcb->save.cpl != 0) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Give the nested hypervisor a chance to take exception @nr.
+ *
+ * Only acts while in nested mode: the exit code/info fields of the current
+ * VMCB are set up as for an exception intercept (exit_info_1 = error code,
+ * exit_info_2 = cr2) and, if nested_svm_exit_handled() reports the
+ * exception as intercepted, a nested #VMEXIT is performed.
+ *
+ * Returns 1 when the nested #VMEXIT was triggered, 0 otherwise.
+ * @has_error_code is not consulted here.
+ */
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+				      bool has_error_code, u32 error_code)
+{
+	struct vmcb_control_area *ctl;
+
+	if (!is_nested(svm))
+		return 0;
+
+	ctl = &svm->vmcb->control;
+	ctl->exit_code = SVM_EXIT_EXCP_BASE + nr;
+	ctl->exit_code_hi = 0;
+	ctl->exit_info_1 = error_code;
+	ctl->exit_info_2 = svm->vcpu.arch.cr2;
+
+	if (!nested_svm_exit_handled(svm, false))
+		return 0;
+
+	nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
+
+	nested_svm_vmexit(svm);
+	return 1;
+}
+
+/*
+ * Give the nested hypervisor a chance to take a pending interrupt.
+ *
+ * Does nothing unless we are in nested mode with both HF_VINTR_MASK and
+ * HF_HIF_MASK set in hflags.  In that case the exit code is set to
+ * SVM_EXIT_INTR and, if that exit is intercepted according to
+ * nested_svm_exit_handled(), a nested #VMEXIT is performed.
+ *
+ * Returns 1 when the nested #VMEXIT was triggered, 0 otherwise.
+ */
+static inline int nested_svm_intr(struct vcpu_svm *svm)
+{
+	if (!is_nested(svm))
+		return 0;
+
+	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK) ||
+	    !(svm->vcpu.arch.hflags & HF_HIF_MASK))
+		return 0;
+
+	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+	if (!nested_svm_exit_handled(svm, false))
+		return 0;
+
+	nsvm_printk("VMexit -> INTR\n");
+	nested_svm_vmexit(svm);
+	return 1;
+}
+
+/*
+ * Look up the page backing guest physical address @gpa.
+ *
+ * The lookup runs under current->mm->mmap_sem held for reading.  On
+ * failure a message is logged, the error page is released, #GP(0) is
+ * injected into the guest and NULL is returned.  On success the page is
+ * returned and the caller is expected to release it.
+ */
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+	struct page *pg;
+
+	down_read(&current->mm->mmap_sem);
+	pg = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+
+	if (!is_error_page(pg))
+		return pg;
+
+	printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+	       __func__, gpa);
+	kvm_release_page_clean(pg);
+	kvm_inject_gp(&svm->vcpu, 0);
+
+	return NULL;
+}
+
+/*
+ * Run @handler with one or two guest pages temporarily mapped.
+ *
+ * @arg1_gpa is always mapped; @arg2_gpa is mapped only when non-zero,
+ * otherwise the handler receives NULL as its second argument.  Exactly one
+ * page is mapped per address.  After the handler returns, the mappings are
+ * dropped and the pages are released as dirty.
+ *
+ * Returns 1 if a page could not be obtained (nested_svm_get_page() has
+ * then already injected a fault), otherwise whatever @handler returned.
+ */
+static int nested_svm_do(struct vcpu_svm *svm,
+			 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+			 int (*handler)(struct vcpu_svm *svm,
+					void *arg1,
+					void *arg2,
+					void *opaque))
+{
+	struct page *pg1, *pg2 = NULL;
+	void *va1, *va2 = NULL;
+	int ret;
+
+	pg1 = nested_svm_get_page(svm, arg1_gpa);
+	if (!pg1)
+		return 1;
+
+	if (arg2_gpa) {
+		pg2 = nested_svm_get_page(svm, arg2_gpa);
+		if (!pg2) {
+			/* Nothing was written yet, drop the first page clean. */
+			kvm_release_page_clean(pg1);
+			return 1;
+		}
+	}
+
+	/* From here on pg2 is non-NULL exactly when arg2_gpa is non-zero. */
+	va1 = kmap_atomic(pg1, KM_USER0);
+	if (pg2)
+		va2 = kmap_atomic(pg2, KM_USER1);
+
+	ret = handler(svm, va1, va2, opaque);
+
+	kunmap_atomic(va1, KM_USER0);
+	if (pg2)
+		kunmap_atomic(va2, KM_USER1);
+
+	kvm_release_page_dirty(pg1);
+	if (pg2)
+		kvm_release_page_dirty(pg2);
+
+	return ret;
+}
+
+/*
+ * Decide whether the current exit has to be reflected to the nested
+ * hypervisor.
+ *
+ * @arg1:   the nested guest's VMCB (mapped by nested_svm_do()).
+ * @arg2:   unused.
+ * @opaque: points to a bool; when true, KVM keeps INTR and NMI exits for
+ *          itself, as well as NPF exits when NPT is enabled and #PF
+ *          intercepts when it is not.
+ *
+ * Returns 1 if the nested VMCB intercepts the exit, 0 otherwise.
+ */
+static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
+					 void *arg1,
+					 void *arg2,
+					 void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	bool kvm_overrides = *(bool *)opaque;
+	u32 exit_code = svm->vmcb->control.exit_code;
+
+	if (kvm_overrides) {
+		switch (exit_code) {
+		case SVM_EXIT_INTR:
+		case SVM_EXIT_NMI:
+			return 0;
+		/* For now we are always handling NPFs when using them */
+		case SVM_EXIT_NPF:
+			if (npt_enabled)
+				return 0;
+			break;
+		/* When we're shadowing, trap PFs */
+		case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+			if (!npt_enabled)
+				return 0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * The per-class intercept masks are built with unsigned shifts:
+	 * the exception case can shift by 31, which is undefined for a
+	 * signed int.
+	 */
+	switch (exit_code) {
+	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
+		u32 cr_bits = 1U << (exit_code - SVM_EXIT_READ_CR0);
+		if (nested_vmcb->control.intercept_cr_read & cr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
+		u32 cr_bits = 1U << (exit_code - SVM_EXIT_WRITE_CR0);
+		if (nested_vmcb->control.intercept_cr_write & cr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
+		u32 dr_bits = 1U << (exit_code - SVM_EXIT_READ_DR0);
+		if (nested_vmcb->control.intercept_dr_read & dr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
+		u32 dr_bits = 1U << (exit_code - SVM_EXIT_WRITE_DR0);
+		if (nested_vmcb->control.intercept_dr_write & dr_bits)
+			return 1;
+		break;
+	}
+	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+		u32 excp_bits = 1U << (exit_code - SVM_EXIT_EXCP_BASE);
+		if (nested_vmcb->control.intercept_exceptions & excp_bits)
+			return 1;
+		break;
+	}
+	default: {
+		/*
+		 * NOTE(review): nothing bounds this shift count to 0..63;
+		 * an exit code outside SVM_EXIT_INTR .. SVM_EXIT_INTR + 63
+		 * that reaches this point would shift out of range.
+		 * Confirm which codes can get here.
+		 */
+		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+		nsvm_printk("exit code: 0x%x\n", exit_code);
+		if (nested_vmcb->control.intercept & exit_bits)
+			return 1;
+	}
+	}
+
+	return 0;
+}
+
+/*
+ * Decide whether an MSR access exit has to be reflected to the nested
+ * hypervisor.
+ *
+ * @arg1:   the nested guest's VMCB (mapped by nested_svm_do()).
+ * @arg2:   the nested guest's MSR permission map (mapped by
+ *          nested_svm_do()).
+ * @opaque: unused.
+ *
+ * The MSR number is taken from RCX; bit 0 of exit_info_1 selects which of
+ * the MSR's two permission bits is tested.
+ *
+ * Returns 1 if the access is intercepted by the nested hypervisor (or the
+ * MSR is outside the three ranges covered by the map), 0 otherwise.
+ */
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
+				       void *arg1, void *arg2,
+				       void *opaque)
+{
+	struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+	u8 *msrpm = (u8 *)arg2;
+	u32 t0, t1;
+	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 param = svm->vmcb->control.exit_info_1 & 1;
+
+	if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+		return 0;
+
+	/*
+	 * Every MSR owns two consecutive bits in the map, so the bit
+	 * offset is twice the MSR's index within the map.  Compute that
+	 * bit offset per range and split it into byte/bit once below; the
+	 * first range used to derive the byte index as msr / 8, which
+	 * ignored the factor of two and tested the wrong byte.
+	 */
+	switch (msr) {
+	case 0 ... 0x1fff:
+		t0 = msr * 2;
+		break;
+	case 0xc0000000 ... 0xc0001fff:
+		t0 = (8192 + msr - 0xc0000000) * 2;
+		break;
+	case 0xc0010000 ... 0xc0011fff:
+		t0 = (16384 + msr - 0xc0010000) * 2;
+		break;
+	default:
+		/* Not covered by the permission map: always intercepted. */
+		return 1;
+	}
+	t1 = t0 / 8;	/* byte index into the map */
+	t0 %= 8;	/* bit index of the MSR's first bit in that byte */
+
+	/*
+	 * NOTE(review): t1 can reach 6143 for the last range while
+	 * nested_svm_do() maps a single page for arg2 - verify that the
+	 * access cannot run past the mapping.
+	 */
+	if (msrpm[t1] & ((1 << param) << t0))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Check whether the current exit is intercepted by the nested hypervisor.
+ *
+ * MSR exits are checked against the nested MSR permission map; all other
+ * exits are checked against the intercept masks in the nested VMCB, with
+ * @kvm_override forwarded to that check.  Returns the result of the
+ * selected handler as passed through by nested_svm_do().
+ */
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+{
+	if (svm->vmcb->control.exit_code == SVM_EXIT_MSR)
+		return nested_svm_do(svm, svm->nested_vmcb,
+				     svm->nested_vmcb_msrpm, NULL,
+				     nested_svm_exit_handled_msr);
+
+	/* The handler reads the flag through the opaque pointer. */
+	return nested_svm_do(svm, svm->nested_vmcb, 0, &kvm_override,
+			     nested_svm_exit_handled_real);
+}
+
+static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{ /*
+ * Emulate a #VMEXIT out of the nested guest.  arg1 is the nested
+ * guest's VMCB; arg2 and opaque are not used.  The current VMCB is
+ * copied into the nested VMCB, the state stashed in svm->hsave is
+ * put back into the current VMCB, and nested mode is left by
+ * clearing svm->nested_vmcb.  Always returns 0.
+ */
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+ u64 nested_save[] = { nested_vmcb->save.cr0,
+ nested_vmcb->save.cr3,
+ nested_vmcb->save.cr4,
+ nested_vmcb->save.efer,
+ nested_vmcb->control.intercept_cr_read,
+ nested_vmcb->control.intercept_cr_write,
+ nested_vmcb->control.intercept_dr_read,
+ nested_vmcb->control.intercept_dr_write,
+ nested_vmcb->control.intercept_exceptions,
+ nested_vmcb->control.intercept,
+ nested_vmcb->control.msrpm_base_pa,
+ nested_vmcb->control.iopm_base_pa,
+ nested_vmcb->control.tsc_offset };
+
+ /* Give the current vmcb to the guest, then put back the fields
+ * captured in nested_save[] above so that the values the nested
+ * VMCB held for them before the copy are kept.  The index used
+ * below must match the position in the initializer. */
+ memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
+ nested_vmcb->save.cr0 = nested_save[0];
+ if (!npt_enabled) /* with NPT the cr3 just copied from svm->vmcb stays */
+ nested_vmcb->save.cr3 = nested_save[1];
+ nested_vmcb->save.cr4 = nested_save[2];
+ nested_vmcb->save.efer = nested_save[3];
+ nested_vmcb->control.intercept_cr_read = nested_save[4];
+ nested_vmcb->control.intercept_cr_write = nested_save[5];
+ nested_vmcb->control.intercept_dr_read = nested_save[6];
+ nested_vmcb->control.intercept_dr_write = nested_save[7];
+ nested_vmcb->control.intercept_exceptions = nested_save[8];
+ nested_vmcb->control.intercept = nested_save[9];
+ nested_vmcb->control.msrpm_base_pa = nested_save[10];
+ nested_vmcb->control.iopm_base_pa = nested_save[11];
+ nested_vmcb->control.tsc_offset = nested_save[12];
+
+ /* We always set V_INTR_MASKING and remember the old value in hflags */
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
+ (nested_vmcb->control.int_vector)) {
+ nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
+ nested_vmcb->control.int_vector);
+ }
+
+ /* Restore the original control entries (whole control area, from
+ * the copy in hsave) */
+ svm->vmcb->control = hsave->control;
+
+ /* Kill any pending exceptions (only a debug message is printed if
+ * one was pending) */
+ if (svm->vcpu.arch.exception.pending == true)
+ nsvm_printk("WARNING: Pending Exception\n");
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Restore selected save entries; efer, cr0, cr4 and (without NPT)
+ * cr3 go through their setter functions instead of being written
+ * to the VMCB directly */
+ svm->vmcb->save.es = hsave->save.es;
+ svm->vmcb->save.cs = hsave->save.cs;
+ svm->vmcb->save.ss = hsave->save.ss;
+ svm->vmcb->save.ds = hsave->save.ds;
+ svm->vmcb->save.gdtr = hsave->save.gdtr;
+ svm->vmcb->save.idtr = hsave->save.idtr;
+ svm->vmcb->save.rflags = hsave->save.rflags;
+ svm_set_efer(&svm->vcpu, hsave->save.efer);
+ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); /* PE is forced on */
+ svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = hsave->save.cr3;
+ svm->vcpu.arch.cr3 = hsave->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+ }
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ svm->vmcb->save.dr7 = 0;
+ svm->vmcb->save.cpl = 0;
+ svm->vmcb->control.exit_int_info = 0;
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK; /* GIF is left cleared after the exit */
+ /* Exit nested SVM mode */
+ svm->nested_vmcb = 0;
+
+ return 0;
+}
+
+/*
+ * Perform a nested #VMEXIT: run nested_svm_vmexit_real() on the mapped
+ * nested VMCB and, if that succeeded, reset and reload the MMU context.
+ *
+ * Returns 1 if nested_svm_do() reported a failure, 0 otherwise.
+ */
+static int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+	int failed;
+
+	nsvm_printk("VMexit\n");
+
+	failed = nested_svm_do(svm, svm->nested_vmcb, 0,
+			       NULL, nested_svm_vmexit_real);
+	if (failed)
+		return 1;
+
+	kvm_mmu_reset_context(&svm->vcpu);
+	kvm_mmu_load(&svm->vcpu);
+
+	return 0;
+}
+
+/*
+ * Build the MSR permission map used while the nested guest runs.
+ *
+ * @arg1 is the nested guest's permission map (mapped by nested_svm_do());
+ * @arg2 and @opaque are unused.  Each 32-bit word of svm->nested_msrpm
+ * becomes the OR of KVM's own map and the guest's map, and the VMCB is
+ * pointed at the merged map.  Always returns 0.
+ *
+ * NOTE(review): the loop covers PAGE_SIZE << MSRPM_ALLOC_ORDER bytes while
+ * nested_svm_do() maps one page for arg1 - verify for a non-zero order.
+ */
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+				  void *arg2, void *opaque)
+{
+	u32 *guest_map = (u32 *)arg1;
+	int idx = 0;
+
+	while (idx < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4) {
+		svm->nested_msrpm[idx] = svm->msrpm[idx] | guest_map[idx];
+		idx++;
+	}
+
+	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+	return 0;
+}
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{ /*
+ * Emulate VMRUN.  arg1 is the nested guest's VMCB; arg2 and opaque
+ * are not used.  The current VMCB is saved to svm->hsave, the
+ * nested guest's state is loaded into the current VMCB and the
+ * nested intercepts are ORed on top of the existing ones.  Always
+ * returns 0.
+ */
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+
+ /* nested_vmcb is our indicator if nested SVM is activated */
+ svm->nested_vmcb = svm->vmcb->save.rax;
+
+ /* Clear internal status */
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Save the old vmcb, so we don't need to pick what we save, but
+ can restore everything when a VMEXIT occurs */
+ memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+ /* We need to remember the original CR3 in the SPT case */
+ if (!npt_enabled)
+ hsave->save.cr3 = svm->vcpu.arch.cr3;
+ hsave->save.cr4 = svm->vcpu.arch.cr4;
+ hsave->save.rip = svm->next_rip; /* rip restored by the later #VMEXIT */
+
+ if (svm->vmcb->save.rflags & X86_EFLAGS_IF) /* record the pre-VMRUN IF flag in hflags */
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ /* Load the nested guest state */
+ svm->vmcb->save.es = nested_vmcb->save.es;
+ svm->vmcb->save.cs = nested_vmcb->save.cs;
+ svm->vmcb->save.ss = nested_vmcb->save.ss;
+ svm->vmcb->save.ds = nested_vmcb->save.ds;
+ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+ svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+ svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+ kvm_mmu_reset_context(&svm->vcpu);
+ }
+ svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ /* In case we don't even reach vcpu_run, the fields are not updated */
+ svm->vmcb->save.rax = nested_vmcb->save.rax;
+ svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+ svm->vmcb->save.rip = nested_vmcb->save.rip;
+ svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+ /* We don't want a nested guest to be more powerful than the guest,
+ so all intercepts are ORed */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
+ nested_vmcb->control.intercept_cr_write;
+ svm->vmcb->control.intercept_dr_read |=
+ nested_vmcb->control.intercept_dr_read;
+ svm->vmcb->control.intercept_dr_write |=
+ nested_vmcb->control.intercept_dr_write;
+ svm->vmcb->control.intercept_exceptions |=
+ nested_vmcb->control.intercept_exceptions;
+
+ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+ svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; /* guest address of the nested MSR permission map */
+
+ force_new_asid(&svm->vcpu);
+ svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+ svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; /* masking bit is always forced on */
+ if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+ nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+ nested_vmcb->control.int_ctl);
+ }
+ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) /* keep the nested VMCB's own setting in hflags */
+ svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+ nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.int_state);
+
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; /* added on top of the current offset */
+ if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+ nsvm_printk("Injecting Event: 0x%x\n",
+ nested_vmcb->control.event_inj);
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK; /* the nested guest starts with GIF set */
+
+ return 0;
+}
+
+/*
+ * Copy the state shared by VMLOAD and VMSAVE from @from_vmcb to @to_vmcb:
+ * FS, GS, TR, LDTR, KernelGsBase, the STAR/LSTAR/CSTAR/SFMASK MSRs and the
+ * three SYSENTER MSRs.  Always returns 1.
+ */
+static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+	struct vmcb_save_area *src = &from_vmcb->save;
+	struct vmcb_save_area *dst = &to_vmcb->save;
+
+	/* Segment registers */
+	dst->fs = src->fs;
+	dst->gs = src->gs;
+	dst->tr = src->tr;
+	dst->ldtr = src->ldtr;
+
+	/* SYSCALL/SWAPGS related MSRs */
+	dst->kernel_gs_base = src->kernel_gs_base;
+	dst->star = src->star;
+	dst->lstar = src->lstar;
+	dst->cstar = src->cstar;
+	dst->sfmask = src->sfmask;
+
+	/* SYSENTER MSRs */
+	dst->sysenter_cs = src->sysenter_cs;
+	dst->sysenter_esp = src->sysenter_esp;
+	dst->sysenter_eip = src->sysenter_eip;
+
+	return 1;
+}
+
+/*
+ * nested_svm_do() handler for VMLOAD: copy the VMLOAD/VMSAVE state from
+ * the mapped guest VMCB into the current VMCB.  @arg2 and @opaque are
+ * unused.
+ */
+static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
+			     void *arg2, void *opaque)
+{
+	struct vmcb *guest_vmcb = nested_vmcb;
+
+	return nested_svm_vmloadsave(guest_vmcb, svm->vmcb);
+}
+
+/*
+ * nested_svm_do() handler for VMSAVE: copy the VMLOAD/VMSAVE state from
+ * the current VMCB into the mapped guest VMCB.  @arg2 and @opaque are
+ * unused.
+ */
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+			     void *arg2, void *opaque)
+{
+	struct vmcb *guest_vmcb = nested_vmcb;
+
+	return nested_svm_vmloadsave(svm->vmcb, guest_vmcb);
+}
+
+/*
+ * Intercepted VMLOAD.  After the permission check, the instruction is
+ * skipped (next_rip = rip + 3) and the load is carried out on the VMCB
+ * whose guest physical address is in RAX.  Always returns 1; the result
+ * of nested_svm_do() is not examined.
+ */
+static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	if (nested_svm_check_permissions(svm) != 0)
+		return 1;
+
+	svm->next_rip = kvm_rip_read(vcpu) + 3;
+	skip_emulated_instruction(vcpu);
+
+	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+
+	return 1;
+}
+
+/*
+ * Intercepted VMSAVE.  After the permission check, the instruction is
+ * skipped (next_rip = rip + 3) and the save is carried out on the VMCB
+ * whose guest physical address is in RAX.  Always returns 1; the result
+ * of nested_svm_do() is not examined.
+ */
+static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	if (nested_svm_check_permissions(svm) != 0)
+		return 1;
+
+	svm->next_rip = kvm_rip_read(vcpu) + 3;
+	skip_emulated_instruction(vcpu);
+
+	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+
+	return 1;
+}
+
+/*
+ * Intercepted VMRUN.  After the permission check, the instruction is
+ * skipped (next_rip = rip + 3), the nested guest state is loaded from the
+ * VMCB addressed by RAX and, only if that succeeded, the merged MSR
+ * permission map is built.  Returns 1 on every path.
+ */
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	nsvm_printk("VMrun\n");
+	if (nested_svm_check_permissions(svm) != 0)
+		return 1;
+
+	svm->next_rip = kvm_rip_read(vcpu) + 3;
+	skip_emulated_instruction(vcpu);
+
+	/* The second step is skipped when the first one reports failure. */
+	if (!nested_svm_do(svm, svm->vmcb->save.rax, 0,
+			   NULL, nested_svm_vmrun))
+		nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
+			      NULL, nested_svm_vmrun_msrpm);
+
+	return 1;
+}
+
+/*
+ * Intercepted STGI.  After the permission check, the instruction is
+ * skipped (next_rip = rip + 3) and HF_GIF_MASK is set in hflags.
+ * Always returns 1.
+ */
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	if (nested_svm_check_permissions(svm) != 0)
+		return 1;
+
+	svm->next_rip = kvm_rip_read(vcpu) + 3;
+	skip_emulated_instruction(vcpu);
+
+	vcpu->arch.hflags |= HF_GIF_MASK;
+
+	return 1;
+}
+
+/*
+ * Intercepted CLGI.  After the permission check, the instruction is
+ * skipped (next_rip = rip + 3), HF_GIF_MASK is cleared in hflags, and the
+ * virtual interrupt intercept and V_IRQ are dropped.  Always returns 1.
+ */
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	if (nested_svm_check_permissions(svm) != 0)
+		return 1;
+
+	svm->next_rip = kvm_rip_read(vcpu) + 3;
+	skip_emulated_instruction(vcpu);
+
+	vcpu->arch.hflags &= ~HF_GIF_MASK;
+
+	/* After a CLGI no interrupts should come */
+	svm_clear_vintr(svm);
+	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+	return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_LASTINTTOIP:
*data = svm->vmcb->save.last_excp_to;
break;
+ case MSR_VM_HSAVE_PA:
+ *data = svm->hsave_msr;
+ break;
+ case MSR_VM_CR:
+ *data = 0;
+ break;
+ case MSR_IA32_UCODE_REV:
+ *data = 0x01000065;
+ break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
@@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_VM_HSAVE_PA:
+ svm->hsave_msr = data;
+ break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
}
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
{
KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
- svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
+ [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
+ [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
- [SVM_EXIT_VMRUN] = invalid_op_interception,
+ [SVM_EXIT_VMRUN] = vmrun_interception,
[SVM_EXIT_VMMCALL] = vmmcall_interception,
- [SVM_EXIT_VMLOAD] = invalid_op_interception,
- [SVM_EXIT_VMSAVE] = invalid_op_interception,
- [SVM_EXIT_STGI] = invalid_op_interception,
- [SVM_EXIT_CLGI] = invalid_op_interception,
+ [SVM_EXIT_VMLOAD] = vmload_interception,
+ [SVM_EXIT_VMSAVE] = vmsave_interception,
+ [SVM_EXIT_STGI] = stgi_interception,
+ [SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
(u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+ if (is_nested(svm)) {
+ nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
+ exit_code, svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+ if (nested_svm_exit_handled(svm, true)) {
+ nested_svm_vmexit(svm);
+ nsvm_printk("-> #VMEXIT\n");
+ return 1;
+ }
+ }
+
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ nested_svm_intr(svm);
+
svm_inject_irq(svm, irq);
}
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
+ if (nested_svm_intr(svm))
+ goto out;
+
+ if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+ goto out;
+
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
- vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
goto out;
}
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
}
svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
+ if (nested_svm_intr(svm))
+ return;
+
svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+ (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK));
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- control->intercept |= 1ULL << INTERCEPT_VINTR;
- else
- control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
+ else
+ svm_clear_vintr(svm);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void save_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
- asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
- asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
- asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
-}
-
-static void load_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
- asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
- asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
- asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
-}
-
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
svm->host_cr2 = kvm_read_cr2();
- svm->host_dr6 = read_dr6();
- svm->host_dr7 = read_dr7();
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (!is_nested(svm))
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
- if (svm->vmcb->save.dr7 & 0xff) {
- write_dr7(0);
- save_db_regs(svm->host_db_regs);
- load_db_regs(svm->db_regs);
- }
-
clgi();
local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
#endif
);
- if ((svm->vmcb->save.dr7 & 0xff))
- load_db_regs(svm->host_db_regs);
-
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- write_dr6(svm->host_dr6);
- write_dr7(svm->host_dr7);
kvm_write_cr2(svm->host_cr2);
kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af57682..bb481330716 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+ enum emulation_result invalid_state_emulation_result;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
- if (vcpu->guest_debug.enabled)
- eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ eb |= 1u << BP_VECTOR;
+ }
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (has_error_code)
+ if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+ }
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = nr;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (nr == BP_VECTOR)
+ if (nr == BP_VECTOR || nr == OF_VECTOR)
vmx->rmode.irq.rip++;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_SOFT_INTR
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ intr_info |= INTR_TYPE_SOFT_INTR;
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_EXCEPTION
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ if (nr == BP_VECTOR || nr == OF_VECTOR) {
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+ } else
+ intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
}
static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
* writes 'guest_tsc' into guest's timestamp counter "register"
* guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
*/
-static void guest_write_tsc(u64 guest_tsc)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
{
- u64 host_tsc;
-
- rdtscll(host_tsc);
vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
}
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_msr_entry *msr;
+ u64 host_tsc;
int ret = 0;
switch (msr_index) {
-#ifdef CONFIG_X86_64
case MSR_EFER:
vmx_load_host_state(vmx);
ret = kvm_set_msr_common(vcpu, msr_index, data);
break;
+#ifdef CONFIG_X86_64
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TIME_STAMP_COUNTER:
- guest_write_tsc(data);
+ rdtscll(host_tsc);
+ guest_write_tsc(data, host_tsc);
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- unsigned long dr7 = 0x400;
- int old_singlestep;
-
- old_singlestep = vcpu->guest_debug.singlestep;
-
- vcpu->guest_debug.enabled = dbg->enabled;
- if (vcpu->guest_debug.enabled) {
- int i;
+ int old_debug = vcpu->guest_debug;
+ unsigned long flags;
- dr7 |= 0x200; /* exact */
- for (i = 0; i < 4; ++i) {
- if (!dbg->breakpoints[i].enabled)
- continue;
- vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
- dr7 |= 2 << (i*2); /* global enable */
- dr7 |= 0 << (i*4+16); /* execution breakpoint */
- }
+ vcpu->guest_debug = dbg->control;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+ vcpu->guest_debug = 0;
- vcpu->guest_debug.singlestep = dbg->singlestep;
- } else
- vcpu->guest_debug.singlestep = 0;
-
- if (old_singlestep && !vcpu->guest_debug.singlestep) {
- unsigned long flags;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
+ else
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- flags = vmcs_readl(GUEST_RFLAGS);
+ flags = vmcs_readl(GUEST_RFLAGS);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
- }
+ vmcs_writel(GUEST_RFLAGS, flags);
update_exception_bitmap(vcpu);
- vmcs_writel(GUEST_DR7, dr7);
return 0;
}
@@ -1433,6 +1430,29 @@ continue_rmode:
init_rmode(vcpu->kvm);
}
+/*
+ * Update the guest's EFER.
+ *
+ * The new value is always recorded in shadow_efer.  If the vcpu has an
+ * MSR_EFER entry, the "IA-32e mode guest" VM-entry control is set or
+ * cleared to follow EFER.LMA, the entry's data is updated (with LME
+ * masked out while LMA is clear) and the MSR list is set up again.
+ */
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_msr_entry *efer_msr = find_msr_entry(vmx, MSR_EFER);
+	u32 entry_ctls;
+
+	vcpu->arch.shadow_efer = efer;
+	if (efer_msr == NULL)
+		return;
+
+	entry_ctls = vmcs_read32(VM_ENTRY_CONTROLS);
+	if (!(efer & EFER_LMA)) {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     entry_ctls & ~VM_ENTRY_IA32E_MODE);
+		efer_msr->data = efer & ~EFER_LME;
+	} else {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     entry_ctls | VM_ENTRY_IA32E_MODE);
+		efer_msr->data = efer;
+	}
+
+	setup_msrs(vmx);
+}
+
#ifdef CONFIG_X86_64
static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
-
vcpu->arch.shadow_efer |= EFER_LMA;
-
- find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS)
- | VM_ENTRY_IA32E_MODE);
+ vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
}
static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
}
-static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
- vcpu->arch.shadow_efer = efer;
- if (!msr)
- return;
- if (efer & EFER_LMA) {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) |
- VM_ENTRY_IA32E_MODE);
- msr->data = efer;
-
- } else {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) &
- ~VM_ENTRY_IA32E_MODE);
-
- msr->data = efer & ~EFER_LME;
- }
- setup_msrs(vmx);
-}
-
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = vmcs_read32(sf->limit);
var->selector = vmcs_read16(sf->selector);
ar = vmcs_read32(sf->ar_bytes);
- if (ar & AR_UNUSABLE_MASK)
+ if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
var->type = ar & 15;
var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+ if (cs.unusable)
+ return false;
if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
return false;
if (!cs.s)
return false;
- if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+ if (cs.type & AR_TYPE_WRITEABLE_MASK) {
if (cs.dpl > cs_rpl)
return false;
- } else if (cs.type & AR_TYPE_CODE_MASK) {
+ } else {
if (cs.dpl != cs_rpl)
return false;
}
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
ss_rpl = ss.selector & SELECTOR_RPL_MASK;
- if ((ss.type != 3) || (ss.type != 7))
+ if (ss.unusable)
+ return true;
+ if (ss.type != 3 && ss.type != 7)
return false;
if (!ss.s)
return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
vmx_get_segment(vcpu, &var, seg);
rpl = var.selector & SELECTOR_RPL_MASK;
+ if (var.unusable)
+ return true;
if (!var.s)
return false;
if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
+ if (tr.unusable)
+ return false;
if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
- if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+ if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
return false;
if (!tr.present)
return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
+ if (ldtr.unusable)
+ return true;
if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
if (ldtr.type != 2)
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
- u64 host_pat;
+ u64 host_pat, tsc_this, tsc_base;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+ tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+ rdtscll(tsc_this);
+ if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+ tsc_base = tsc_this;
+
+ guest_write_tsc(0, tsc_base);
return 0;
}
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0);
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
- /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- guest_write_tsc(0);
-
/* Special registers */
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
{
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
- set_debugreg(dbg->bp[0], 0);
- set_debugreg(dbg->bp[1], 1);
- set_debugreg(dbg->bp[2], 2);
- set_debugreg(dbg->bp[3], 3);
-
- if (dbg->singlestep) {
- unsigned long flags;
-
- flags = vmcs_readl(GUEST_RFLAGS);
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- vmcs_writel(GUEST_RFLAGS, flags);
- }
-}
-
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* the required debugging infrastructure rework.
*/
switch (vec) {
- case DE_VECTOR:
case DB_VECTOR:
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return 0;
+ kvm_queue_exception(vcpu, vec);
+ return 1;
case BP_VECTOR:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ return 0;
+ /* fall through */
+ case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info, error_code;
- unsigned long cr2, rip;
+ u32 intr_info, ex_no, error_code;
+ unsigned long cr2, rip, dr6;
u32 vect_info;
enum emulation_result er;
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
- if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
- (INTR_TYPE_EXCEPTION | 1)) {
+ ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+ switch (ex_no) {
+ case DB_VECTOR:
+ dr6 = vmcs_readl(EXIT_QUALIFICATION);
+ if (!(vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+ /* fall through */
+ case BP_VECTOR:
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
+ kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.exception = ex_no;
+ break;
+ default:
+ kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->ex.exception = ex_no;
+ kvm_run->ex.error_code = error_code;
+ break;
}
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
- kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
- kvm_run->ex.error_code = error_code;
return 0;
}
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
unsigned long exit_qualification;
- int size, down, in, string, rep;
+ int size, in, string;
unsigned port;
++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
size = (exit_qualification & 7) + 1;
in = (exit_qualification & 8) != 0;
- down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
- rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long val;
int dr, reg;
- /*
- * FIXME: this code assumes the host is debugging the guest.
- * need to deal with guest debugging itself too.
- */
+ dr = vmcs_readl(GUEST_DR7);
+ if (dr & DR7_GD) {
+ /*
+ * As the vm-exit takes precedence over the debug trap, we
+ * need to emulate the latter, either for the host or the
+ * guest debugging itself.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+ kvm_run->debug.arch.dr7 = dr;
+ kvm_run->debug.arch.pc =
+ vmcs_readl(GUEST_CS_BASE) +
+ vmcs_readl(GUEST_RIP);
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ return 0;
+ } else {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ vcpu->arch.dr6 |= DR6_BD;
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ }
+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & 7;
- reg = (exit_qualification >> 8) & 15;
- if (exit_qualification & 16) {
- /* mov from dr */
+ dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+ reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+ if (exit_qualification & TYPE_MOV_FROM_DR) {
switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
case 6:
- val = 0xffff0ff0;
+ val = vcpu->arch.dr6;
break;
case 7:
- val = 0x400;
+ val = vcpu->arch.dr7;
break;
default:
val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_register_write(vcpu, reg, val);
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
- /* mov to dr */
+ val = vcpu->arch.regs[reg];
+ switch (dr) {
+ case 0 ... 3:
+ vcpu->arch.db[dr] = val;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = val;
+ break;
+ case 4 ... 5:
+ if (vcpu->arch.cr4 & X86_CR4_DE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ break;
+ case 6:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ break;
+ case 7:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->arch.switch_db_regs =
+ (val & DR7_BP_EN_MASK);
+ }
+ break;
+ }
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
}
skip_emulated_instruction(vcpu);
return 1;
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
tss_selector = exit_qualification;
- return kvm_task_switch(vcpu, tss_selector, reason);
+ if (!kvm_task_switch(vcpu, tss_selector, reason))
+ return 0;
+
+ /* clear the local breakpoint enables, DR7 L0-L3 (bits 0,2,4,6) */
+ vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+
+ /*
+ * TODO: What about debug traps on tss switch?
+ * Are we supposed to inject them and update dr6?
+ */
+
+ return 1;
}
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
- enum emulation_result er;
gpa_t gpa;
- unsigned long hva;
int gla_validity;
- int r;
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
- if (!kvm_is_error_hva(hva)) {
- r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
- if (r < 0) {
- printk(KERN_ERR "EPT: Not enough memory!\n");
- return -ENOMEM;
- }
- return 1;
- } else {
- /* must be MMIO */
- er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
-
- if (er == EMULATE_FAIL) {
- printk(KERN_ERR
- "EPT: Fail to handle EPT violation vmexit!er is %d\n",
- er);
- printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
- printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
- (long unsigned int)exit_qualification);
- return -ENOTSUPP;
- } else if (er == EMULATE_DO_MMIO)
- return 0;
- }
- return 1;
+ return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
}
static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int err;
+ enum emulation_result err = EMULATE_DONE;
preempt_enable();
local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now except if we need to
- * emulate an MMIO */
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
+ vmx->invalid_state_emulation_result = err;
}
/*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* If we need to emulate an MMIO from handle_invalid_guest_state
* we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state)
- return 0;
+ if (vmx->emulation_required && emulate_invalid_guest_state) {
+ if (guest_state_valid(vcpu))
+ vmx->emulation_required = 0;
+ return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
+ }
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
vmx->vcpu.arch.nmi_injected = false;
}
kvm_clear_exception_queue(&vmx->vcpu);
- if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+ if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+ type == INTR_TYPE_SOFT_EXCEPTION)) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
*/
vmcs_writel(HOST_CR0, read_cr0());
+ set_debugreg(vcpu->arch.dr6, 6);
+
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
vcpu->arch.regs_dirty = 0;
+ get_debugreg(vcpu->arch.dr6, 6);
+
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_put = vmx_vcpu_put,
.set_guest_debug = set_guest_debug,
- .guest_debug_pre = kvm_guest_debug_pre,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae..8ca100a9eca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
+
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
return;
}
+ if (efer & EFER_FFXSR) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
hv_clock->tsc_to_system_mul);
}
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
if ((!vcpu->time_page))
return;
- if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
- kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = tsc_khz;
+ if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+ kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
}
/* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+
+ if (!vcpu->time_page)
+ return 0;
+ set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ return 1;
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
+ case MSR_VM_HSAVE_PA:
break;
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
vcpu->arch.time_page = NULL;
}
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
break;
}
default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
+ case MSR_VM_HSAVE_PA:
data = 0;
break;
case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_REINJECT_CONTROL:
+ case KVM_CAP_IRQ_INJECT_STATUS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOMMU:
r = iommu_found();
break;
- case KVM_CAP_CLOCKSOURCE:
- r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
- break;
default:
r = 0;
break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
@@ -1064,7 +1110,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
}
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1162,8 +1208,8 @@ out:
}
static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
goto out;
r = -EFAULT;
if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+ vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
@@ -1181,18 +1227,13 @@ out:
return r;
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index)
+ u32 index)
{
entry->function = function;
entry->index = index;
cpuid_count(entry->function, entry->index,
- &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+ &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
entry->flags = 0;
}
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#ifdef CONFIG_X86_64
bit(X86_FEATURE_LM) |
#endif
+ bit(X86_FEATURE_FXSR_OPT) |
bit(X86_FEATURE_MMXEXT) |
bit(X86_FEATURE_3DNOWEXT) |
bit(X86_FEATURE_3DNOW);
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+ bit(X86_FEATURE_SVM);
- /* all func 2 cpuid_count() should be called on the same cpu */
+ /* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[0].eax;
for (func = 1; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -E2BIG;
if (nent >= cpuid->nent)
goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[nent - 1].eax;
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -EFAULT;
if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
+ nent * sizeof(struct kvm_cpuid_entry2)))
goto out_free;
cpuid->nent = nent;
r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
return r;
}
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+ struct kvm_reinject_control *control)
+{
+ if (!kvm->arch.vpit)
+ return -ENXIO;
+ kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+ return 0;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
} else
goto out;
+ r = kvm_setup_default_irq_routing(kvm);
+ if (r) {
+ kfree(kvm->arch.vpic);
+ kfree(kvm->arch.vioapic);
+ goto out;
+ }
break;
case KVM_CREATE_PIT:
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpit)
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm);
if (kvm->arch.vpit)
r = 0;
+ create_pit_unlock:
+ mutex_unlock(&kvm->lock);
break;
+ case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
if (irqchip_in_kernel(kvm)) {
+ __s32 status;
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
+ status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ irq_event.status = status;
+ if (copy_to_user(argp, &irq_event,
+ sizeof irq_event))
+ goto out;
+ }
r = 0;
}
break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_REINJECT_CONTROL: {
+ struct kvm_reinject_control control;
+ r = -EFAULT;
+ if (copy_from_user(&control, argp, sizeof(control)))
+ goto out;
+ r = kvm_vm_ioctl_reinject(kvm, &control);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
return dev;
}
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
+{
+ void *data = val;
+ int r = X86EMUL_CONTINUE;
+
+ while (bytes) {
+ gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ unsigned offset = addr & (PAGE_SIZE-1);
+ unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
+ int ret;
+
+ if (gpa == UNMAPPED_GVA) {
+ r = X86EMUL_PROPAGATE_FAULT;
+ goto out;
+ }
+ ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+ if (ret < 0) {
+ r = X86EMUL_UNHANDLEABLE;
+ goto out;
+ }
+
+ bytes -= toread;
+ data += toread;
+ addr += toread;
+ }
+out:
+ return r;
+}
+
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
- unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+ unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
- ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+ ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
- bytes -= tocopy;
- data += tocopy;
- addr += tocopy;
+ bytes -= towrite;
+ data += towrite;
+ addr += towrite;
}
out:
return r;
}
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
static int emulator_read_emulated(unsigned long addr,
void *val,
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
- if (emulator_read_std(addr, val, bytes, vcpu)
- == X86EMUL_CONTINUE)
+ if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+ == X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
- emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+ kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
- .read_std = emulator_read_std,
+ .read_std = kvm_read_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(emulate_instruction);
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
- if (vcpu->arch.pio.guest_pages[i]) {
- kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
- vcpu->arch.pio.guest_pages[i] = NULL;
- }
-}
-
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
- void *q;
+ gva_t q = vcpu->arch.pio.guest_gva;
unsigned bytes;
- int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+ int ret;
- q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
- PAGE_KERNEL);
- if (!q) {
- free_pio_guest_pages(vcpu);
- return -ENOMEM;
- }
- q += vcpu->arch.pio.guest_page_offset;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
- memcpy(q, p, bytes);
+ ret = kvm_write_guest_virt(q, p, bytes, vcpu);
else
- memcpy(p, q, bytes);
- q -= vcpu->arch.pio.guest_page_offset;
- vunmap(q);
- free_pio_guest_pages(vcpu);
- return 0;
+ ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+ return ret;
}
int complete_pio(struct kvm_vcpu *vcpu)
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
- vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
- int i, ret = 0;
- int nr_pages = 1;
- struct page *page;
+ int ret = 0;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
- vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
- if (!now) {
- /*
- * String I/O straddles page boundary. Pin two guest pages
- * so that we satisfy atomicity constraints. Do just one
- * transaction to avoid complexity.
- */
- nr_pages = 2;
+ if (!now)
now = 1;
- }
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
- for (i = 0; i < nr_pages; ++i) {
- page = gva_to_page(vcpu, address + i * PAGE_SIZE);
- vcpu->arch.pio.guest_pages[i] = page;
- if (!page) {
- kvm_inject_gp(vcpu, 0);
- free_pio_guest_pages(vcpu);
- return 1;
- }
- }
+ vcpu->arch.pio.guest_gva = address;
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (!vcpu->arch.pio.in) {
/* string PIO write */
ret = pio_copy_data(vcpu);
- if (ret >= 0 && pio_dev) {
+ if (ret == X86EMUL_PROPAGATE_FAULT) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ if (ret == 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
+static void bounce_off(void *info)
+{
+ /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i, send_ipi = 0;
+
+ if (!ref_freq)
+ ref_freq = freq->old;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+ per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->cpu != freq->cpu)
+ continue;
+ if (!kvm_request_guest_time_update(vcpu))
+ continue;
+ if (vcpu->cpu != smp_processor_id())
+ send_ipi++;
+ }
+ }
+ spin_unlock(&kvm_lock);
+
+ if (freq->old < freq->new && send_ipi) {
+ /*
+ * We upscale the frequency. Must make sure the guest
+ * doesn't see old kvmclock values while running with
+ * the new frequency, otherwise we risk the guest seeing
+ * time go backwards.
+ *
+ * In case we update the frequency for another cpu
+ * (which might be in guest context) send an interrupt
+ * to kick the cpu out of guest context. Next time
+ * guest context is entered kvmclock will be updated,
+ * so the guest will not see stale values.
+ */
+ smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ }
+ return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
int kvm_arch_init(void *opaque)
{
- int r;
+ int r, cpu;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
return 0;
out:
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+ !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
int i;
- u32 function, index;
- struct kvm_cpuid_entry2 *e, *best;
+ struct kvm_cpuid_entry2 *best = NULL;
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+ struct kvm_cpuid_entry2 *e;
+
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (!best || e->function > best->function)
best = e;
}
+ return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 function, index;
+ struct kvm_cpuid_entry2 *best;
+
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
- if (vcpu->guest_debug.enabled)
- kvm_x86_ops->guest_debug_pre(vcpu);
-
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_guest_enter();
+ get_debugreg(vcpu->arch.host_dr6, 6);
+ get_debugreg(vcpu->arch.host_dr7, 7);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ get_debugreg(vcpu->arch.host_db[0], 0);
+ get_debugreg(vcpu->arch.host_db[1], 1);
+ get_debugreg(vcpu->arch.host_db[2], 2);
+ get_debugreg(vcpu->arch.host_db[3], 3);
+
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.eff_db[0], 0);
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.host_db[0], 0);
+ set_debugreg(vcpu->arch.host_db[1], 1);
+ set_debugreg(vcpu->arch.host_db[2], 2);
+ set_debugreg(vcpu->arch.host_db[3], 3);
+ }
+ set_debugreg(vcpu->arch.host_dr6, 6);
+ set_debugreg(vcpu->arch.host_dr7, 7);
+
vcpu->guest_mode = 0;
local_irq_enable();
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int r;
+ int i, r;
vcpu_load(vcpu);
+ if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+ vcpu->arch.switch_db_regs =
+ (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+ } else {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+ }
+
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+ kvm_queue_exception(vcpu, BP_VECTOR);
+
vcpu_put(vcpu);
return r;
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
+ vcpu->arch.switch_db_regs = 0;
+ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr7 = DR7_FIXED_1;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+ rdtscll(kvm->arch.vm_init_tsc);
+
return kvm;
}
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a337..ca91749d208 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
0, ImplicitOps | Stack, 0, 0,
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
- 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+ struct x86_emulate_ops *ops,
+ void *dest, int len)
{
struct decode_cache *c = &ctxt->decode;
int rc;
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
- &c->src.val, c->src.bytes, ctxt->vcpu);
+ dest, len, ctxt->vcpu);
if (rc != 0)
return rc;
- register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
return rc;
}
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct decode_cache *c = &ctxt->decode;
int rc;
- c->src.bytes = c->dst.bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
if (rc != 0)
return rc;
- c->dst.val = c->src.val;
return 0;
}
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
return 0;
}
+static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ unsigned long cs;
+
+ rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+ if (rc)
+ return rc;
+ if (c->op_bytes == 4)
+ c->eip = (u32)c->eip;
+ rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+ if (rc)
+ return rc;
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+ return rc;
+}
+
static inline int writeback(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1467,11 +1485,9 @@ special_insn:
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- c->src.bytes = c->op_bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
if (rc != 0)
goto done;
- c->dst.val = c->src.val;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
mov:
c->dst.val = c->src.val;
break;
+ case 0xcb: /* ret far */
+ rc = emulate_ret_far(ctxt, ops);
+ if (rc)
+ goto done;
+ break;
case 0xd0 ... 0xd1: /* Grp2 */
c->src.val = 1;
emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 3: /* lidt/vmmcall */
- if (c->modrm_mod == 3 && c->modrm_rm == 1) {
- rc = kvm_fix_hypercall(ctxt->vcpu);
- if (rc)
- goto done;
- kvm_emulate_hypercall(ctxt->vcpu);
+ if (c->modrm_mod == 3) {
+ switch (c->modrm_rm) {
+ case 1:
+ rc = kvm_fix_hypercall(ctxt->vcpu);
+ if (rc)
+ goto done;
+ break;
+ default:
+ goto cannot_emulate;
+ }
} else {
rc = read_descriptor(ctxt, ops, c->src.ptr,
&size, &address,
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index 6100682b1da..dd1a7a4a1ce 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -65,4 +65,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _XTENSA_SOCKET_H */
diff --git a/arch/xtensa/include/asm/swab.h b/arch/xtensa/include/asm/swab.h
index f50b697eb60..226a3916231 100644
--- a/arch/xtensa/include/asm/swab.h
+++ b/arch/xtensa/include/asm/swab.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_SWAB_H
#define _XTENSA_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 5fbcde59a92..f3b66fba5b8 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %14s", irq_desc[i].chip->typename);
seq_printf(p, " %s", action->name);