aboutsummaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig247
-rw-r--r--arch/s390/Kconfig.debug14
-rw-r--r--arch/s390/Makefile32
-rw-r--r--arch/s390/appldata/appldata_base.c26
-rw-r--r--arch/s390/appldata/appldata_mem.c21
-rw-r--r--arch/s390/appldata/appldata_net_sum.c20
-rw-r--r--arch/s390/appldata/appldata_os.c4
-rw-r--r--arch/s390/boot/Makefile4
-rw-r--r--arch/s390/boot/compressed/Makefile11
-rw-r--r--arch/s390/boot/compressed/misc.c4
-rw-r--r--arch/s390/configs/default_defconfig687
-rw-r--r--arch/s390/configs/gcov_defconfig640
-rw-r--r--arch/s390/configs/performance_defconfig632
-rw-r--r--arch/s390/configs/zfcpdump_defconfig86
-rw-r--r--arch/s390/crypto/aes_s390.c133
-rw-r--r--arch/s390/crypto/des_s390.c104
-rw-r--r--arch/s390/defconfig57
-rw-r--r--arch/s390/hypfs/Makefile2
-rw-r--r--arch/s390/hypfs/hypfs.h20
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c22
-rw-r--r--arch/s390/hypfs/hypfs_diag.c58
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c141
-rw-r--r--arch/s390/hypfs/hypfs_vm.c74
-rw-r--r--arch/s390/hypfs/inode.c61
-rw-r--r--arch/s390/include/asm/Kbuild3
-rw-r--r--arch/s390/include/asm/airq.h92
-rw-r--r--arch/s390/include/asm/atomic.h203
-rw-r--r--arch/s390/include/asm/barrier.h29
-rw-r--r--arch/s390/include/asm/bitops.h1042
-rw-r--r--arch/s390/include/asm/ccwdev.h9
-rw-r--r--arch/s390/include/asm/ccwgroup.h3
-rw-r--r--arch/s390/include/asm/checksum.h11
-rw-r--r--arch/s390/include/asm/chpid.h11
-rw-r--r--arch/s390/include/asm/cio.h5
-rw-r--r--arch/s390/include/asm/clp.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h5
-rw-r--r--arch/s390/include/asm/compat.h69
-rw-r--r--arch/s390/include/asm/cpu_mf.h186
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/css_chars.h2
-rw-r--r--arch/s390/include/asm/ctl_reg.h128
-rw-r--r--arch/s390/include/asm/debug.h5
-rw-r--r--arch/s390/include/asm/dis.h52
-rw-r--r--arch/s390/include/asm/dma-mapping.h13
-rw-r--r--arch/s390/include/asm/eadm.h19
-rw-r--r--arch/s390/include/asm/elf.h24
-rw-r--r--arch/s390/include/asm/facility.h17
-rw-r--r--arch/s390/include/asm/fcx.h38
-rw-r--r--arch/s390/include/asm/ftrace.h12
-rw-r--r--arch/s390/include/asm/futex.h67
-rw-r--r--arch/s390/include/asm/hardirq.h5
-rw-r--r--arch/s390/include/asm/hugetlb.h113
-rw-r--r--arch/s390/include/asm/hw_irq.h17
-rw-r--r--arch/s390/include/asm/io.h27
-rw-r--r--arch/s390/include/asm/ipl.h10
-rw-r--r--arch/s390/include/asm/irq.h69
-rw-r--r--arch/s390/include/asm/jump_label.h2
-rw-r--r--arch/s390/include/asm/kprobes.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h312
-rw-r--r--arch/s390/include/asm/lowcore.h35
-rw-r--r--arch/s390/include/asm/mman.h4
-rw-r--r--arch/s390/include/asm/mmu.h6
-rw-r--r--arch/s390/include/asm/mmu_context.h105
-rw-r--r--arch/s390/include/asm/mutex.h2
-rw-r--r--arch/s390/include/asm/page.h100
-rw-r--r--arch/s390/include/asm/pci.h83
-rw-r--r--arch/s390/include/asm/pci_clp.h10
-rw-r--r--arch/s390/include/asm/pci_debug.h12
-rw-r--r--arch/s390/include/asm/pci_insn.h220
-rw-r--r--arch/s390/include/asm/pci_io.h16
-rw-r--r--arch/s390/include/asm/percpu.h137
-rw-r--r--arch/s390/include/asm/perf_event.h88
-rw-r--r--arch/s390/include/asm/pgalloc.h22
-rw-r--r--arch/s390/include/asm/pgtable.h1095
-rw-r--r--arch/s390/include/asm/processor.h54
-rw-r--r--arch/s390/include/asm/ptrace.h81
-rw-r--r--arch/s390/include/asm/qdio.h35
-rw-r--r--arch/s390/include/asm/sclp.h22
-rw-r--r--arch/s390/include/asm/serial.h6
-rw-r--r--arch/s390/include/asm/setup.h55
-rw-r--r--arch/s390/include/asm/signal.h19
-rw-r--r--arch/s390/include/asm/sigp.h21
-rw-r--r--arch/s390/include/asm/smp.h17
-rw-r--r--arch/s390/include/asm/spinlock.h132
-rw-r--r--arch/s390/include/asm/spinlock_types.h6
-rw-r--r--arch/s390/include/asm/switch_to.h141
-rw-r--r--arch/s390/include/asm/syscall.h10
-rw-r--r--arch/s390/include/asm/thread_info.h37
-rw-r--r--arch/s390/include/asm/timex.h40
-rw-r--r--arch/s390/include/asm/tlb.h38
-rw-r--r--arch/s390/include/asm/tlbflush.h123
-rw-r--r--arch/s390/include/asm/topology.h13
-rw-r--r--arch/s390/include/asm/uaccess.h228
-rw-r--r--arch/s390/include/asm/unistd.h11
-rw-r--r--arch/s390/include/asm/vdso.h5
-rw-r--r--arch/s390/include/asm/vtime.h7
-rw-r--r--arch/s390/include/uapi/asm/Kbuild3
-rw-r--r--arch/s390/include/uapi/asm/chsc.h13
-rw-r--r--arch/s390/include/uapi/asm/dasd.h4
-rw-r--r--arch/s390/include/uapi/asm/hypfs.h25
-rw-r--r--arch/s390/include/uapi/asm/kvm.h71
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h31
-rw-r--r--arch/s390/include/uapi/asm/sclp_ctl.h24
-rw-r--r--arch/s390/include/uapi/asm/sie.h243
-rw-r--r--arch/s390/include/uapi/asm/sigcontext.h1
-rw-r--r--arch/s390/include/uapi/asm/socket.h12
-rw-r--r--arch/s390/include/uapi/asm/statfs.h63
-rw-r--r--arch/s390/include/uapi/asm/ucontext.h8
-rw-r--r--arch/s390/include/uapi/asm/unistd.h5
-rw-r--r--arch/s390/include/uapi/asm/virtio-ccw.h21
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h65
-rw-r--r--arch/s390/kernel/Makefile25
-rw-r--r--arch/s390/kernel/asm-offsets.c27
-rw-r--r--arch/s390/kernel/bitmap.c54
-rw-r--r--arch/s390/kernel/cache.c25
-rw-r--r--arch/s390/kernel/compat_exec_domain.c29
-rw-r--r--arch/s390/kernel/compat_linux.c295
-rw-r--r--arch/s390/kernel/compat_linux.h125
-rw-r--r--arch/s390/kernel/compat_signal.c268
-rw-r--r--arch/s390/kernel/compat_wrapper.S1667
-rw-r--r--arch/s390/kernel/compat_wrapper.c216
-rw-r--r--arch/s390/kernel/crash_dump.c388
-rw-r--r--arch/s390/kernel/debug.c6
-rw-r--r--arch/s390/kernel/dis.c93
-rw-r--r--arch/s390/kernel/dumpstack.c217
-rw-r--r--arch/s390/kernel/early.c23
-rw-r--r--arch/s390/kernel/entry.S167
-rw-r--r--arch/s390/kernel/entry.h28
-rw-r--r--arch/s390/kernel/entry64.S262
-rw-r--r--arch/s390/kernel/ftrace.c26
-rw-r--r--arch/s390/kernel/head.S8
-rw-r--r--arch/s390/kernel/head31.S1
-rw-r--r--arch/s390/kernel/head64.S7
-rw-r--r--arch/s390/kernel/ipl.c28
-rw-r--r--arch/s390/kernel/irq.c241
-rw-r--r--arch/s390/kernel/kprobes.c188
-rw-r--r--arch/s390/kernel/machine_kexec.c32
-rw-r--r--arch/s390/kernel/mcount.S2
-rw-r--r--arch/s390/kernel/mcount64.S2
-rw-r--r--arch/s390/kernel/mem_detect.c145
-rw-r--r--arch/s390/kernel/module.c145
-rw-r--r--arch/s390/kernel/nmi.c15
-rw-r--r--arch/s390/kernel/os_info.c1
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c28
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c322
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c1643
-rw-r--r--arch/s390/kernel/perf_event.c226
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c66
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c150
-rw-r--r--arch/s390/kernel/runtime_instr.c9
-rw-r--r--arch/s390/kernel/s390_ksyms.c3
-rw-r--r--arch/s390/kernel/sclp.S7
-rw-r--r--arch/s390/kernel/setup.c602
-rw-r--r--arch/s390/kernel/signal.c127
-rw-r--r--arch/s390/kernel/smp.c204
-rw-r--r--arch/s390/kernel/suspend.c43
-rw-r--r--arch/s390/kernel/swsusp_asm64.S36
-rw-r--r--arch/s390/kernel/sys_s390.c14
-rw-r--r--arch/s390/kernel/syscalls.S581
-rw-r--r--arch/s390/kernel/sysinfo.c2
-rw-r--r--arch/s390/kernel/time.c76
-rw-r--r--arch/s390/kernel/topology.c25
-rw-r--r--arch/s390/kernel/traps.c250
-rw-r--r--arch/s390/kernel/vdso.c17
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S31
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S9
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S4
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S24
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S9
-rw-r--r--arch/s390/kernel/vmlinux.lds.S4
-rw-r--r--arch/s390/kernel/vtime.c24
-rw-r--r--arch/s390/kvm/Kconfig7
-rw-r--r--arch/s390/kvm/Makefile7
-rw-r--r--arch/s390/kvm/diag.c126
-rw-r--r--arch/s390/kvm/gaccess.c726
-rw-r--r--arch/s390/kvm/gaccess.h607
-rw-r--r--arch/s390/kvm/guestdbg.c482
-rw-r--r--arch/s390/kvm/intercept.c352
-rw-r--r--arch/s390/kvm/interrupt.c1330
-rw-r--r--arch/s390/kvm/irq.h22
-rw-r--r--arch/s390/kvm/kvm-s390.c1087
-rw-r--r--arch/s390/kvm/kvm-s390.h173
-rw-r--r--arch/s390/kvm/priv.c956
-rw-r--r--arch/s390/kvm/sigp.c381
-rw-r--r--arch/s390/kvm/trace-s390.h69
-rw-r--r--arch/s390/kvm/trace.h145
-rw-r--r--arch/s390/lib/Makefile4
-rw-r--r--arch/s390/lib/delay.c18
-rw-r--r--arch/s390/lib/find.c77
-rw-r--r--arch/s390/lib/spinlock.c157
-rw-r--r--arch/s390/lib/uaccess.c406
-rw-r--r--arch/s390/lib/uaccess.h21
-rw-r--r--arch/s390/lib/uaccess_mvcos.c225
-rw-r--r--arch/s390/lib/uaccess_pt.c388
-rw-r--r--arch/s390/lib/uaccess_std.c317
-rw-r--r--arch/s390/lib/usercopy.c8
-rw-r--r--arch/s390/math-emu/math.c2
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/cmm.c20
-rw-r--r--arch/s390/mm/dump_pagetables.c39
-rw-r--r--arch/s390/mm/fault.c280
-rw-r--r--arch/s390/mm/gup.c89
-rw-r--r--arch/s390/mm/hugetlbpage.c123
-rw-r--r--arch/s390/mm/init.c96
-rw-r--r--arch/s390/mm/maccess.c29
-rw-r--r--arch/s390/mm/mem_detect.c65
-rw-r--r--arch/s390/mm/mmap.c32
-rw-r--r--arch/s390/mm/pageattr.c32
-rw-r--r--arch/s390/mm/pgtable.c894
-rw-r--r--arch/s390/mm/vmem.c83
-rw-r--r--arch/s390/net/bpf_jit_comp.c337
-rw-r--r--arch/s390/oprofile/hwsampler.c102
-rw-r--r--arch/s390/oprofile/hwsampler.h52
-rw-r--r--arch/s390/oprofile/init.c59
-rw-r--r--arch/s390/pci/Makefile4
-rw-r--r--arch/s390/pci/pci.c980
-rw-r--r--arch/s390/pci/pci_clp.c217
-rw-r--r--arch/s390/pci/pci_debug.c40
-rw-r--r--arch/s390/pci/pci_dma.c66
-rw-r--r--arch/s390/pci/pci_event.c101
-rw-r--r--arch/s390/pci/pci_insn.c202
-rw-r--r--arch/s390/pci/pci_msi.c141
-rw-r--r--arch/s390/pci/pci_sysfs.c138
225 files changed, 19649 insertions, 11297 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b5ea38c2564..bb63499fc5d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,7 +52,7 @@ config KEXEC
config AUDIT_ARCH
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config PCI_QUIRKS
@@ -60,86 +60,92 @@ config PCI_QUIRKS
config S390
def_bool y
- select USE_GENERIC_SMP_HELPERS if SMP
- select GENERIC_CPU_DEVICES if !SMP
- select HAVE_SYSCALL_WRAPPERS
- select HAVE_FUNCTION_TRACER
- select HAVE_FUNCTION_TRACE_MCOUNT_TEST
- select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_C_RECORDMCOUNT
- select HAVE_SYSCALL_TRACEPOINTS
- select SYSCTL_EXCEPTION_TRACE
- select HAVE_DYNAMIC_FTRACE
- select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_OPROFILE
- select HAVE_KPROBES
- select HAVE_KRETPROBES
- select HAVE_KVM if 64BIT
- select HAVE_ARCH_TRACEHOOK
- select INIT_ALL_POSSIBLE
- select HAVE_IRQ_WORK
- select HAVE_PERF_EVENTS
- select ARCH_HAVE_NMI_SAFE_CMPXCHG
- select HAVE_DEBUG_KMEMLEAK
- select HAVE_KERNEL_GZIP
- select HAVE_KERNEL_BZIP2
- select HAVE_KERNEL_LZMA
- select HAVE_KERNEL_LZO
- select HAVE_KERNEL_XZ
- select HAVE_ARCH_MUTEX_CPU_RELAX
- select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
- select HAVE_BPF_JIT if 64BIT && PACK_STACK
- select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select HAVE_CMPXCHG_LOCAL
- select HAVE_CMPXCHG_DOUBLE
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
- select HAVE_VIRT_CPU_ACCOUNTING
- select VIRT_CPU_ACCOUNTING
- select ARCH_DISCARD_MEMBLOCK
- select BUILDTIME_EXTABLE_SORT
- select ARCH_INLINE_SPIN_TRYLOCK
- select ARCH_INLINE_SPIN_TRYLOCK_BH
- select ARCH_INLINE_SPIN_LOCK
- select ARCH_INLINE_SPIN_LOCK_BH
- select ARCH_INLINE_SPIN_LOCK_IRQ
- select ARCH_INLINE_SPIN_LOCK_IRQSAVE
- select ARCH_INLINE_SPIN_UNLOCK
- select ARCH_INLINE_SPIN_UNLOCK_BH
- select ARCH_INLINE_SPIN_UNLOCK_IRQ
- select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
- select ARCH_INLINE_READ_TRYLOCK
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
select ARCH_INLINE_READ_LOCK_IRQ
select ARCH_INLINE_READ_LOCK_IRQSAVE
+ select ARCH_INLINE_READ_TRYLOCK
select ARCH_INLINE_READ_UNLOCK
select ARCH_INLINE_READ_UNLOCK_BH
select ARCH_INLINE_READ_UNLOCK_IRQ
select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
- select ARCH_INLINE_WRITE_TRYLOCK
+ select ARCH_INLINE_SPIN_LOCK
+ select ARCH_INLINE_SPIN_LOCK_BH
+ select ARCH_INLINE_SPIN_LOCK_IRQ
+ select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+ select ARCH_INLINE_SPIN_TRYLOCK
+ select ARCH_INLINE_SPIN_TRYLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK
+ select ARCH_INLINE_SPIN_UNLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK_IRQ
+ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
select ARCH_INLINE_WRITE_LOCK
select ARCH_INLINE_WRITE_LOCK_BH
select ARCH_INLINE_WRITE_LOCK_IRQ
select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+ select ARCH_INLINE_WRITE_TRYLOCK
select ARCH_INLINE_WRITE_UNLOCK
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
- select HAVE_UID16 if 32BIT
+ select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
- select GENERIC_SMP_IDLE_THREAD
- select GENERIC_TIME_VSYSCALL_OLD
+ select BUILDTIME_EXTABLE_SORT
+ select CLONE_BACKWARDS2
select GENERIC_CLOCKEVENTS
- select KTIME_SCALAR if 32BIT
+ select GENERIC_CPU_DEVICES if !SMP
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_TIME_VSYSCALL
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
+ select HAVE_BPF_JIT if 64BIT && PACK_STACK
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+ select HAVE_C_RECORDMCOUNT
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
+ select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_XZ
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
+ select HAVE_KVM if 64BIT
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_MEMBLOCK_PHYS_MAP
select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_OPROFILE
+ select HAVE_PERF_EVENTS
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_UID16 if 32BIT
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select KTIME_SCALAR if 32BIT
select MODULES_USE_ELF_RELA
- select CLONE_BACKWARDS2
+ select NO_BOOTMEM
+ select OLD_SIGACTION
+ select OLD_SIGSUSPEND3
+ select SYSCTL_EXCEPTION_TRACE
+ select TTY
+ select VIRT_CPU_ACCOUNTING
+ select VIRT_TO_BUS
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -225,11 +231,73 @@ config MARCH_Z196
not work on older machines.
config MARCH_ZEC12
- bool "IBM zEC12"
+ bool "IBM zBC12 and zEC12"
select HAVE_MARCH_ZEC12_FEATURES if 64BIT
help
- Select this to enable optimizations for IBM zEC12 (2827 series). The
- kernel will be slightly faster but will not work on older machines.
+ Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+ 2827 series). The kernel will be slightly faster but will not work on
+ older machines.
+
+endchoice
+
+config MARCH_G5_TUNE
+ def_bool TUNE_G5 || MARCH_G5 && TUNE_DEFAULT
+
+config MARCH_Z900_TUNE
+ def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
+
+config MARCH_Z990_TUNE
+ def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT
+
+config MARCH_Z9_109_TUNE
+ def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT
+
+config MARCH_Z10_TUNE
+ def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
+
+config MARCH_Z196_TUNE
+ def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT
+
+config MARCH_ZEC12_TUNE
+ def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT
+
+choice
+ prompt "Tune code generation"
+ default TUNE_DEFAULT
+ help
+ Cause the compiler to tune (-mtune) the generated code for a machine.
+ This will make the code run faster on the selected machine but
+ somewhat slower on other machines.
+ This option only changes how the compiler emits instructions, not the
+ selection of instructions itself, so the resulting kernel will run on
+ all other machines.
+
+config TUNE_DEFAULT
+ bool "Default"
+ help
+ Tune the generated code for the target processor for which the kernel
+ will be compiled.
+
+config TUNE_G5
+ bool "System/390 model G5 and G6"
+
+config TUNE_Z900
+ bool "IBM zSeries model z800 and z900"
+
+config TUNE_Z990
+ bool "IBM zSeries model z890 and z990"
+
+config TUNE_Z9_109
+ bool "IBM System z9"
+
+config TUNE_Z10
+ bool "IBM System z10"
+
+config TUNE_Z196
+ bool "IBM zEnterprise 114 and 196"
+
+config TUNE_ZEC12
+ bool "IBM zBC12 and zEC12"
endchoice
@@ -249,6 +317,7 @@ config COMPAT
depends on 64BIT
select COMPAT_BINFMT_ELF if BINFMT_ELF
select ARCH_WANT_OLD_COMPAT_IPC
+ select COMPAT_OLD_SIGACTION
help
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
@@ -269,10 +338,10 @@ config SMP
a system with only one CPU, like most personal computers, say N. If
you have a system with more than one CPU, say Y.
- If you say N here, the kernel will run on single and multiprocessor
+ If you say N here, the kernel will run on uni- and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
+ uniprocessor machines. On a uniprocessor machine, the kernel
will run faster if you say N here.
See also the SMP-HOWTO available at
@@ -281,14 +350,14 @@ config SMP
Even if you don't know what to do here, say Y.
config NR_CPUS
- int "Maximum number of CPUs (2-64)"
- range 2 64
+ int "Maximum number of CPUs (2-256)"
+ range 2 256
depends on SMP
default "32" if !64BIT
default "64" if 64BIT
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. The maximum supported value is 64 and the
+ kernel will support. The maximum supported value is 256 and the
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
@@ -298,7 +367,6 @@ config HOTPLUG_CPU
def_bool y
prompt "Support for hot-pluggable CPUs"
depends on SMP
- select HOTPLUG
help
Say Y here to be able to turn CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
@@ -351,6 +419,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ def_bool y
+ depends on 64BIT
+
config FORCE_MAX_ZONEORDER
int
default "9"
@@ -371,19 +443,6 @@ config PACK_STACK
Say Y if you are unsure.
-config SMALL_STACK
- def_bool n
- prompt "Use 8kb for kernel stack instead of 16kb"
- depends on PACK_STACK && 64BIT && !LOCKDEP
- help
- If you say Y here and the compiler supports the -mkernel-backchain
- option the kernel will use a smaller kernel stack size. The reduced
- size is 8kb instead of 16kb. This allows to run more threads on a
- system and reduces the pressure on the memory management for higher
- order page allocations.
-
- Say N if you are unsure.
-
config CHECK_STACK
def_bool y
prompt "Detect kernel stack overflow"
@@ -439,7 +498,6 @@ menuconfig PCI
bool "PCI support"
default n
depends on 64BIT
- select ARCH_SUPPORTS_MSI
select PCI_MSI
help
Enable PCI support.
@@ -454,6 +512,16 @@ config PCI_NR_FUNCTIONS
This allows you to specify the maximum number of PCI functions which
this kernel will support.
+config PCI_NR_MSI
+ int "Maximum number of MSI interrupts (64-32768)"
+ range 64 32768
+ default "256"
+ help
+ This defines the number of virtual interrupts the kernel will
+ provide for MSI interrupts. If you configure your system to have
+ too few drivers will fail to allocate MSI interrupts for all
+ PCI devices.
+
source "drivers/pci/Kconfig"
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/hotplug/Kconfig"
@@ -530,15 +598,9 @@ config CRASH_DUMP
Crash dump kernels are loaded in the main kernel with kexec-tools
into a specially reserved region and then later executed after
a crash by kdump/kexec.
- For more details see Documentation/kdump/kdump.txt
-
-config ZFCPDUMP
- def_bool n
- prompt "zfcpdump support"
- select SMP
- help
- Select this option if you want to build an zfcpdump enabled kernel.
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+ This option also enables s390 zfcpdump.
+ See also <file:Documentation/s390/zfcpdump.txt>
endmenu
@@ -718,8 +780,9 @@ source "arch/s390/kvm/Kconfig"
config S390_GUEST
def_bool y
- prompt "s390 support for virtio devices (EXPERIMENTAL)"
- depends on 64BIT && EXPERIMENTAL
+ prompt "s390 support for virtio devices"
+ depends on 64BIT
+ select TTY
select VIRTUALIZATION
select VIRTIO
select VIRTIO_CONSOLE
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index fc32a2df497..c56878e1245 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -17,20 +17,6 @@ config STRICT_DEVMEM
If you are unsure, say Y.
-config DEBUG_STRICT_USER_COPY_CHECKS
- def_bool n
- prompt "Strict user copy size checks"
- ---help---
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time warnings.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, or if you run an older (pre 4.4) gcc, say N.
-
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 7e3ce78d429..874e6d6e9c5 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -35,13 +35,21 @@ endif
export LD_BFD
-cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5)
-cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900)
-cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990)
-cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109)
-cflags-$(CONFIG_MARCH_Z10) += $(call cc-option,-march=z10)
-cflags-$(CONFIG_MARCH_Z196) += $(call cc-option,-march=z196)
-cflags-$(CONFIG_MARCH_ZEC12) += $(call cc-option,-march=zEC12)
+cflags-$(CONFIG_MARCH_G5) += -march=g5
+cflags-$(CONFIG_MARCH_Z900) += -march=z900
+cflags-$(CONFIG_MARCH_Z990) += -march=z990
+cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109
+cflags-$(CONFIG_MARCH_Z10) += -march=z10
+cflags-$(CONFIG_MARCH_Z196) += -march=z196
+cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12
+
+cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5
+cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900
+cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990
+cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
+cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
+cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
+cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
#KBUILD_IMAGE is necessary for make rpm
KBUILD_IMAGE :=arch/s390/boot/image
@@ -55,22 +63,12 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
ifeq ($(call cc-option-yn,-mkernel-backchain),y)
cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-ifdef CONFIG_SMALL_STACK
-STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) )
-endif
endif
# new style option for packed stacks
ifeq ($(call cc-option-yn,-mpacked-stack),y)
cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-ifdef CONFIG_SMALL_STACK
-STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) )
-endif
endif
ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index bae0f402bf2..47c8630c93c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -48,9 +48,9 @@ static struct platform_device *appldata_pdev;
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write,
+static int appldata_timer_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(ctl_table *ctl, int write,
+static int appldata_interval_handler(struct ctl_table *ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
@@ -201,10 +201,10 @@ static void __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(ctl_table *ctl, int write,
+appldata_timer_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int len;
+ unsigned int len;
char buf[2];
if (!*lenp || *ppos) {
@@ -212,7 +212,9 @@ appldata_timer_handler(ctl_table *ctl, int write,
return 0;
}
if (!write) {
- len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n");
+ strncpy(buf, appldata_timer_active ? "1\n" : "0\n",
+ ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len))
@@ -241,10 +243,11 @@ out:
* current timer interval.
*/
static int
-appldata_interval_handler(ctl_table *ctl, int write,
+appldata_interval_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int len, interval;
+ unsigned int len;
+ int interval;
char buf[16];
if (!*lenp || *ppos) {
@@ -284,11 +287,12 @@ out:
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(ctl_table *ctl, int write,
+appldata_generic_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
- int rc, len, found;
+ unsigned int len;
+ int rc, found;
char buf[2];
struct list_head *lh;
@@ -317,7 +321,8 @@ appldata_generic_handler(ctl_table *ctl, int write,
return 0;
}
if (!write) {
- len = sprintf(buf, ops->active ? "1\n" : "0\n");
+ strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len)) {
@@ -524,6 +529,7 @@ static int __init appldata_init(void)
{
int rc;
+ init_virt_timer(&appldata_timer);
appldata_timer.function = appldata_timer_function;
appldata_timer.data = (unsigned long) &appldata_work;
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 02d9a1cf505..edcf2a70694 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -13,6 +13,7 @@
#include <linux/kernel_stat.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include "appldata.h"
@@ -32,7 +33,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_mem_data {
+struct appldata_mem_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -63,7 +64,7 @@ static struct appldata_mem_data {
u64 pgmajfault; /* page faults (major only) */
// <-- New in 2.6
-} __attribute__((packed)) appldata_mem_data;
+} __packed;
/*
@@ -108,7 +109,7 @@ static void appldata_get_mem_data(void *data)
mem_data->totalswap = P2K(val.totalswap);
mem_data->freeswap = P2K(val.freeswap);
- mem_data->timestamp = get_clock();
+ mem_data->timestamp = get_tod_clock();
mem_data->sync_count_2++;
}
@@ -118,7 +119,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_MEM_ID,
.size = sizeof(struct appldata_mem_data),
.callback = &appldata_get_mem_data,
- .data = &appldata_mem_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -131,7 +131,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_mem_init(void)
{
- return appldata_register_ops(&ops);
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
}
/*
@@ -142,6 +152,7 @@ static int __init appldata_mem_init(void)
static void __exit appldata_mem_exit(void)
{
appldata_unregister_ops(&ops);
+ kfree(ops.data);
}
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 1370e358d49..66037d2622b 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -29,7 +29,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_net_sum_data {
+struct appldata_net_sum_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -51,7 +51,7 @@ static struct appldata_net_sum_data {
u64 rx_dropped; /* no space in linux buffers */
u64 tx_dropped; /* no space available in linux */
u64 collisions; /* collisions while transmitting */
-} __attribute__((packed)) appldata_net_sum_data;
+} __packed;
/*
@@ -111,7 +111,7 @@ static void appldata_get_net_sum_data(void *data)
net_data->tx_dropped = tx_dropped;
net_data->collisions = collisions;
- net_data->timestamp = get_clock();
+ net_data->timestamp = get_tod_clock();
net_data->sync_count_2++;
}
@@ -121,7 +121,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_NET_SUM_ID,
.size = sizeof(struct appldata_net_sum_data),
.callback = &appldata_get_net_sum_data,
- .data = &appldata_net_sum_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -134,7 +133,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_net_init(void)
{
- return appldata_register_ops(&ops);
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
}
/*
@@ -145,6 +154,7 @@ static int __init appldata_net_init(void)
static void __exit appldata_net_exit(void)
{
appldata_unregister_ops(&ops);
+ kfree(ops.data);
}
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 87521ba682e..69b23b25ac3 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -156,7 +156,7 @@ static void appldata_get_os_data(void *data)
}
ops.size = new_size;
}
- os_data->timestamp = get_clock();
+ os_data->timestamp = get_tod_clock();
os_data->sync_count_2++;
}
@@ -171,7 +171,7 @@ static int __init appldata_os_init(void)
int rc, max_size;
max_size = sizeof(struct appldata_os_data) +
- (NR_CPUS * sizeof(struct appldata_os_per_cpu));
+ (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
if (max_size > APPLDATA_MAX_REC_SIZE) {
pr_err("Maximum OS record size %i exceeds the maximum "
"record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index f2737a005af..9a42ecec564 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -21,6 +21,6 @@ $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
-install: $(CONFIGURE) $(obj)/image
- sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \
+install: $(CONFIGURE) $(obj)/bzImage
+ sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 3ad8f61c998..f90d1fc6d60 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -6,13 +6,13 @@
BITS := $(if $(CONFIG_64BIT),64,31)
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
- vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \
- sizes.h head$(BITS).o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head$(BITS).o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
@@ -48,6 +48,7 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_XZ) := xz
@@ -56,6 +57,8 @@ $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
$(call if_changed,gzip)
$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+ $(call if_changed,lz4)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
$(call if_changed,lzma)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index c4c6a1cf221..57cbaff1f39 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -47,6 +47,10 @@ static unsigned long free_mem_end_ptr;
#include "../../../../lib/decompress_bunzip2.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
#ifdef CONFIG_KERNEL_LZMA
#include "../../../../lib/decompress_unlzma.c"
#endif
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
new file mode 100644
index 00000000000..fd09a10a2b5
--- /dev/null
+++ b/arch/s390/configs/default_defconfig
@@ -0,0 +1,687 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_PCI_DEBUG=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_RDS_DEBUG=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DEBUG=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_READABLE_ASM=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_SLUB_STATS=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_RT_MUTEX_TESTER=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_TEST_LIST_SORT=y
+CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_RBTREE_TEST=y
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_DMA_API_DEBUG=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
new file mode 100644
index 00000000000..b061180d354
--- /dev/null
+++ b/arch/s390/configs/gcov_defconfig
@@ -0,0 +1,640 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_GCOV_KERNEL=y
+CONFIG_GCOV_PROFILE_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_LATENCYTOP=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_RBTREE_TEST=m
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
new file mode 100644
index 00000000000..d279baa0801
--- /dev/null
+++ b/arch/s390/configs/performance_defconfig
@@ -0,0 +1,632 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
new file mode 100644
index 00000000000..948e0e057a2
--- /dev/null
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -0,0 +1,86 @@
+# CONFIG_SWAP is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+# CONFIG_COMPAT is not set
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_CHECK_STACK is not set
+# CONFIG_CHSC_SCH is not set
+# CONFIG_SCM_BUS is not set
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_IUCV is not set
+CONFIG_ATM=y
+CONFIG_ATM_LANE=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV_XPRAM is not set
+# CONFIG_DCSSBLK is not set
+# CONFIG_DASD is not set
+CONFIG_ENCLOSURE_SERVICES=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_ENCLOSURE=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_ZFCP=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_HVC_IUCV is not set
+CONFIG_RAW_DRIVER=y
+# CONFIG_SCLP_ASYNC is not set
+# CONFIG_HMC_DRV is not set
+# CONFIG_S390_TAPE is not set
+# CONFIG_VMCP is not set
+# CONFIG_MONWRITER is not set
+# CONFIG_S390_VMUR is not set
+# CONFIG_HID is not set
+CONFIG_MEMSTICK=y
+CONFIG_MEMSTICK_DEBUG=y
+CONFIG_MEMSTICK_UNSAFE_RESUME=y
+CONFIG_MSPRO_BLOCK=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_INOTIFY_USER is not set
+CONFIG_CONFIGFS_FS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+# CONFIG_PFAULT is not set
+# CONFIG_S390_HYPFS_FS is not set
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_S390_GUEST is not set
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index b4dbade8ca2..23223cd63e5 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -25,6 +25,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/spinlock.h>
#include "crypt_s390.h"
#define AES_KEYLEN_128 1
@@ -32,10 +33,10 @@
#define AES_KEYLEN_256 4
static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
static char keylen_flag;
struct s390_aes_ctx {
- u8 iv[AES_BLOCK_SIZE];
u8 key[AES_MAX_KEY_SIZE];
long enc;
long dec;
@@ -56,8 +57,7 @@ struct pcc_param {
struct s390_xts_ctx {
u8 key[32];
- u8 xts_param[16];
- struct pcc_param pcc;
+ u8 pcc_key[32];
long enc;
long dec;
int key_len;
@@ -441,30 +441,36 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return aes_set_key(tfm, in_key, key_len);
}
-static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
struct blkcipher_walk *walk)
{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes = walk->nbytes;
+ struct {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[AES_MAX_KEY_SIZE];
+ } param;
if (!nbytes)
goto out;
- memcpy(param, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.key, sctx->key, sctx->key_len);
do {
/* only use complete blocks */
unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, param, out, in, n);
+ ret = crypt_s390_kmc(func, &param, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
} while ((nbytes = walk->nbytes));
- memcpy(walk->iv, param, AES_BLOCK_SIZE);
+ memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
out:
return ret;
@@ -481,7 +487,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
return fallback_blk_enc(desc, dst, src, nbytes);
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
+ return cbc_aes_crypt(desc, sctx->enc, &walk);
}
static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -495,7 +501,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
return fallback_blk_dec(desc, dst, src, nbytes);
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
+ return cbc_aes_crypt(desc, sctx->dec, &walk);
}
static struct crypto_alg cbc_aes_alg = {
@@ -586,7 +592,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
xts_ctx->enc = KM_XTS_128_ENCRYPT;
xts_ctx->dec = KM_XTS_128_DECRYPT;
memcpy(xts_ctx->key + 16, in_key, 16);
- memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16);
+ memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
break;
case 48:
xts_ctx->enc = 0;
@@ -597,7 +603,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
xts_ctx->enc = KM_XTS_256_ENCRYPT;
xts_ctx->dec = KM_XTS_256_DECRYPT;
memcpy(xts_ctx->key, in_key, 32);
- memcpy(xts_ctx->pcc.key, in_key + 32, 32);
+ memcpy(xts_ctx->pcc_key, in_key + 32, 32);
break;
default:
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -616,29 +622,33 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
unsigned int nbytes = walk->nbytes;
unsigned int n;
u8 *in, *out;
- void *param;
+ struct pcc_param pcc_param;
+ struct {
+ u8 key[32];
+ u8 init[16];
+ } xts_param;
if (!nbytes)
goto out;
- memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block));
- memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit));
- memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts));
- memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak));
- param = xts_ctx->pcc.key + offset;
- ret = crypt_s390_pcc(func, param);
+ memset(pcc_param.block, 0, sizeof(pcc_param.block));
+ memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+ memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
+ ret = crypt_s390_pcc(func, &pcc_param.key[offset]);
if (ret < 0)
return -EIO;
- memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16);
- param = xts_ctx->key + offset;
+ memcpy(xts_param.key, xts_ctx->key, 32);
+ memcpy(xts_param.init, pcc_param.xts, 16);
do {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
- ret = crypt_s390_km(func, param, out, in, n);
+ ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -725,6 +735,8 @@ static struct crypto_alg xts_aes_alg = {
}
};
+static int xts_aes_alg_reg;
+
static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -748,43 +760,70 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return aes_set_key(tfm, in_key, key_len);
}
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+ memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+ }
+ return n;
+}
+
static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
{
int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- unsigned int i, n, nbytes;
- u8 buf[AES_BLOCK_SIZE];
- u8 *out, *in;
+ unsigned int n, nbytes;
+ u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
+ u8 *out, *in, *ctrptr = ctrbuf;
if (!walk->nbytes)
return ret;
- memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+ if (spin_trylock(&ctrblk_lock))
+ ctrptr = ctrblk;
+
+ memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
while (nbytes >= AES_BLOCK_SIZE) {
- /* only use complete blocks, max. PAGE_SIZE */
- n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
- nbytes & ~(AES_BLOCK_SIZE - 1);
- for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
- memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
- AES_BLOCK_SIZE);
- crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
- }
- ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
- if (ret < 0 || ret != n)
+ if (ctrptr == ctrblk)
+ n = __ctrblk_init(ctrptr, nbytes);
+ else
+ n = AES_BLOCK_SIZE;
+ ret = crypt_s390_kmctr(func, sctx->key, out, in,
+ n, ctrptr);
+ if (ret < 0 || ret != n) {
+ if (ctrptr == ctrblk)
+ spin_unlock(&ctrblk_lock);
return -EIO;
+ }
if (n > AES_BLOCK_SIZE)
- memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+ memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr, AES_BLOCK_SIZE);
out += n;
in += n;
nbytes -= n;
}
ret = blkcipher_walk_done(desc, walk, nbytes);
}
+ if (ctrptr == ctrblk) {
+ if (nbytes)
+ memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
+ else
+ memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+ spin_unlock(&ctrblk_lock);
+ } else {
+ if (!nbytes)
+ memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+ }
/*
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
@@ -792,14 +831,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
ret = crypt_s390_kmctr(func, sctx->key, buf, in,
- AES_BLOCK_SIZE, ctrblk);
+ AES_BLOCK_SIZE, ctrbuf);
if (ret < 0 || ret != AES_BLOCK_SIZE)
return -EIO;
memcpy(out, buf, nbytes);
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
+ crypto_inc(ctrbuf, AES_BLOCK_SIZE);
ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
}
- memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+
return ret;
}
@@ -846,6 +886,8 @@ static struct crypto_alg ctr_aes_alg = {
}
};
+static int ctr_aes_alg_reg;
+
static int __init aes_s390_init(void)
{
int ret;
@@ -884,6 +926,7 @@ static int __init aes_s390_init(void)
ret = crypto_register_alg(&xts_aes_alg);
if (ret)
goto xts_aes_err;
+ xts_aes_alg_reg = 1;
}
if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
@@ -902,6 +945,7 @@ static int __init aes_s390_init(void)
free_page((unsigned long) ctrblk);
goto ctr_aes_err;
}
+ ctr_aes_alg_reg = 1;
}
out:
@@ -921,9 +965,12 @@ aes_err:
static void __exit aes_s390_fini(void)
{
- crypto_unregister_alg(&ctr_aes_alg);
- free_page((unsigned long) ctrblk);
- crypto_unregister_alg(&xts_aes_alg);
+ if (ctr_aes_alg_reg) {
+ crypto_unregister_alg(&ctr_aes_alg);
+ free_page((unsigned long) ctrblk);
+ }
+ if (xts_aes_alg_reg)
+ crypto_unregister_alg(&xts_aes_alg);
crypto_unregister_alg(&cbc_aes_alg);
crypto_unregister_alg(&ecb_aes_alg);
crypto_unregister_alg(&aes_alg);
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bcca01c9989..7acb77f7ef1 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -25,6 +25,7 @@
#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
struct s390_des_ctx {
u8 iv[DES_BLOCK_SIZE];
@@ -105,29 +106,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
}
static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
- u8 *iv, struct blkcipher_walk *walk)
+ struct blkcipher_walk *walk)
{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes = walk->nbytes;
+ struct {
+ u8 iv[DES_BLOCK_SIZE];
+ u8 key[DES3_KEY_SIZE];
+ } param;
if (!nbytes)
goto out;
- memcpy(iv, walk->iv, DES_BLOCK_SIZE);
+ memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ memcpy(param.key, ctx->key, DES3_KEY_SIZE);
do {
/* only use complete blocks */
unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, iv, out, in, n);
+ ret = crypt_s390_kmc(func, &param, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
nbytes &= DES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
} while ((nbytes = walk->nbytes));
- memcpy(walk->iv, iv, DES_BLOCK_SIZE);
+ memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
out:
return ret;
@@ -179,22 +186,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk);
}
static int cbc_des_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk);
}
static struct crypto_alg cbc_des_alg = {
@@ -237,9 +242,9 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
- if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
- memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
- DES_KEY_SIZE)) &&
+ if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+ crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+ DES_KEY_SIZE)) &&
(*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
*flags |= CRYPTO_TFM_RES_WEAK_KEY;
return -EINVAL;
@@ -327,22 +332,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk);
}
static int cbc_des3_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk);
}
static struct crypto_alg cbc_des3_alg = {
@@ -366,54 +369,83 @@ static struct crypto_alg cbc_des3_alg = {
}
};
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* align to block size, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+ for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+ memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+ crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+ }
+ return n;
+}
+
static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
- struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+ struct s390_des_ctx *ctx,
+ struct blkcipher_walk *walk)
{
int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
- unsigned int i, n, nbytes;
- u8 buf[DES_BLOCK_SIZE];
- u8 *out, *in;
+ unsigned int n, nbytes;
+ u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
+ u8 *out, *in, *ctrptr = ctrbuf;
+
+ if (!walk->nbytes)
+ return ret;
- memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+ if (spin_trylock(&ctrblk_lock))
+ ctrptr = ctrblk;
+
+ memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
while (nbytes >= DES_BLOCK_SIZE) {
- /* align to block size, max. PAGE_SIZE */
- n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
- nbytes & ~(DES_BLOCK_SIZE - 1);
- for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
- memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
- DES_BLOCK_SIZE);
- crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
- }
- ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
- if (ret < 0 || ret != n)
+ if (ctrptr == ctrblk)
+ n = __ctrblk_init(ctrptr, nbytes);
+ else
+ n = DES_BLOCK_SIZE;
+ ret = crypt_s390_kmctr(func, ctx->key, out, in,
+ n, ctrptr);
+ if (ret < 0 || ret != n) {
+ if (ctrptr == ctrblk)
+ spin_unlock(&ctrblk_lock);
return -EIO;
+ }
if (n > DES_BLOCK_SIZE)
- memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+ memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
DES_BLOCK_SIZE);
- crypto_inc(ctrblk, DES_BLOCK_SIZE);
+ crypto_inc(ctrptr, DES_BLOCK_SIZE);
out += n;
in += n;
nbytes -= n;
}
ret = blkcipher_walk_done(desc, walk, nbytes);
}
-
+ if (ctrptr == ctrblk) {
+ if (nbytes)
+ memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
+ else
+ memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+ spin_unlock(&ctrblk_lock);
+ } else {
+ if (!nbytes)
+ memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+ }
/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
if (nbytes) {
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
ret = crypt_s390_kmctr(func, ctx->key, buf, in,
- DES_BLOCK_SIZE, ctrblk);
+ DES_BLOCK_SIZE, ctrbuf);
if (ret < 0 || ret != DES_BLOCK_SIZE)
return -EIO;
memcpy(out, buf, nbytes);
- crypto_inc(ctrblk, DES_BLOCK_SIZE);
+ crypto_inc(ctrbuf, DES_BLOCK_SIZE);
ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
}
- memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
return ret;
}
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index b74400e3e03..2e56498a40d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,14 +1,13 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -27,6 +26,7 @@ CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
+CONFIG_RD_LZ4=y
CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
@@ -39,10 +39,14 @@ CONFIG_MODVERSIONS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_NR_CPUS=256
CONFIG_HZ_100=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -92,40 +96,58 @@ CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=y
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
CONFIG_TUN=m
CONFIG_VIRTIO_NET=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
CONFIG_RAW_DRIVER=m
CONFIG_VIRTIO_BALLOON=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_TRACE=y
-CONFIG_KPROBES_SANITY_TEST=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KPROBES_SANITY_TEST=y
# CONFIG_STRICT_DEVMEM is not set
-CONFIG_CRYPTO_NULL=m
+CONFIG_S390_PTDUMP=y
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
@@ -137,8 +159,11 @@ CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_CRCT10DIF=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
@@ -165,6 +190,8 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
CONFIG_ZCRYPT=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
@@ -172,4 +199,10 @@ CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRC7=m
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
CONFIG_CMM=m
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2e671d5004c..06f8d95a16c 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index f41e0ef7fdf..b34b5ab90a3 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -13,31 +13,33 @@
#include <linux/debugfs.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <asm/hypfs.h>
#define REG_FILE_MODE 0440
#define UPDATE_FILE_MODE 0220
#define DIR_MODE 0550
-extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name);
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
-extern struct dentry *hypfs_create_u64(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
__u64 value);
-extern struct dentry *hypfs_create_str(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
char *string);
/* LPAR Hypervisor */
extern int hypfs_diag_init(void);
extern void hypfs_diag_exit(void);
-extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_diag_create_files(struct dentry *root);
/* VM Hypervisor */
extern int hypfs_vm_init(void);
extern void hypfs_vm_exit(void);
-extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
/* debugfs interface */
struct hypfs_dbfs_file;
@@ -55,6 +57,8 @@ struct hypfs_dbfs_file {
int (*data_create)(void **data, void **data_free_ptr,
size_t *size);
void (*data_free)(const void *buf_free_ptr);
+ long (*unlocked_ioctl) (struct file *, unsigned int,
+ unsigned long);
/* Private data for hypfs_dbfs.c */
struct hypfs_dbfs_data *data;
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 13e76dabbe8..2badf2bf9cd 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -54,7 +54,7 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
if (*ppos != 0)
return 0;
- df = file->f_path.dentry->d_inode->i_private;
+ df = file_inode(file)->i_private;
mutex_lock(&df->lock);
if (!df->data) {
data = hypfs_dbfs_data_alloc(df);
@@ -81,9 +81,25 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
return rc;
}
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct hypfs_dbfs_file *df;
+ long rc;
+
+ df = file->f_path.dentry->d_inode->i_private;
+ mutex_lock(&df->lock);
+ if (df->unlocked_ioctl)
+ rc = df->unlocked_ioctl(file, cmd, arg);
+ else
+ rc = -ENOTTY;
+ mutex_unlock(&df->lock);
+ return rc;
+}
+
static const struct file_operations dbfs_ops = {
.read = dbfs_read,
.llseek = no_llseek,
+ .unlocked_ioctl = dbfs_ioctl,
};
int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
@@ -105,9 +121,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
int hypfs_dbfs_init(void)
{
dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
- if (IS_ERR(dbfs_dir))
- return PTR_ERR(dbfs_dir);
- return 0;
+ return PTR_ERR_OR_ZERO(dbfs_dir);
}
void hypfs_dbfs_exit(void)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 7fd3690b676..5eeffeefae0 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -623,8 +623,7 @@ void hypfs_diag_exit(void)
* *******************************************
*/
-static int hypfs_create_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -632,32 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
cpu_info__acc_time(diag204_info_type, cpu_info) -
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
- rc = hypfs_create_u64(sb, cpu_dir, "cputime",
+ rc = hypfs_create_u64(cpu_dir, "cputime",
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
if (diag204_info_type == INFO_EXT) {
- rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
+ rc = hypfs_create_u64(cpu_dir, "onlinetime",
cpu_info__online_time(diag204_info_type,
cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
}
diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_lpar_files(struct super_block *sb,
- struct dentry *systems_dir, void *part_hdr)
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
{
struct dentry *cpus_dir;
struct dentry *lpar_dir;
@@ -667,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
lpar_name[LPAR_NAME_LEN] = 0;
- lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
+ lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
if (IS_ERR(lpar_dir))
return lpar_dir;
- cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
+ cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = part_hdr + part_hdr__size(diag204_info_type);
for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
int rc;
- rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += cpu_info__size(diag204_info_type);
@@ -684,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
return cpu_info;
}
-static int hypfs_create_phys_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -693,34 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
if (IS_ERR(cpu_dir))
return PTR_ERR(cpu_dir);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
phys_cpu__mgm_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_phys_files(struct super_block *sb,
- struct dentry *parent_dir, void *phys_hdr)
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
{
int i;
void *cpu_info;
struct dentry *cpus_dir;
- cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
+ cpus_dir = hypfs_mkdir(parent_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
int rc;
- rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += phys_cpu__size(diag204_info_type);
@@ -728,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb,
return cpu_info;
}
-int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_diag_create_files(struct dentry *root)
{
struct dentry *systems_dir, *hyp_dir;
void *time_hdr, *part_hdr;
@@ -739,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
if (IS_ERR(buffer))
return PTR_ERR(buffer);
- systems_dir = hypfs_mkdir(sb, root, "systems");
+ systems_dir = hypfs_mkdir(root, "systems");
if (IS_ERR(systems_dir)) {
rc = PTR_ERR(systems_dir);
goto err_out;
@@ -747,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
time_hdr = (struct x_info_blk_hdr *)buffer;
part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
- part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
+ part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
if (IS_ERR(part_hdr)) {
rc = PTR_ERR(part_hdr);
goto err_out;
}
}
if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
- ptr = hypfs_create_phys_files(sb, root, part_hdr);
+ ptr = hypfs_create_phys_files(root, part_hdr);
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
}
}
- hyp_dir = hypfs_mkdir(sb, root, "hyp");
+ hyp_dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(hyp_dir)) {
rc = PTR_ERR(hyp_dir);
goto err_out;
}
- ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
+ ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 00000000000..f043c3c7e73
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,141 @@
+/*
+ * Hypervisor filesystem for Linux on s390.
+ * Set Partition-Resource Parameter interface.
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/compat.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS 0
+#define DIAG304_QUERY_PRP 1
+#define DIAG304_SET_CAPPING 2
+
+#define DIAG304_CMD_MAX 2
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+ register unsigned long _data asm("2") = (unsigned long) data;
+ register unsigned long _rc asm("3");
+ register unsigned long _cmd asm("4") = cmd;
+
+ asm volatile("diag %1,%2,0x304\n"
+ : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+ return _rc;
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+ free_page((unsigned long) data);
+}
+
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+ unsigned long rc;
+ void *data;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP);
+ if (rc != 1) {
+ *data_ptr = *free_ptr = NULL;
+ *size = 0;
+ free_page((unsigned long) data);
+ return -EIO;
+ }
+ *data_ptr = *free_ptr = data;
+ *size = PAGE_SIZE;
+ return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+ struct hypfs_diag304 diag304;
+ unsigned long cmd;
+ void __user *udata;
+ void *data;
+ int rc;
+
+ if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+ return -EFAULT;
+ if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+ return -EINVAL;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!data)
+ return -ENOMEM;
+
+ udata = (void __user *)(unsigned long) diag304.data;
+ if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
+ diag304.args[1] == DIAG304_SET_CAPPING)
+ if (copy_from_user(data, udata, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ cmd = *(unsigned long *) &diag304.args[0];
+ diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+ if (diag304.args[1] == DIAG304_QUERY_PRP)
+ if (copy_to_user(udata, data, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+out:
+ free_page((unsigned long) data);
+ return rc;
+}
+
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (is_compat_task())
+ argp = compat_ptr(arg);
+ else
+ argp = (void __user *) arg;
+ switch (cmd) {
+ case HYPFS_DIAG304:
+ return __hypfs_sprp_ioctl(argp);
+ default: /* unknown ioctl number */
+ return -ENOTTY;
+ }
+ return 0;
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+ .name = "diag_304",
+ .data_create = hypfs_sprp_create,
+ .data_free = hypfs_sprp_free,
+ .unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+int hypfs_sprp_init(void)
+{
+ if (!sclp_has_sprp())
+ return 0;
+ return hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+ if (!sclp_has_sprp())
+ return;
+ hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 4f6afaa8bd8..32040ace00e 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -32,7 +32,7 @@ struct diag2fc_data {
__u32 pcpus;
__u32 lcpus;
__u32 vcpus;
- __u32 cpu_min;
+ __u32 ocpus;
__u32 cpu_max;
__u32 cpu_shares;
__u32 cpu_use_samp;
@@ -107,16 +107,15 @@ static void diag2fc_free(const void *data)
vfree(data);
}
-#define ATTRIBUTE(sb, dir, name, member) \
+#define ATTRIBUTE(dir, name, member) \
do { \
void *rc; \
- rc = hypfs_create_u64(sb, dir, name, member); \
+ rc = hypfs_create_u64(dir, name, member); \
if (IS_ERR(rc)) \
return PTR_ERR(rc); \
} while(0)
-static int hpyfs_vm_create_guest(struct super_block *sb,
- struct dentry *systems_dir,
+static int hpyfs_vm_create_guest(struct dentry *systems_dir,
struct diag2fc_data *data)
{
char guest_name[NAME_LEN + 1] = {};
@@ -130,46 +129,51 @@ static int hpyfs_vm_create_guest(struct super_block *sb,
memcpy(guest_name, data->guest_name, NAME_LEN);
EBCASC(guest_name, NAME_LEN);
strim(guest_name);
- guest_dir = hypfs_mkdir(sb, systems_dir, guest_name);
+ guest_dir = hypfs_mkdir(systems_dir, guest_name);
if (IS_ERR(guest_dir))
return PTR_ERR(guest_dir);
- ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time);
+ ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
/* logical cpu information */
- cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus");
+ cpus_dir = hypfs_mkdir(guest_dir, "cpus");
if (IS_ERR(cpus_dir))
return PTR_ERR(cpus_dir);
- ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu);
- ATTRIBUTE(sb, cpus_dir, "capped", capped_value);
- ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag);
- ATTRIBUTE(sb, cpus_dir, "count", data->vcpus);
- ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min);
- ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max);
- ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares);
+ ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+ ATTRIBUTE(cpus_dir, "capped", capped_value);
+ ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+ ATTRIBUTE(cpus_dir, "count", data->vcpus);
+ /*
+ * Note: The "weight_min" attribute got the wrong name.
+ * The value represents the number of non-stopped (operating)
+ * CPUS.
+ */
+ ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+ ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+ ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
/* memory information */
- mem_dir = hypfs_mkdir(sb, guest_dir, "mem");
+ mem_dir = hypfs_mkdir(guest_dir, "mem");
if (IS_ERR(mem_dir))
return PTR_ERR(mem_dir);
- ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb);
- ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb);
- ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb);
- ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb);
+ ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+ ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+ ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+ ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
/* samples */
- samples_dir = hypfs_mkdir(sb, guest_dir, "samples");
+ samples_dir = hypfs_mkdir(guest_dir, "samples");
if (IS_ERR(samples_dir))
return PTR_ERR(samples_dir);
- ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp);
- ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp);
- ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp);
- ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp);
- ATTRIBUTE(sb, samples_dir, "other", data->other_samp);
- ATTRIBUTE(sb, samples_dir, "total", data->total_samp);
+ ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+ ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+ ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+ ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+ ATTRIBUTE(samples_dir, "other", data->other_samp);
+ ATTRIBUTE(samples_dir, "total", data->total_samp);
return 0;
}
-int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_vm_create_files(struct dentry *root)
{
struct dentry *dir, *file;
struct diag2fc_data *data;
@@ -181,38 +185,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
return PTR_ERR(data);
/* Hpervisor Info */
- dir = hypfs_mkdir(sb, root, "hyp");
+ dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor");
+ file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* physical cpus */
- dir = hypfs_mkdir(sb, root, "cpus");
+ dir = hypfs_mkdir(root, "cpus");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_u64(sb, dir, "count", data->lcpus);
+ file = hypfs_create_u64(dir, "count", data->lcpus);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* guests */
- dir = hypfs_mkdir(sb, root, "systems");
+ dir = hypfs_mkdir(root, "systems");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
for (i = 0; i < count; i++) {
- rc = hpyfs_vm_create_guest(sb, dir, &(data[i]));
+ rc = hpyfs_vm_create_guest(dir, &(data[i]));
if (rc)
goto failed;
}
@@ -245,7 +249,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
if (IS_ERR(d2fc))
return PTR_ERR(d2fc);
- get_clock_ext(d2fc->hdr.tod_ext);
+ get_tod_clock_ext(d2fc->hdr.tod_ext);
d2fc->hdr.len = count * sizeof(struct diag2fc_data);
d2fc->hdr.version = DBFS_D2FC_HDR_VERSION;
d2fc->hdr.count = count;
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 06ea69bd387..c952b981e4f 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -21,14 +21,14 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */
#define TMP_SIZE 64 /* size of temporary buffers */
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir);
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
struct hypfs_sb_info {
kuid_t uid; /* uid used for files and dirs */
@@ -119,7 +119,7 @@ static void hypfs_evict_inode(struct inode *inode)
static int hypfs_open(struct inode *inode, struct file *filp)
{
- char *data = filp->f_path.dentry->d_inode->i_private;
+ char *data = file_inode(filp)->i_private;
struct hypfs_sb_info *fs_info;
if (filp->f_mode & FMODE_WRITE) {
@@ -171,12 +171,10 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t offset)
{
int rc;
- struct super_block *sb;
- struct hypfs_sb_info *fs_info;
+ struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
+ struct hypfs_sb_info *fs_info = sb->s_fs_info;
size_t count = iov_length(iov, nr_segs);
- sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb;
- fs_info = sb->s_fs_info;
/*
* Currently we only allow one update per second for two reasons:
* 1. diag 204 is VERY expensive
@@ -194,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
}
hypfs_delete_tree(sb->s_root);
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, sb->s_root);
+ rc = hypfs_vm_create_files(sb->s_root);
else
- rc = hypfs_diag_create_files(sb, sb->s_root);
+ rc = hypfs_diag_create_files(sb->s_root);
if (rc) {
pr_err("Updating the hypfs tree failed\n");
hypfs_delete_tree(sb->s_root);
@@ -303,12 +301,12 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
if (!root_dentry)
return -ENOMEM;
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, root_dentry);
+ rc = hypfs_vm_create_files(root_dentry);
else
- rc = hypfs_diag_create_files(sb, root_dentry);
+ rc = hypfs_diag_create_files(root_dentry);
if (rc)
return rc;
- sbi->update_file = hypfs_create_update_file(sb, root_dentry);
+ sbi->update_file = hypfs_create_update_file(root_dentry);
if (IS_ERR(sbi->update_file))
return PTR_ERR(sbi->update_file);
hypfs_update_update(sb);
@@ -335,8 +333,7 @@ static void hypfs_kill_super(struct super_block *sb)
kill_litter_super(sb);
}
-static struct dentry *hypfs_create_file(struct super_block *sb,
- struct dentry *parent, const char *name,
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
char *data, umode_t mode)
{
struct dentry *dentry;
@@ -348,7 +345,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
dentry = ERR_PTR(-ENOMEM);
goto fail;
}
- inode = hypfs_make_inode(sb, mode);
+ inode = hypfs_make_inode(parent->d_sb, mode);
if (!inode) {
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
@@ -374,24 +371,22 @@ fail:
return dentry;
}
-struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name)
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE);
+ dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
if (IS_ERR(dentry))
return dentry;
hypfs_add_dentry(dentry);
return dentry;
}
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir)
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, dir, "update", NULL,
+ dentry = hypfs_create_file(dir, "update", NULL,
S_IFREG | UPDATE_FILE_MODE);
/*
* We do not put the update file on the 'delete' list with
@@ -401,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb,
return dentry;
}
-struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_u64(struct dentry *dir,
const char *name, __u64 value)
{
char *buffer;
@@ -413,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
if (!buffer)
return ERR_PTR(-ENOMEM);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
@@ -422,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
return dentry;
}
-struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_str(struct dentry *dir,
const char *name, char *string)
{
char *buffer;
@@ -433,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
return ERR_PTR(-ENOMEM);
sprintf(buffer, "%s\n", string);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
@@ -458,6 +453,7 @@ static struct file_system_type hypfs_type = {
.mount = hypfs_mount,
.kill_sb = hypfs_kill_super
};
+MODULE_ALIAS_FS("s390_hypfs");
static const struct super_operations hypfs_s_ops = {
.statfs = simple_statfs,
@@ -482,10 +478,14 @@ static int __init hypfs_init(void)
rc = -ENODATA;
goto fail_hypfs_diag_exit;
}
+ if (hypfs_sprp_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_vm_exit;
+ }
s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
if (!s390_kobj) {
rc = -ENOMEM;
- goto fail_hypfs_vm_exit;
+ goto fail_hypfs_sprp_exit;
}
rc = register_filesystem(&hypfs_type);
if (rc)
@@ -494,6 +494,8 @@ static int __init hypfs_init(void)
fail_filesystem:
kobject_put(s390_kobj);
+fail_hypfs_sprp_exit:
+ hypfs_sprp_exit();
fail_hypfs_vm_exit:
hypfs_vm_exit();
fail_hypfs_diag_exit:
@@ -506,11 +508,12 @@ fail_dbfs_exit:
static void __exit hypfs_exit(void)
{
- hypfs_diag_exit();
- hypfs_vm_exit();
- hypfs_dbfs_exit();
unregister_filesystem(&hypfs_type);
kobject_put(s390_kobj);
+ hypfs_sprp_exit();
+ hypfs_vm_exit();
+ hypfs_diag_exit();
+ hypfs_dbfs_exit();
}
module_init(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index f313f9cbcf4..57892a8a905 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,4 +1,7 @@
generic-y += clkdev.h
+generic-y += hash.h
+generic-y += mcs_spinlock.h
+generic-y += preempt.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 9819891ed7a..bd93ff6661b 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -9,9 +9,95 @@
#ifndef _ASM_S390_AIRQ_H
#define _ASM_S390_AIRQ_H
-typedef void (*adapter_int_handler_t)(void *, void *);
+#include <linux/bit_spinlock.h>
-void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8);
-void s390_unregister_adapter_interrupt(void *, u8);
+struct airq_struct {
+ struct hlist_node list; /* Handler queueing. */
+ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
+ u8 lsi_mask; /* Local-Summary-Indicator mask */
+ u8 isc; /* Interrupt-subclass */
+ u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED 0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/* Adapter interrupt bit vector */
+struct airq_iv {
+ unsigned long *vector; /* Adapter interrupt bit vector */
+ unsigned long *avail; /* Allocation bit mask for the bit vector */
+ unsigned long *bitlock; /* Lock bit mask for the bit vector */
+ unsigned long *ptr; /* Pointer associated with each bit */
+ unsigned int *data; /* 32 bit value associated with each bit */
+ unsigned long bits; /* Number of bits in the vector */
+ unsigned long end; /* Number of highest allocated bit + 1 */
+ spinlock_t lock; /* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
+void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+ unsigned long end);
+
+static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+ return airq_iv_alloc(iv, 1);
+}
+
+static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+ airq_iv_free(iv, bit, 1);
+}
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+ return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+ unsigned int data)
+{
+ iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+ unsigned long ptr)
+{
+ iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->ptr[bit];
+}
#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index c797832daa5..fa934fe080c 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,25 +15,61 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
-#define __CS_LOOP(ptr, op_val, op_string) ({ \
+#define __ATOMIC_NO_BARRIER "\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC_OR "lao"
+#define __ATOMIC_AND "lan"
+#define __ATOMIC_ADD "laa"
+#define __ATOMIC_BARRIER "bcr 14,0\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ int old_val; \
+ \
+ typecheck(atomic_t *, ptr); \
+ asm volatile( \
+ __barrier \
+ op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (old_val), "+Q" ((ptr)->counter) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC_OR "or"
+#define __ATOMIC_AND "nr"
+#define __ATOMIC_ADD "ar"
+#define __ATOMIC_BARRIER "\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
int old_val, new_val; \
+ \
+ typecheck(atomic_t *, ptr); \
asm volatile( \
" l %0,%2\n" \
"0: lr %1,%0\n" \
op_string " %1,%3\n" \
" cs %0,%1,%2\n" \
" jl 0b" \
- : "=&d" (old_val), "=&d" (new_val), \
- "=Q" (((atomic_t *)(ptr))->counter) \
- : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \
+ : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+ : "d" (op_val) \
: "cc", "memory"); \
- new_val; \
+ old_val; \
})
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
static inline int atomic_read(const atomic_t *v)
{
int c;
@@ -53,32 +89,43 @@ static inline void atomic_set(atomic_t *v, int i)
static inline int atomic_add_return(int i, atomic_t *v)
{
- return __CS_LOOP(v, i, "ar");
+ return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
}
-#define atomic_add(_i, _v) atomic_add_return(_i, _v)
-#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
-#define atomic_inc(_v) atomic_add_return(1, _v)
-#define atomic_inc_return(_v) atomic_add_return(1, _v)
-#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
-static inline int atomic_sub_return(int i, atomic_t *v)
+static inline void atomic_add(int i, atomic_t *v)
{
- return __CS_LOOP(v, i, "sr");
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ asm volatile(
+ "asi %0,%1\n"
+ : "+Q" (v->counter)
+ : "i" (i)
+ : "cc", "memory");
+ return;
+ }
+#endif
+ __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER);
}
-#define atomic_sub(_i, _v) atomic_sub_return(_i, _v)
+
+#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
+#define atomic_inc(_v) atomic_add(1, _v)
+#define atomic_inc_return(_v) atomic_add_return(1, _v)
+#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
+#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
+#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
-#define atomic_dec(_v) atomic_sub_return(1, _v)
+#define atomic_dec(_v) atomic_sub(1, _v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
-static inline void atomic_clear_mask(unsigned long mask, atomic_t *v)
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
{
- __CS_LOOP(v, ~mask, "nr");
+ __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
}
-static inline void atomic_set_mask(unsigned long mask, atomic_t *v)
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
{
- __CS_LOOP(v, mask, "or");
+ __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
}
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -87,8 +134,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
asm volatile(
" cs %0,%2,%1"
- : "+d" (old), "=Q" (v->counter)
- : "d" (new), "Q" (v->counter)
+ : "+d" (old), "+Q" (v->counter)
+ : "d" (new)
: "cc", "memory");
return old;
}
@@ -109,27 +156,62 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
}
-#undef __CS_LOOP
+#undef __ATOMIC_LOOP
#define ATOMIC64_INIT(i) { (i) }
#ifdef CONFIG_64BIT
-#define __CSG_LOOP(ptr, op_val, op_string) ({ \
+#define __ATOMIC64_NO_BARRIER "\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC64_OR "laog"
+#define __ATOMIC64_AND "lang"
+#define __ATOMIC64_ADD "laag"
+#define __ATOMIC64_BARRIER "bcr 14,0\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ long long old_val; \
+ \
+ typecheck(atomic64_t *, ptr); \
+ asm volatile( \
+ __barrier \
+ op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (old_val), "+Q" ((ptr)->counter) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC64_OR "ogr"
+#define __ATOMIC64_AND "ngr"
+#define __ATOMIC64_ADD "agr"
+#define __ATOMIC64_BARRIER "\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
long long old_val, new_val; \
+ \
+ typecheck(atomic64_t *, ptr); \
asm volatile( \
" lg %0,%2\n" \
"0: lgr %1,%0\n" \
op_string " %1,%3\n" \
" csg %0,%1,%2\n" \
" jl 0b" \
- : "=&d" (old_val), "=&d" (new_val), \
- "=Q" (((atomic_t *)(ptr))->counter) \
- : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \
+ : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+ : "d" (op_val) \
: "cc", "memory"); \
- new_val; \
+ old_val; \
})
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
static inline long long atomic64_read(const atomic64_t *v)
{
long long c;
@@ -149,22 +231,32 @@ static inline void atomic64_set(atomic64_t *v, long long i)
static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
- return __CSG_LOOP(v, i, "agr");
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
}
-static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+static inline void atomic64_add(long long i, atomic64_t *v)
{
- return __CSG_LOOP(v, i, "sgr");
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ asm volatile(
+ "agsi %0,%1\n"
+ : "+Q" (v->counter)
+ : "i" (i)
+ : "cc", "memory");
+ return;
+ }
+#endif
+ __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
}
static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
{
- __CSG_LOOP(v, ~mask, "ngr");
+ __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
}
static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
{
- __CSG_LOOP(v, mask, "ogr");
+ __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
}
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
@@ -174,13 +266,13 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
{
asm volatile(
" csg %0,%2,%1"
- : "+d" (old), "=Q" (v->counter)
- : "d" (new), "Q" (v->counter)
+ : "+d" (old), "+Q" (v->counter)
+ : "d" (new)
: "cc", "memory");
return old;
}
-#undef __CSG_LOOP
+#undef __ATOMIC64_LOOP
#else /* CONFIG_64BIT */
@@ -216,8 +308,8 @@ static inline long long atomic64_xchg(atomic64_t *v, long long new)
" lm %0,%N0,%1\n"
"0: cds %0,%2,%1\n"
" jl 0b\n"
- : "=&d" (rp_old), "=Q" (v->counter)
- : "d" (rp_new), "Q" (v->counter)
+ : "=&d" (rp_old), "+Q" (v->counter)
+ : "d" (rp_new)
: "cc");
return rp_old.pair;
}
@@ -230,8 +322,8 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
asm volatile(
" cds %0,%2,%1"
- : "+&d" (rp_old), "=Q" (v->counter)
- : "d" (rp_new), "Q" (v->counter)
+ : "+&d" (rp_old), "+Q" (v->counter)
+ : "d" (rp_new)
: "cc");
return rp_old.pair;
}
@@ -248,17 +340,6 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
return new;
}
-static inline long long atomic64_sub_return(long long i, atomic64_t *v)
-{
- long long old, new;
-
- do {
- old = atomic64_read(v);
- new = old - i;
- } while (atomic64_cmpxchg(v, old, new) != old);
- return new;
-}
-
static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v)
{
long long old, new;
@@ -279,9 +360,14 @@ static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
} while (atomic64_cmpxchg(v, old, new) != old);
}
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+ atomic64_add_return(i, v);
+}
+
#endif /* CONFIG_64BIT */
-static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
{
long long c, old;
@@ -289,7 +375,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
for (;;) {
if (unlikely(c == u))
break;
- old = atomic64_cmpxchg(v, c, c + a);
+ old = atomic64_cmpxchg(v, c, c + i);
if (likely(old == c))
break;
c = old;
@@ -314,21 +400,16 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
return dec;
}
-#define atomic64_add(_i, _v) atomic64_add_return(_i, _v)
#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
-#define atomic64_inc(_v) atomic64_add_return(1, _v)
+#define atomic64_inc(_v) atomic64_add(1, _v)
#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
-#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v)
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v)
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
-#define atomic64_dec(_v) atomic64_sub_return(1, _v)
+#define atomic64_dec(_v) atomic64_sub(1, _v)
#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__after_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_inc() smp_mb()
-
#endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 10a50880294..19ff956b752 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -13,15 +13,12 @@
* to devices.
*/
-static inline void mb(void)
-{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- /* Fast-BCR without checkpoint synchronization */
- asm volatile("bcr 14,0" : : : "memory");
+/* Fast-BCR without checkpoint synchronization */
+#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
#else
- asm volatile("bcr 15,0" : : : "memory");
+#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
#endif
-}
#define rmb() mb()
#define wmb() mb()
@@ -30,9 +27,25 @@ static inline void mb(void)
#define smp_rmb() rmb()
#define smp_wmb() wmb()
#define smp_read_barrier_depends() read_barrier_depends()
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() smp_mb()
+
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() smp_mb()
#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 15422933c60..52054247767 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -1,10 +1,40 @@
/*
- * S390 version
- * Copyright IBM Corp. 1999
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ * Copyright IBM Corp. 1999,2013
*
- * Derived from "include/asm-i386/bitops.h"
- * Copyright (C) 1992, Linus Torvalds
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ * The description below was taken in large parts from the powerpc
+ * bitops header file:
+ * Within a word, bits are numbered LSB first. Lot's of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for an
+ * s390x system the bits end up numbered:
+ * |63..............0|127............64|191...........128|255...........192|
+ * and on s390:
+ * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ *
+ * We also have special functions which work with an MSB0 encoding:
+ * on an s390x system the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
+ * number field needs to be reversed compared to the LSB0 encoded bit
+ * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
*
*/
@@ -15,561 +45,353 @@
#error only <linux/bitops.h> can be included directly
#endif
+#include <linux/typecheck.h>
#include <linux/compiler.h>
+#include <asm/barrier.h>
-/*
- * 32 bit bitops format:
- * bit 0 is the LSB of *addr; bit 31 is the MSB of *addr;
- * bit 32 is the LSB of *(addr+4). That combined with the
- * big endian byte order on S390 give the following bit
- * order in memory:
- * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 \
- * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- * after that follows the next long with bit numbers
- * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30
- * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20
- * The reason for this bit ordering is the fact that
- * in the architecture independent code bits operations
- * of the form "flags |= (1 << bitnr)" are used INTERMIXED
- * with operation of the form "set_bit(bitnr, flags)".
- *
- * 64 bit bitops format:
- * bit 0 is the LSB of *addr; bit 63 is the MSB of *addr;
- * bit 64 is the LSB of *(addr+8). That combined with the
- * big endian byte order on S390 give the following bit
- * order in memory:
- * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30
- * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20
- * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10
- * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- * after that follows the next long with bit numbers
- * 7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70
- * 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60
- * 5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50
- * 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40
- * The reason for this bit ordering is the fact that
- * in the architecture independent code bits operations
- * of the form "flags |= (1 << bitnr)" are used INTERMIXED
- * with operation of the form "set_bit(bitnr, flags)".
- */
-
-/* bitmap tables from arch/s390/kernel/bitmap.c */
-extern const char _oi_bitmap[];
-extern const char _ni_bitmap[];
-extern const char _zb_findmap[];
-extern const char _sb_findmap[];
+#define __BITOPS_NO_BARRIER "\n"
#ifndef CONFIG_64BIT
-#define __BITOPS_ALIGN 3
-#define __BITOPS_WORDSIZE 32
#define __BITOPS_OR "or"
#define __BITOPS_AND "nr"
#define __BITOPS_XOR "xr"
+#define __BITOPS_BARRIER "\n"
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old, __new; \
+ \
+ typecheck(unsigned long *, (__addr)); \
asm volatile( \
" l %0,%2\n" \
"0: lr %1,%0\n" \
__op_string " %1,%3\n" \
" cs %0,%1,%2\n" \
" jl 0b" \
- : "=&d" (__old), "=&d" (__new), \
- "=Q" (*(unsigned long *) __addr) \
- : "d" (__val), "Q" (*(unsigned long *) __addr) \
- : "cc");
+ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
#else /* CONFIG_64BIT */
-#define __BITOPS_ALIGN 7
-#define __BITOPS_WORDSIZE 64
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __BITOPS_OR "laog"
+#define __BITOPS_AND "lang"
+#define __BITOPS_XOR "laxg"
+#define __BITOPS_BARRIER "bcr 14,0\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old; \
+ \
+ typecheck(unsigned long *, (__addr)); \
+ asm volatile( \
+ __barrier \
+ __op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (__old), "+Q" (*(__addr)) \
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
#define __BITOPS_OR "ogr"
#define __BITOPS_AND "ngr"
#define __BITOPS_XOR "xgr"
+#define __BITOPS_BARRIER "\n"
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old, __new; \
+ \
+ typecheck(unsigned long *, (__addr)); \
asm volatile( \
" lg %0,%2\n" \
"0: lgr %1,%0\n" \
__op_string " %1,%3\n" \
" csg %0,%1,%2\n" \
" jl 0b" \
- : "=&d" (__old), "=&d" (__new), \
- "=Q" (*(unsigned long *) __addr) \
- : "d" (__val), "Q" (*(unsigned long *) __addr) \
- : "cc");
+ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#endif /* CONFIG_64BIT */
-#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE)
-#define __BITOPS_BARRIER() asm volatile("" : : : "memory")
+#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
-#ifdef CONFIG_SMP
-/*
- * SMP safe set_bit routine based on compare and swap (CS)
- */
-static inline void set_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+static inline unsigned long *
+__bitops_word(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make OR mask */
- mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR);
+ unsigned long addr;
+
+ addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
+ return (unsigned long *)addr;
}
-/*
- * SMP safe clear_bit routine based on compare and swap (CS)
- */
-static inline void clear_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+static inline unsigned char *
+__bitops_byte(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make AND mask */
- mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1)));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND);
+ return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
}
-/*
- * SMP safe change_bit routine based on compare and swap (CS)
- */
-static inline void change_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make XOR mask */
- mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR);
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "oi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER);
}
-/*
- * SMP safe test_and_set_bit routine based on compare and swap (CS)
- */
-static inline int
-test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make OR/test mask */
- mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR);
- __BITOPS_BARRIER();
- return (old & mask) != 0;
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "ni %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (~(1 << (nr & 7)))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER);
}
-/*
- * SMP safe test_and_clear_bit routine based on compare and swap (CS)
- */
-static inline int
-test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make AND/test mask */
- mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1)));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND);
- __BITOPS_BARRIER();
- return (old ^ new) != 0;
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "xi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER);
}
-/*
- * SMP safe test_and_change_bit routine based on compare and swap (CS)
- */
static inline int
-test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr)
+test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr, old, new, mask;
-
- addr = (unsigned long) ptr;
- /* calculate address for CS */
- addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3;
- /* make XOR/test mask */
- mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1));
- /* Do the atomic update. */
- __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR);
- __BITOPS_BARRIER();
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER);
return (old & mask) != 0;
}
-#endif /* CONFIG_SMP */
-/*
- * fast, non-SMP set_bit routine
- */
-static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline int
+test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- asm volatile(
- " oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER);
+ return (old & ~mask) != 0;
}
-static inline void
-__constant_set_bit(const unsigned long nr, volatile unsigned long *ptr)
+static inline int
+test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
- addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- *(unsigned char *) addr |= 1 << (nr & 7);
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER);
+ return (old & mask) != 0;
}
-#define set_bit_simple(nr,addr) \
-(__builtin_constant_p((nr)) ? \
- __constant_set_bit((nr),(addr)) : \
- __set_bit((nr),(addr)) )
-
-/*
- * fast, non-SMP clear_bit routine
- */
-static inline void
-__clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- asm volatile(
- " nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" );
+ *addr |= 1 << (nr & 7);
}
static inline void
-__constant_clear_bit(const unsigned long nr, volatile unsigned long *ptr)
+__clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
- addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- *(unsigned char *) addr &= ~(1 << (nr & 7));
+ *addr &= ~(1 << (nr & 7));
}
-#define clear_bit_simple(nr,addr) \
-(__builtin_constant_p((nr)) ? \
- __constant_clear_bit((nr),(addr)) : \
- __clear_bit((nr),(addr)) )
-
-/*
- * fast, non-SMP change_bit routine
- */
static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
-
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- asm volatile(
- " xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
-}
-
-static inline void
-__constant_change_bit(const unsigned long nr, volatile unsigned long *ptr)
-{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
- addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- *(unsigned char *) addr ^= 1 << (nr & 7);
+ *addr ^= 1 << (nr & 7);
}
-#define change_bit_simple(nr,addr) \
-(__builtin_constant_p((nr)) ? \
- __constant_change_bit((nr),(addr)) : \
- __change_bit((nr),(addr)) )
-
-/*
- * fast, non-SMP test_and_set_bit routine
- */
static inline int
-test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr)
+__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- ch = *(unsigned char *) addr;
- asm volatile(
- " oc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
- : "cc", "memory");
+ ch = *addr;
+ *addr |= 1 << (nr & 7);
return (ch >> (nr & 7)) & 1;
}
-#define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
-/*
- * fast, non-SMP test_and_clear_bit routine
- */
static inline int
-test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr)
+__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- ch = *(unsigned char *) addr;
- asm volatile(
- " nc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
- : "cc", "memory");
+ ch = *addr;
+ *addr &= ~(1 << (nr & 7));
return (ch >> (nr & 7)) & 1;
}
-#define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
-/*
- * fast, non-SMP test_and_change_bit routine
- */
static inline int
-test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr)
+__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
{
- unsigned long addr;
+ unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- ch = *(unsigned char *) addr;
- asm volatile(
- " xc %O0(1,%R0),%1"
- : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
- : "cc", "memory");
+ ch = *addr;
+ *addr ^= 1 << (nr & 7);
return (ch >> (nr & 7)) & 1;
}
-#define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
-
-#ifdef CONFIG_SMP
-#define set_bit set_bit_cs
-#define clear_bit clear_bit_cs
-#define change_bit change_bit_cs
-#define test_and_set_bit test_and_set_bit_cs
-#define test_and_clear_bit test_and_clear_bit_cs
-#define test_and_change_bit test_and_change_bit_cs
-#else
-#define set_bit set_bit_simple
-#define clear_bit clear_bit_simple
-#define change_bit change_bit_simple
-#define test_and_set_bit test_and_set_bit_simple
-#define test_and_clear_bit test_and_clear_bit_simple
-#define test_and_change_bit test_and_change_bit_simple
-#endif
-
-/*
- * This routine doesn't need to be atomic.
- */
-
-static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr)
+static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
{
- unsigned long addr;
- unsigned char ch;
+ const volatile unsigned char *addr;
- addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
- ch = *(volatile unsigned char *) addr;
- return (ch >> (nr & 7)) & 1;
-}
-
-static inline int
-__constant_test_bit(unsigned long nr, const volatile unsigned long *addr) {
- return (((volatile char *) addr)
- [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0;
+ addr = ((const volatile unsigned char *)ptr);
+ addr += (nr ^ (BITS_PER_LONG - 8)) >> 3;
+ return (*addr >> (nr & 7)) & 1;
}
-#define test_bit(nr,addr) \
-(__builtin_constant_p((nr)) ? \
- __constant_test_bit((nr),(addr)) : \
- __test_bit((nr),(addr)) )
-
/*
- * Optimized find bit helper functions.
+ * Functions which use MSB0 bit numbering.
+ * On an s390x system the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
*/
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset);
-/**
- * __ffz_word_loop - find byte offset of first long != -1UL
- * @addr: pointer to array of unsigned long
- * @size: size of the array in bits
- */
-static inline unsigned long __ffz_word_loop(const unsigned long *addr,
- unsigned long size)
+static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long bytes = 0;
-
- asm volatile(
-#ifndef CONFIG_64BIT
- " ahi %1,-1\n"
- " sra %1,5\n"
- " jz 1f\n"
- "0: c %2,0(%0,%3)\n"
- " jne 1f\n"
- " la %0,4(%0)\n"
- " brct %1,0b\n"
- "1:\n"
-#else
- " aghi %1,-1\n"
- " srag %1,%1,6\n"
- " jz 1f\n"
- "0: cg %2,0(%0,%3)\n"
- " jne 1f\n"
- " la %0,8(%0)\n"
- " brct %1,0b\n"
- "1:\n"
-#endif
- : "+&a" (bytes), "+&d" (size)
- : "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr)
- : "cc" );
- return bytes;
-}
-
-/**
- * __ffs_word_loop - find byte offset of first long != 0UL
- * @addr: pointer to array of unsigned long
- * @size: size of the array in bits
- */
-static inline unsigned long __ffs_word_loop(const unsigned long *addr,
- unsigned long size)
-{
- typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
- unsigned long bytes = 0;
-
- asm volatile(
-#ifndef CONFIG_64BIT
- " ahi %1,-1\n"
- " sra %1,5\n"
- " jz 1f\n"
- "0: c %2,0(%0,%3)\n"
- " jne 1f\n"
- " la %0,4(%0)\n"
- " brct %1,0b\n"
- "1:\n"
-#else
- " aghi %1,-1\n"
- " srag %1,%1,6\n"
- " jz 1f\n"
- "0: cg %2,0(%0,%3)\n"
- " jne 1f\n"
- " la %0,8(%0)\n"
- " brct %1,0b\n"
- "1:\n"
-#endif
- : "+&a" (bytes), "+&a" (size)
- : "d" (0UL), "a" (addr), "m" (*(addrtype *) addr)
- : "cc" );
- return bytes;
+ return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-/**
- * __ffz_word - add number of the first unset bit
- * @nr: base value the bit number is added to
- * @word: the word that is searched for unset bits
- */
-static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
+static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
-#ifdef CONFIG_64BIT
- if ((word & 0xffffffff) == 0xffffffff) {
- word >>= 32;
- nr += 32;
- }
-#endif
- if ((word & 0xffff) == 0xffff) {
- word >>= 16;
- nr += 16;
- }
- if ((word & 0xff) == 0xff) {
- word >>= 8;
- nr += 8;
- }
- return nr + _zb_findmap[(unsigned char) word];
+ return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-/**
- * __ffs_word - add number of the first set bit
- * @nr: base value the bit number is added to
- * @word: the word that is searched for set bits
- */
-static inline unsigned long __ffs_word(unsigned long nr, unsigned long word)
+static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
-#ifdef CONFIG_64BIT
- if ((word & 0xffffffff) == 0) {
- word >>= 32;
- nr += 32;
- }
-#endif
- if ((word & 0xffff) == 0) {
- word >>= 16;
- nr += 16;
- }
- if ((word & 0xff) == 0) {
- word >>= 8;
- nr += 8;
- }
- return nr + _sb_findmap[(unsigned char) word];
+ return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-
-/**
- * __load_ulong_be - load big endian unsigned long
- * @p: pointer to array of unsigned long
- * @offset: byte offset of source value in the array
- */
-static inline unsigned long __load_ulong_be(const unsigned long *p,
- unsigned long offset)
+static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
- p = (unsigned long *)((unsigned long) p + offset);
- return *p;
+ return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-/**
- * __load_ulong_le - load little endian unsigned long
- * @p: pointer to array of unsigned long
- * @offset: byte offset of source value in the array
- */
-static inline unsigned long __load_ulong_le(const unsigned long *p,
- unsigned long offset)
+static inline int test_bit_inv(unsigned long nr,
+ const volatile unsigned long *ptr)
{
- unsigned long word;
-
- p = (unsigned long *)((unsigned long) p + offset);
-#ifndef CONFIG_64BIT
- asm volatile(
- " ic %0,%O1(%R1)\n"
- " icm %0,2,%O1+1(%R1)\n"
- " icm %0,4,%O1+2(%R1)\n"
- " icm %0,8,%O1+3(%R1)"
- : "=&d" (word) : "Q" (*p) : "cc");
-#else
- asm volatile(
- " lrvg %0,%1"
- : "=d" (word) : "m" (*p) );
-#endif
- return word;
+ return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-/*
- * The various find bit functions.
- */
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
-/*
- * ffz - find first zero in word.
- * @word: The word to search
+/**
+ * __flogr - find leftmost one
+ * @word - The word to search
*
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
- return __ffz_word(0, word);
+ * Returns the bit number of the most significant bit set,
+ * where the most significant bit has bit number 0.
+ * If no bit is set this function returns 64.
+ */
+static inline unsigned char __flogr(unsigned long word)
+{
+ if (__builtin_constant_p(word)) {
+ unsigned long bit = 0;
+
+ if (!word)
+ return 64;
+ if (!(word & 0xffffffff00000000UL)) {
+ word <<= 32;
+ bit += 32;
+ }
+ if (!(word & 0xffff000000000000UL)) {
+ word <<= 16;
+ bit += 16;
+ }
+ if (!(word & 0xff00000000000000UL)) {
+ word <<= 8;
+ bit += 8;
+ }
+ if (!(word & 0xf000000000000000UL)) {
+ word <<= 4;
+ bit += 4;
+ }
+ if (!(word & 0xc000000000000000UL)) {
+ word <<= 2;
+ bit += 2;
+ }
+ if (!(word & 0x8000000000000000UL)) {
+ word <<= 1;
+ bit += 1;
+ }
+ return bit;
+ } else {
+ register unsigned long bit asm("4") = word;
+ register unsigned long out asm("5");
+
+ asm volatile(
+ " flogr %[bit],%[bit]\n"
+ : [bit] "+d" (bit), [out] "=d" (out) : : "cc");
+ return bit;
+ }
}
/**
@@ -578,337 +400,83 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static inline unsigned long __ffs (unsigned long word)
+static inline unsigned long __ffs(unsigned long word)
{
- return __ffs_word(0, word);
+ return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
}
/**
* ffs - find first bit set
- * @x: the word to search
+ * @word: the word to search
*
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * This is defined the same way as the libc and
+ * compiler builtin ffs routines (man ffs).
*/
-static inline int ffs(int x)
+static inline int ffs(int word)
{
- if (!x)
- return 0;
- return __ffs_word(1, x);
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
+ unsigned int val = (unsigned int)word;
+
+ return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask;
}
/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
*
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
+ * Undefined if no set bit exists, so code should check against 0 first.
*/
-static inline unsigned long find_first_zero_bit(const unsigned long *addr,
- unsigned long size)
+static inline unsigned long __fls(unsigned long word)
{
- unsigned long bytes, bits;
-
- if (!size)
- return 0;
- bytes = __ffz_word_loop(addr, size);
- bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes));
- return (bits < size) ? bits : size;
+ return __flogr(word) ^ (BITS_PER_LONG - 1);
}
-#define find_first_zero_bit find_first_zero_bit
/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
+ * fls64 - find last set bit in a 64-bit word
+ * @word: the word to search
*
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-static inline unsigned long find_first_bit(const unsigned long * addr,
- unsigned long size)
-{
- unsigned long bytes, bits;
-
- if (!size)
- return 0;
- bytes = __ffs_word_loop(addr, size);
- bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes));
- return (bits < size) ? bits : size;
-}
-#define find_first_bit find_first_bit
-
-/*
- * Big endian variant whichs starts bit counting from left using
- * the flogr (find leftmost one) instruction.
- */
-static inline unsigned long __flo_word(unsigned long nr, unsigned long val)
-{
- register unsigned long bit asm("2") = val;
- register unsigned long out asm("3");
-
- asm volatile (
- " .insn rre,0xb9830000,%[bit],%[bit]\n"
- : [bit] "+d" (bit), [out] "=d" (out) : : "cc");
- return nr + bit;
-}
-
-/*
- * 64 bit special left bitops format:
- * order in memory:
- * 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
- * 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
- * 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
- * 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
- * after that follows the next long with bit numbers
- * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
- * 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
- * 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
- * 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
- * The reason for this bit ordering is the fact that
- * the hardware sets bits in a bitmap starting at bit 0
- * and we don't want to scan the bitmap from the 'wrong
- * end'.
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
*/
-static inline unsigned long find_first_bit_left(const unsigned long *addr,
- unsigned long size)
+static inline int fls64(unsigned long word)
{
- unsigned long bytes, bits;
-
- if (!size)
- return 0;
- bytes = __ffs_word_loop(addr, size);
- bits = __flo_word(bytes * 8, __load_ulong_be(addr, bytes));
- return (bits < size) ? bits : size;
-}
-
-static inline int find_next_bit_left(const unsigned long *addr,
- unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p;
- unsigned long bit, set;
-
- if (offset >= size)
- return size;
- bit = offset & (__BITOPS_WORDSIZE - 1);
- offset -= bit;
- size -= offset;
- p = addr + offset / __BITOPS_WORDSIZE;
- if (bit) {
- set = __flo_word(0, *p & (~0UL << bit));
- if (set >= size)
- return size + offset;
- if (set < __BITOPS_WORDSIZE)
- return set + offset;
- offset += __BITOPS_WORDSIZE;
- size -= __BITOPS_WORDSIZE;
- p++;
- }
- return offset + find_first_bit_left(p, size);
-}
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
-#define for_each_set_bit_left(bit, addr, size) \
- for ((bit) = find_first_bit_left((addr), (size)); \
- (bit) < (size); \
- (bit) = find_next_bit_left((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_left_cont(bit, addr, size) \
- for ((bit) = find_next_bit_left((addr), (size), (bit)); \
- (bit) < (size); \
- (bit) = find_next_bit_left((addr), (size), (bit) + 1))
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-static inline int find_next_zero_bit (const unsigned long * addr,
- unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p;
- unsigned long bit, set;
-
- if (offset >= size)
- return size;
- bit = offset & (__BITOPS_WORDSIZE - 1);
- offset -= bit;
- size -= offset;
- p = addr + offset / __BITOPS_WORDSIZE;
- if (bit) {
- /*
- * __ffz_word returns __BITOPS_WORDSIZE
- * if no zero bit is present in the word.
- */
- set = __ffz_word(bit, *p >> bit);
- if (set >= size)
- return size + offset;
- if (set < __BITOPS_WORDSIZE)
- return set + offset;
- offset += __BITOPS_WORDSIZE;
- size -= __BITOPS_WORDSIZE;
- p++;
- }
- return offset + find_first_zero_bit(p, size);
+ return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask;
}
-#define find_next_zero_bit find_next_zero_bit
/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
+ * fls - find last (most-significant) bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int find_next_bit (const unsigned long * addr,
- unsigned long size,
- unsigned long offset)
+static inline int fls(int word)
{
- const unsigned long *p;
- unsigned long bit, set;
-
- if (offset >= size)
- return size;
- bit = offset & (__BITOPS_WORDSIZE - 1);
- offset -= bit;
- size -= offset;
- p = addr + offset / __BITOPS_WORDSIZE;
- if (bit) {
- /*
- * __ffs_word returns __BITOPS_WORDSIZE
- * if no one bit is present in the word.
- */
- set = __ffs_word(0, *p & (~0UL << bit));
- if (set >= size)
- return size + offset;
- if (set < __BITOPS_WORDSIZE)
- return set + offset;
- offset += __BITOPS_WORDSIZE;
- size -= __BITOPS_WORDSIZE;
- p++;
- }
- return offset + find_first_bit(p, size);
+ return fls64((unsigned int)word);
}
-#define find_next_bit find_next_bit
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is cleared.
- */
-static inline int sched_find_first_bit(unsigned long *b)
-{
- return find_first_bit(b, 140);
-}
+#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
-#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/fls64.h>
+#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
-
-/*
- * ATTENTION: intel byte ordering convention for ext2 and minix !!
- * bit 0 is the LSB of addr; bit 31 is the MSB of addr;
- * bit 32 is the LSB of (addr+4).
- * That combined with the little endian byte order of Intel gives the
- * following bit order in memory:
- * 07 06 05 04 03 02 01 00 15 14 13 12 11 10 09 08 \
- * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24
- */
-
-static inline int find_first_zero_bit_le(void *vaddr, unsigned int size)
-{
- unsigned long bytes, bits;
-
- if (!size)
- return 0;
- bytes = __ffz_word_loop(vaddr, size);
- bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes));
- return (bits < size) ? bits : size;
-}
-#define find_first_zero_bit_le find_first_zero_bit_le
-
-static inline int find_next_zero_bit_le(void *vaddr, unsigned long size,
- unsigned long offset)
-{
- unsigned long *addr = vaddr, *p;
- unsigned long bit, set;
-
- if (offset >= size)
- return size;
- bit = offset & (__BITOPS_WORDSIZE - 1);
- offset -= bit;
- size -= offset;
- p = addr + offset / __BITOPS_WORDSIZE;
- if (bit) {
- /*
- * s390 version of ffz returns __BITOPS_WORDSIZE
- * if no zero bit is present in the word.
- */
- set = __ffz_word(bit, __load_ulong_le(p, 0) >> bit);
- if (set >= size)
- return size + offset;
- if (set < __BITOPS_WORDSIZE)
- return set + offset;
- offset += __BITOPS_WORDSIZE;
- size -= __BITOPS_WORDSIZE;
- p++;
- }
- return offset + find_first_zero_bit_le(p, size);
-}
-#define find_next_zero_bit_le find_next_zero_bit_le
-
-static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size)
-{
- unsigned long bytes, bits;
-
- if (!size)
- return 0;
- bytes = __ffs_word_loop(vaddr, size);
- bits = __ffs_word(bytes*8, __load_ulong_le(vaddr, bytes));
- return (bits < size) ? bits : size;
-}
-#define find_first_bit_le find_first_bit_le
-
-static inline int find_next_bit_le(void *vaddr, unsigned long size,
- unsigned long offset)
-{
- unsigned long *addr = vaddr, *p;
- unsigned long bit, set;
-
- if (offset >= size)
- return size;
- bit = offset & (__BITOPS_WORDSIZE - 1);
- offset -= bit;
- size -= offset;
- p = addr + offset / __BITOPS_WORDSIZE;
- if (bit) {
- /*
- * s390 version of ffz returns __BITOPS_WORDSIZE
- * if no zero bit is present in the word.
- */
- set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit));
- if (set >= size)
- return size + offset;
- if (set < __BITOPS_WORDSIZE)
- return set + offset;
- offset += __BITOPS_WORDSIZE;
- size -= __BITOPS_WORDSIZE;
- p++;
- }
- return offset + find_first_bit_le(p, size);
-}
-#define find_next_bit_le find_next_bit_le
-
+#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
-
#include <asm-generic/bitops/ext2-atomic-setbit.h>
#endif /* _S390_BITOPS_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index e6061617a50..b80e456d642 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -219,12 +219,15 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
-extern struct ccw_device *ccw_device_probe_console(void);
-extern int ccw_device_force_console(void);
+extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
+extern void ccw_device_destroy_console(struct ccw_device *);
+extern int ccw_device_enable_console(struct ccw_device *);
+extern void ccw_device_wait_idle(struct ccw_device *);
+extern int ccw_device_force_console(struct ccw_device *);
int ccw_device_siosl(struct ccw_device *);
extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
-extern void *ccw_device_get_chp_desc(struct ccw_device *, int);
+struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 23723ce5ca7..057ce0ca637 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -10,6 +10,8 @@ struct ccw_driver;
* @count: number of attached slave devices
* @dev: embedded device structure
* @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: work to be done when a ccwgroup notifier has action
+ * type %BUS_NOTIFY_UNBIND_DRIVER
*/
struct ccwgroup_device {
enum {
@@ -22,6 +24,7 @@ struct ccwgroup_device {
/* public: */
unsigned int count;
struct device dev;
+ struct work_struct ungroup_work;
struct ccw_device *cdev[0];
};
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 4f57a4f3909..74036485635 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -44,22 +44,15 @@ csum_partial(const void *buff, int len, __wsum sum)
* here even more important to align src and dst on a 32-bit (or even
* better 64-bit) boundary
*
- * Copy from userspace and compute checksum. If we catch an exception
- * then zero the rest of the buffer.
+ * Copy from userspace and compute checksum.
*/
static inline __wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum sum,
int *err_ptr)
{
- int missing;
-
- missing = copy_from_user(dst, src, len);
- if (missing) {
- memset(dst + len - missing, 0, missing);
+ if (unlikely(copy_from_user(dst, src, len)))
*err_ptr = -EFAULT;
- }
-
return csum_partial(dst, len, sum);
}
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 38c405ef89c..7298eec9854 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -8,6 +8,17 @@
#include <uapi/asm/chpid.h>
#include <asm/cio.h>
+struct channel_path_desc {
+ u8 flags;
+ u8 lsn;
+ u8 desc;
+ u8 chpid;
+ u8 swla;
+ u8 zeroes;
+ u8 chla;
+ u8 chpp;
+} __packed;
+
static inline void chp_id_init(struct chp_id *chpid)
{
memset(chpid, 0, sizeof(struct chp_id));
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index ad2b924167d..09633920776 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -199,7 +199,7 @@ struct esw_eadm {
/**
* struct irb - interruption response block
* @scsw: subchannel status word
- * @esw: extened status word
+ * @esw: extended status word
* @ecw: extended control word
*
* The irb that is handed to the device driver when an interrupt occurs. For
@@ -296,8 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
return 0;
}
-extern void wait_cons_dev(void);
-
+void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index 6c3aecc245f..a0e71a501f7 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -2,7 +2,7 @@
#define _ASM_S390_CLP_H
/* CLP common request & response block size */
-#define CLP_BLK_SIZE (PAGE_SIZE * 2)
+#define CLP_BLK_SIZE PAGE_SIZE
struct clp_req_hdr {
u16 len;
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 0f636cbdf34..4236408070e 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -185,11 +185,12 @@ static inline unsigned long long __cmpxchg64(void *ptr,
{
register_pair rp_old = {.pair = old};
register_pair rp_new = {.pair = new};
+ unsigned long long *ullptr = ptr;
asm volatile(
" cds %0,%2,%1"
- : "+&d" (rp_old), "=Q" (ptr)
- : "d" (rp_new), "Q" (ptr)
+ : "+d" (rp_old), "+Q" (*ullptr)
+ : "d" (rp_new)
: "memory", "cc");
return rp_old.pair;
}
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index f8c6df6cd1f..d350ed9d0fb 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -8,7 +8,11 @@
#include <linux/thread_info.h>
#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
-#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v))
+
+#define __SC_DELOUSE(t,v) ({ \
+ BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
+ (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+})
#define PSW32_MASK_PER 0x40000000UL
#define PSW32_MASK_DAT 0x04000000UL
@@ -22,6 +26,7 @@
#define PSW32_MASK_ASC 0x0000C000UL
#define PSW32_MASK_CC 0x00003000UL
#define PSW32_MASK_PM 0x00000f00UL
+#define PSW32_MASK_RI 0x00000080UL
#define PSW32_MASK_USER 0x0000FF00UL
@@ -35,7 +40,10 @@
#define PSW32_ASC_SECONDARY 0x00008000UL
#define PSW32_ASC_HOME 0x0000C000UL
-extern u32 psw32_user_bits;
+#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
+ PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
+ PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+ PSW32_ASC_PRIMARY)
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
@@ -70,6 +78,22 @@ typedef u32 compat_ulong_t;
typedef u64 compat_u64;
typedef u32 compat_uptr_t;
+typedef struct {
+ u32 mask;
+ u32 addr;
+} __aligned(8) psw_compat_t;
+
+typedef struct {
+ psw_compat_t psw;
+ u32 gprs[NUM_GPRS];
+ u32 acrs[NUM_ACRS];
+ u32 orig_gpr2;
+} s390_compat_regs;
+
+typedef struct {
+ u32 gprs_high[NUM_GPRS];
+} s390_compat_regs_high;
+
struct compat_timespec {
compat_time_t tv_sec;
s32 tv_nsec;
@@ -124,18 +148,33 @@ struct compat_flock64 {
};
struct compat_statfs {
- s32 f_type;
- s32 f_bsize;
- s32 f_blocks;
- s32 f_bfree;
- s32 f_bavail;
- s32 f_files;
- s32 f_ffree;
+ u32 f_type;
+ u32 f_bsize;
+ u32 f_blocks;
+ u32 f_bfree;
+ u32 f_bavail;
+ u32 f_files;
+ u32 f_ffree;
compat_fsid_t f_fsid;
- s32 f_namelen;
- s32 f_frsize;
- s32 f_flags;
- s32 f_spare[5];
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
+};
+
+struct compat_statfs64 {
+ u32 f_type;
+ u32 f_bsize;
+ u64 f_blocks;
+ u64 f_bfree;
+ u64 f_bavail;
+ u64 f_files;
+ u64 f_ffree;
+ compat_fsid_t f_fsid;
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
};
#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff
@@ -248,8 +287,6 @@ static inline int is_compat_task(void)
return is_32bit_task();
}
-#endif
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
unsigned long stack;
@@ -260,6 +297,8 @@ static inline void __user *arch_compat_alloc_user_space(long len)
return (void __user *) (stack - len);
}
+#endif
+
struct compat_ipc64_perm {
compat_key_t key;
__compat_uid32_t uid;
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 35f0020b7ba..cb700d54bd8 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -12,6 +12,7 @@
#ifndef _ASM_S390_CPU_MF_H
#define _ASM_S390_CPU_MF_H
+#include <linux/errno.h>
#include <asm/facility.h>
#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */
@@ -34,12 +35,12 @@
/* CPU measurement facility support */
static inline int cpum_cf_avail(void)
{
- return MACHINE_HAS_SPP && test_facility(67);
+ return MACHINE_HAS_LPP && test_facility(67);
}
static inline int cpum_sf_avail(void)
{
- return MACHINE_HAS_SPP && test_facility(68);
+ return MACHINE_HAS_LPP && test_facility(68);
}
@@ -55,6 +56,96 @@ struct cpumf_ctr_info {
u32 reserved2[12];
} __packed;
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block { /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: basic-sampling authorization */
+ unsigned int ad:1; /* 15: diag-sampling authorization */
+ unsigned int b16_21:6; /* 16-21: zeros */
+ unsigned int es:1; /* 22: basic-sampling enable control */
+ unsigned int ed:1; /* 23: diag-sampling enable control */
+ unsigned int b24_29:6; /* 24-29: zeros */
+ unsigned int cs:1; /* 30: basic-sampling activation control */
+ unsigned int cd:1; /* 31: diag-sampling activation control */
+ unsigned int bsdes:16; /* 4-5: size of basic sampling entry */
+ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long long b2_53:52;/* 2-53: zeros */
+ unsigned int es:1; /* 54: basic-sampling enable control */
+ unsigned int ed:1; /* 55: diag-sampling enable control */
+ unsigned int b56_61:6; /* 56-61: - zeros */
+ unsigned int cs:1; /* 62: basic-sampling activation control */
+ unsigned int cd:1; /* 63: diag-sampling activation control */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+} __packed;
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+} __packed;
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:14; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+} __packed;
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned long long:61; /* 3 - 63: Reserved */
+ };
+ unsigned long long flags; /* 0 - 63: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ unsigned long long progusage1; /* 48 - reserved for programming use */
+ unsigned long long progusage2; /* */
+} __packed;
+
/* Query counter information */
static inline int qctri(struct cpumf_ctr_info *info)
{
@@ -98,4 +189,95 @@ static inline int ecctr(u64 ctr, u64 *val)
return cc;
}
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+ int cc;
+ cc = 1;
+
+ asm volatile(
+ "0: .insn s,0xb2860000,0(%1)\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "=d" (cc), "+a" (info)
+ : "m" (*info)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+ int cc;
+
+ cc = 1;
+ asm volatile(
+ "0: .insn s,0xb2870000,0(%1)\n"
+ "1: ipm %0\n"
+ " srl %0,28\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+a" (req)
+ : "m" (*req)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->t)
+ return *((unsigned long long *) &te->timestamp[1]);
+
+ /* TOD in STCK format */
+ return *((unsigned long long *) &te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *) v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return (unsigned long *) ret;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return (unsigned long *) (*s & ~0x1ul);
+}
#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0..f65bd363451 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 7e1c917bbba..09d1dd46bd5 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -29,6 +29,8 @@ struct css_general_char {
u32 fcx : 1; /* bit 88 */
u32 : 19;
u32 alt_ssi : 1; /* bit 108 */
+ u32:1;
+ u32 narf:1; /* bit 110 */
} __packed;
extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index debfda33d1f..31ab9f346d7 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -7,70 +7,76 @@
#ifndef __ASM_CTL_REG_H
#define __ASM_CTL_REG_H
-#ifdef CONFIG_64BIT
-
-#define __ctl_load(array, low, high) ({ \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- asm volatile( \
- " lctlg %1,%2,%0\n" \
- : : "Q" (*(addrtype *)(&array)), \
- "i" (low), "i" (high)); \
- })
-
-#define __ctl_store(array, low, high) ({ \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- asm volatile( \
- " stctg %1,%2,%0\n" \
- : "=Q" (*(addrtype *)(&array)) \
- : "i" (low), "i" (high)); \
- })
-
-#else /* CONFIG_64BIT */
-
-#define __ctl_load(array, low, high) ({ \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- asm volatile( \
- " lctl %1,%2,%0\n" \
- : : "Q" (*(addrtype *)(&array)), \
- "i" (low), "i" (high)); \
-})
-
-#define __ctl_store(array, low, high) ({ \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- asm volatile( \
- " stctl %1,%2,%0\n" \
- : "=Q" (*(addrtype *)(&array)) \
- : "i" (low), "i" (high)); \
- })
-
-#endif /* CONFIG_64BIT */
-
-#define __ctl_set_bit(cr, bit) ({ \
- unsigned long __dummy; \
- __ctl_store(__dummy, cr, cr); \
- __dummy |= 1UL << (bit); \
- __ctl_load(__dummy, cr, cr); \
-})
+#include <linux/bug.h>
-#define __ctl_clear_bit(cr, bit) ({ \
- unsigned long __dummy; \
- __ctl_store(__dummy, cr, cr); \
- __dummy &= ~(1UL << (bit)); \
- __ctl_load(__dummy, cr, cr); \
-})
+#ifdef CONFIG_64BIT
+# define __CTL_LOAD "lctlg"
+# define __CTL_STORE "stctg"
+#else
+# define __CTL_LOAD "lctl"
+# define __CTL_STORE "stctl"
+#endif
+
+#define __ctl_load(array, low, high) { \
+ typedef struct { char _[sizeof(array)]; } addrtype; \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+ asm volatile( \
+ __CTL_LOAD " %1,%2,%0\n" \
+ : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\
+}
+
+#define __ctl_store(array, low, high) { \
+ typedef struct { char _[sizeof(array)]; } addrtype; \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+ asm volatile( \
+ __CTL_STORE " %1,%2,%0\n" \
+ : "=Q" (*(addrtype *)(&array)) \
+ : "i" (low), "i" (high)); \
+}
+
+static inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, cr, cr);
+ reg |= 1UL << bit;
+ __ctl_load(reg, cr, cr);
+}
+
+static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, cr, cr);
+ reg &= ~(1UL << bit);
+ __ctl_load(reg, cr, cr);
+}
+
+void smp_ctl_set_bit(int cr, int bit);
+void smp_ctl_clear_bit(int cr, int bit);
+
+union ctlreg0 {
+ unsigned long val;
+ struct {
+#ifdef CONFIG_64BIT
+ unsigned long : 32;
+#endif
+ unsigned long : 3;
+ unsigned long lap : 1; /* Low-address-protection control */
+ unsigned long : 4;
+ unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+ unsigned long : 23;
+ };
+};
#ifdef CONFIG_SMP
-
-extern void smp_ctl_set_bit(int cr, int bit);
-extern void smp_ctl_clear_bit(int cr, int bit);
-#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
-#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
-
+# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
#else
-
-#define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
-#define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
-
-#endif /* CONFIG_SMP */
+# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
+#endif
#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 188c5052a20..530c15eb01e 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -107,6 +107,11 @@ void debug_set_level(debug_info_t* id, int new_level);
void debug_set_critical(void);
void debug_stop_all(void);
+static inline bool debug_level_enabled(debug_info_t* id, int level)
+{
+ return level <= id->level;
+}
+
static inline debug_entry_t*
debug_event(debug_info_t* id, int level, void* data, int length)
{
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
new file mode 100644
index 00000000000..04a83f5773c
--- /dev/null
+++ b/arch/s390/include/asm/dis.h
@@ -0,0 +1,52 @@
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef __ASM_S390_DIS_H__
+#define __ASM_S390_DIS_H__
+
+/* Type of operand */
+#define OPERAND_GPR 0x1 /* Operand printed as %rx */
+#define OPERAND_FPR 0x2 /* Operand printed as %fx */
+#define OPERAND_AR 0x4 /* Operand printed as %ax */
+#define OPERAND_CR 0x8 /* Operand printed as %cx */
+#define OPERAND_DISP 0x10 /* Operand printed as displacement */
+#define OPERAND_BASE 0x20 /* Operand printed as base register */
+#define OPERAND_INDEX 0x40 /* Operand printed as index register */
+#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
+#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
+
+
+struct s390_operand {
+ int bits; /* The number of bits in the operand. */
+ int shift; /* The number of bits to shift. */
+ int flags; /* One bit syntax flags. */
+};
+
+struct s390_insn {
+ const char name[5];
+ unsigned char opfrag;
+ unsigned char format;
+};
+
+
+static inline int insn_length(unsigned char code)
+{
+ return ((((int) code + 64) >> 7) + 1) << 1;
+}
+
+void show_code(struct pt_regs *regs);
+void print_fn_code(unsigned char *code, unsigned long len);
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
+struct s390_insn *find_insn(unsigned char *code);
+
+static inline int is_known_insn(unsigned char *code)
+{
+ return !!find_insn(code);
+}
+
+#endif /* __ASM_S390_DIS_H__ */
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 8a32f7dfd3a..3fbc67d9e19 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -19,9 +19,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
}
extern int dma_set_mask(struct device *dev, u64 mask);
-extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction);
+
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -48,9 +50,10 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ debug_dma_mapping_error(dev, dma_addr);
if (dma_ops->mapping_error)
return dma_ops->mapping_error(dev, dma_addr);
- return (dma_addr == 0UL);
+ return dma_addr == DMA_ERROR_CODE;
}
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
@@ -69,8 +72,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
- dma_ops->free(dev, size, cpu_addr, dma_handle, NULL);
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+ dma_ops->free(dev, size, cpu_addr, dma_handle, NULL);
}
#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 8d4847191ec..67026300c88 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -34,6 +34,8 @@ struct arsb {
u32 reserved[4];
} __packed;
+#define EQC_WR_PROHIBIT 22
+
struct msb {
u8 fmt:4;
u8 oc:4;
@@ -96,29 +98,20 @@ struct scm_device {
#define OP_STATE_TEMP_ERR 2
#define OP_STATE_PERM_ERR 3
+enum scm_event {SCM_CHANGE, SCM_AVAIL};
+
struct scm_driver {
struct device_driver drv;
int (*probe) (struct scm_device *scmdev);
int (*remove) (struct scm_device *scmdev);
- void (*notify) (struct scm_device *scmdev);
+ void (*notify) (struct scm_device *scmdev, enum scm_event event);
void (*handler) (struct scm_device *scmdev, void *data, int error);
};
int scm_driver_register(struct scm_driver *scmdrv);
void scm_driver_unregister(struct scm_driver *scmdrv);
-int scm_start_aob(struct aob *aob);
+int eadm_start_aob(struct aob *aob);
void scm_irq_handler(struct aob *aob, int error);
-struct eadm_ops {
- int (*eadm_start) (struct aob *aob);
- struct module *owner;
-};
-
-int scm_get_ref(void);
-void scm_put_ref(void);
-
-void register_eadm_ops(struct eadm_ops *ops);
-void unregister_eadm_ops(struct eadm_ops *ops);
-
#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 178ff966a8b..78f4f8711d5 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -119,6 +119,8 @@
*/
#include <asm/ptrace.h>
+#include <asm/compat.h>
+#include <asm/syscall.h>
#include <asm/user.h>
typedef s390_fp_regs elf_fpregset_t;
@@ -180,21 +182,31 @@ extern unsigned long elf_hwcap;
extern char elf_platform[];
#define ELF_PLATFORM (elf_platform)
-#ifndef CONFIG_64BIT
+#ifndef CONFIG_COMPAT
#define SET_PERSONALITY(ex) \
- set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-#else /* CONFIG_64BIT */
+do { \
+ set_personality(PER_LINUX | \
+ (current->personality & (~PER_MASK))); \
+ current_thread_info()->sys_call_table = \
+ (unsigned long) &sys_call_table; \
+} while (0)
+#else /* CONFIG_COMPAT */
#define SET_PERSONALITY(ex) \
do { \
if (personality(current->personality) != PER_LINUX32) \
set_personality(PER_LINUX | \
(current->personality & ~PER_MASK)); \
- if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
+ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
set_thread_flag(TIF_31BIT); \
- else \
+ current_thread_info()->sys_call_table = \
+ (unsigned long) &sys_call_table_emu; \
+ } else { \
clear_thread_flag(TIF_31BIT); \
+ current_thread_info()->sys_call_table = \
+ (unsigned long) &sys_call_table; \
+ } \
} while (0)
-#endif /* CONFIG_64BIT */
+#endif /* CONFIG_COMPAT */
#define STACK_RND_MASK 0x7ffUL
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 2ee66a65f2d..0aa6a7ed95a 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -13,6 +13,16 @@
#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return 0;
+ ptr = (unsigned char *) facilities + (nr >> 3);
+ return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
/*
* The test_facility function uses the bit odering where the MSB is bit 0.
* That makes it easier to query facility bits with the bit number as
@@ -20,12 +30,7 @@
*/
static inline int test_facility(unsigned long nr)
{
- unsigned char *ptr;
-
- if (nr >= MAX_FACILITY_BIT)
- return 0;
- ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3);
- return (*ptr & (0x80 >> (nr & 7))) != 0;
+ return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
/**
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index ef617099507..7ecb92b469b 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -12,9 +12,9 @@
#define TCW_FORMAT_DEFAULT 0
#define TCW_TIDAW_FORMAT_DEFAULT 0
-#define TCW_FLAGS_INPUT_TIDA 1 << (23 - 5)
-#define TCW_FLAGS_TCCB_TIDA 1 << (23 - 6)
-#define TCW_FLAGS_OUTPUT_TIDA 1 << (23 - 7)
+#define TCW_FLAGS_INPUT_TIDA (1 << (23 - 5))
+#define TCW_FLAGS_TCCB_TIDA (1 << (23 - 6))
+#define TCW_FLAGS_OUTPUT_TIDA (1 << (23 - 7))
#define TCW_FLAGS_TIDAW_FORMAT(x) ((x) & 3) << (23 - 9)
#define TCW_FLAGS_GET_TIDAW_FORMAT(x) (((x) >> (23 - 9)) & 3)
@@ -54,11 +54,11 @@ struct tcw {
u32 intrg;
} __attribute__ ((packed, aligned(64)));
-#define TIDAW_FLAGS_LAST 1 << (7 - 0)
-#define TIDAW_FLAGS_SKIP 1 << (7 - 1)
-#define TIDAW_FLAGS_DATA_INT 1 << (7 - 2)
-#define TIDAW_FLAGS_TTIC 1 << (7 - 3)
-#define TIDAW_FLAGS_INSERT_CBC 1 << (7 - 4)
+#define TIDAW_FLAGS_LAST (1 << (7 - 0))
+#define TIDAW_FLAGS_SKIP (1 << (7 - 1))
+#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2))
+#define TIDAW_FLAGS_TTIC (1 << (7 - 3))
+#define TIDAW_FLAGS_INSERT_CBC (1 << (7 - 4))
/**
* struct tidaw - Transport-Indirect-Addressing Word (TIDAW)
@@ -106,9 +106,9 @@ struct tsa_ddpc {
u8 sense[32];
} __attribute__ ((packed));
-#define TSA_INTRG_FLAGS_CU_STATE_VALID 1 << (7 - 0)
-#define TSA_INTRG_FLAGS_DEV_STATE_VALID 1 << (7 - 1)
-#define TSA_INTRG_FLAGS_OP_STATE_VALID 1 << (7 - 2)
+#define TSA_INTRG_FLAGS_CU_STATE_VALID (1 << (7 - 0))
+#define TSA_INTRG_FLAGS_DEV_STATE_VALID (1 << (7 - 1))
+#define TSA_INTRG_FLAGS_OP_STATE_VALID (1 << (7 - 2))
/**
* struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA)
@@ -140,10 +140,10 @@ struct tsa_intrg {
#define TSB_FORMAT_DDPC 2
#define TSB_FORMAT_INTRG 3
-#define TSB_FLAGS_DCW_OFFSET_VALID 1 << (7 - 0)
-#define TSB_FLAGS_COUNT_VALID 1 << (7 - 1)
-#define TSB_FLAGS_CACHE_MISS 1 << (7 - 2)
-#define TSB_FLAGS_TIME_VALID 1 << (7 - 3)
+#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0))
+#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1))
+#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2))
+#define TSB_FLAGS_TIME_VALID (1 << (7 - 3))
#define TSB_FLAGS_FORMAT(x) ((x) & 7)
#define TSB_FORMAT(t) ((t)->flags & 7)
@@ -179,9 +179,9 @@ struct tsb {
#define DCW_INTRG_RCQ_PRIMARY 1
#define DCW_INTRG_RCQ_SECONDARY 2
-#define DCW_INTRG_FLAGS_MPM 1 < (7 - 0)
-#define DCW_INTRG_FLAGS_PPR 1 < (7 - 1)
-#define DCW_INTRG_FLAGS_CRIT 1 < (7 - 2)
+#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0))
+#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1))
+#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2))
/**
* struct dcw_intrg_data - Interrogate DCW data
@@ -216,7 +216,7 @@ struct dcw_intrg_data {
u8 prog_data[0];
} __attribute__ ((packed));
-#define DCW_FLAGS_CC 1 << (7 - 1)
+#define DCW_FLAGS_CC (1 << (7 - 1))
#define DCW_CMD_WRITE 0x01
#define DCW_CMD_READ 0x02
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index b7931faaef6..bf246dae136 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -9,11 +9,6 @@ struct dyn_arch_ftrace { };
#define MCOUNT_ADDR ((long)_mcount)
-#ifdef CONFIG_64BIT
-#define MCOUNT_INSN_SIZE 12
-#else
-#define MCOUNT_INSN_SIZE 20
-#endif
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
@@ -21,4 +16,11 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
}
#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_64BIT
+#define MCOUNT_INSN_SIZE 12
+#else
+#define MCOUNT_INSN_SIZE 22
+#endif
+
#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 96bc83ea5c9..a4811aa0304 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -1,26 +1,63 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
-#include <linux/futex.h>
#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/mmu_context.h>
#include <asm/errno.h>
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
+ asm volatile( \
+ " sacf 256\n" \
+ "0: l %1,0(%6)\n" \
+ "1:"insn \
+ "2: cs %1,%2,0(%6)\n" \
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+ "m" (*uaddr) : "cc");
+
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
- int oldval, ret;
+ int oldval = 0, newval, ret;
+ load_kernel_asce();
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
- ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval);
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("lr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("lr %2,%1\nar %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("lr %2,%1\nor %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
pagefault_enable();
if (!ret) {
@@ -40,10 +77,20 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
+ int ret;
- return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ "0: cs %1,%4,0(%5)\n"
+ "1: la %0,0\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+ : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ : "cc", "memory");
+ *uval = oldval;
+ return ret;
}
#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 0c82ba86e99..b7eabaaeffb 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -18,6 +18,9 @@
#define __ARCH_HAS_DO_SOFTIRQ
#define __ARCH_IRQ_EXIT_IRQS_DISABLED
-#define HARDIRQ_BITS 8
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 593753ee07f..11eae5f55b7 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -17,6 +17,9 @@
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
/*
* If the arch doesn't supply something else, assume that hugepage
@@ -38,93 +41,75 @@ static inline int prepare_hugepage_range(struct file *file,
int arch_prepare_hugepage(struct page *page);
void arch_release_hugepage(struct page *page);
-static inline pte_t huge_pte_wrprotect(pte_t pte)
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- pte_val(pte) |= _PAGE_RO;
- return pte;
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
}
-static inline int huge_pte_none(pte_t pte)
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
- return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
- !(pte_val(pte) & _SEGMENT_ENTRY_RO);
+ huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
{
- pte_t pte = *ptep;
- unsigned long mask;
-
- if (!MACHINE_HAS_HPAGE) {
- ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
- if (ptep) {
- mask = pte_val(pte) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte = pte_mkhuge(*ptep);
- pte_val(pte) |= mask;
- }
+ int changed = !pte_same(huge_ptep_get(ptep), pte);
+ if (changed) {
+ huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
- return pte;
+ return changed;
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+ return mk_pte(page, pgprot);
}
-static inline void __pmd_csp(pmd_t *pmdp)
+static inline int huge_pte_none(pte_t pte)
{
- register unsigned long reg2 asm("2") = pmd_val(*pmdp);
- register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
- _SEGMENT_ENTRY_INV;
- register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
- asm volatile(
- " csp %1,%3"
- : "=m" (*pmdp)
- : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+ return pte_none(pte);
}
-static inline void huge_ptep_invalidate(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline int huge_pte_write(pte_t pte)
{
- pmd_t *pmdp = (pmd_t *) ptep;
+ return pte_write(pte);
+}
- if (MACHINE_HAS_IDTE)
- __pmd_idte(address, pmdp);
- else
- __pmd_csp(pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
+static inline int huge_pte_dirty(pte_t pte)
+{
+ return pte_dirty(pte);
}
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+static inline pte_t huge_pte_mkwrite(pte_t pte)
{
- pte_t pte = huge_ptep_get(ptep);
+ return pte_mkwrite(pte);
+}
- huge_ptep_invalidate(mm, addr, ptep);
- return pte;
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+ return pte_mkdirty(pte);
}
-#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({ \
- int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \
- if (__changed) { \
- huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \
- set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \
- } \
- __changed; \
-})
-
-#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \
-({ \
- pte_t __pte = huge_ptep_get(__ptep); \
- if (pte_write(__pte)) { \
- huge_ptep_invalidate(__mm, __addr, __ptep); \
- set_huge_pte_at(__mm, __addr, __ptep, \
- huge_pte_wrprotect(__pte)); \
- } \
-})
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
{
- huge_ptep_invalidate(vma->vm_mm, address, ptep);
+ return pte_modify(pte, newprot);
}
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index 7e3d2586c1f..ee96a8b697f 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -4,19 +4,8 @@
#include <linux/msi.h>
#include <linux/pci.h>
-static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
-{
- return __irq_get_msi_desc(irq);
-}
-
-/* Must be called with msi map lock held */
-static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
-{
- if (!msi)
- return -EINVAL;
-
- msi->irq = irq;
- return 0;
-}
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
#endif
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 27cb32185ce..cd6b9ee7b69 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -13,29 +13,8 @@
#include <asm/page.h>
#include <asm/pci_io.h>
-/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(volatile void * address)
-{
- unsigned long real_address;
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_address) : "a" (address) : "cc");
- return real_address;
-}
-#define virt_to_phys virt_to_phys
-
-static inline void * phys_to_virt(unsigned long address)
-{
- return (void *) address;
-}
-
void *xlate_dev_mem_ptr(unsigned long phys);
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
/*
@@ -50,10 +29,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
-/* TODO: s390 cannot support io_remap_pfn_range... */
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
{
return (void __iomem *) offset;
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 2bd6cb897b9..2fcccc0c997 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -7,6 +7,7 @@
#ifndef _ASM_S390_IPL_H
#define _ASM_S390_IPL_H
+#include <asm/lowcore.h>
#include <asm/types.h>
#include <asm/cio.h>
#include <asm/setup.h>
@@ -86,7 +87,14 @@ struct ipl_parameter_block {
*/
extern u32 ipl_flags;
extern u32 dump_prefix_page;
-extern unsigned int zfcpdump_prefix_array[];
+
+struct dump_save_areas {
+ struct save_area **areas;
+ int count;
+};
+
+extern struct dump_save_areas dump_save_areas;
+struct save_area *dump_save_area_create(int cpu);
extern void do_reipl(void);
extern void do_halt(void);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 7def77302d6..c4dd400a279 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,17 +1,42 @@
#ifndef _ASM_IRQ_H
#define _ASM_IRQ_H
+#define EXT_INTERRUPT 1
+#define IO_INTERRUPT 2
+#define THIN_INTERRUPT 3
+
+#define NR_IRQS_BASE 4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ 0
+
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY 0x0040
+#define EXT_IRQ_CLK_COMP 0x1004
+#define EXT_IRQ_CPU_TIMER 0x1005
+#define EXT_IRQ_WARNING_TRACK 0x1007
+#define EXT_IRQ_MALFUNC_ALERT 0x1200
+#define EXT_IRQ_EMERGENCY_SIG 0x1201
+#define EXT_IRQ_EXTERNAL_CALL 0x1202
+#define EXT_IRQ_TIMING_ALERT 0x1406
+#define EXT_IRQ_MEASURE_ALERT 0x1407
+#define EXT_IRQ_SERVICE_SIG 0x2401
+#define EXT_IRQ_CP_SERVICE 0x2603
+#define EXT_IRQ_IUCV 0x4000
+
+#ifndef __ASSEMBLY__
+
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/cache.h>
#include <linux/types.h>
-enum interruption_main_class {
- EXTERNAL_INTERRUPT,
- IO_INTERRUPT,
- NR_IRQS
-};
-
enum interruption_class {
IRQEXT_CLK,
IRQEXT_EXC,
@@ -41,6 +66,8 @@ enum interruption_class {
IRQIO_CSC,
IRQIO_PCI,
IRQIO_MSI,
+ IRQIO_VIR,
+ IRQIO_VAI,
NMI_NMI,
CPU_RST,
NR_ARCH_IRQS
@@ -64,21 +91,19 @@ struct ext_code {
typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
-int register_external_interrupt(u16 code, ext_int_handler_t handler);
-int unregister_external_interrupt(u16 code, ext_int_handler_t handler);
-void service_subclass_irq_register(void);
-void service_subclass_irq_unregister(void);
-void measurement_alert_subclass_register(void);
-void measurement_alert_subclass_unregister(void);
-
-#ifdef CONFIG_LOCKDEP
-# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq)
-# define disable_irq_nosync_lockdep_irqsave(irq, flags) \
- disable_irq_nosync(irq)
-# define disable_irq_lockdep(irq) disable_irq(irq)
-# define enable_irq_lockdep(irq) enable_irq(irq)
-# define enable_irq_lockdep_irqrestore(irq, flags) \
- enable_irq(irq)
-#endif
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
+
+enum irq_subclass {
+ IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+ IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
+#define irq_canonicalize(irq) (irq)
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 6c32190dc73..346b1c85ffb 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -15,7 +15,7 @@
static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("0: brcl 0,0\n"
+ asm_volatile_goto("0: brcl 0,0\n"
".pushsection __jump_table, \"aw\"\n"
ASM_ALIGN "\n"
ASM_PTR " 0b, %l[label], %0\n"
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index dcf6948a875..4176dfe0fba 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -31,6 +31,8 @@
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
struct pt_regs;
struct kprobe;
@@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t;
/* Architecture specific copy of original instruction */
struct arch_specific_insn {
/* copy of original instruction */
- kprobe_opcode_t insn[MAX_INSN_SIZE];
+ kprobe_opcode_t *insn;
};
struct prev_kprobe {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b7841546991..4181d7baabb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -16,37 +16,48 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
+#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
-#define KVM_MEMORY_SLOTS 32
-/* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_USER_MEM_SLOTS 32
+
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
+#define SIGP_CTRL_C 0x00800000
struct sca_entry {
- atomic_t scn;
+ atomic_t ctrl;
__u32 reserved;
__u64 sda;
__u64 reserved2[2];
} __attribute__((packed));
+union ipte_control {
+ unsigned long val;
+ struct {
+ unsigned long k : 1;
+ unsigned long kh : 31;
+ unsigned long kg : 32;
+ };
+};
struct sca_block {
- __u64 ipte_control;
+ union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
struct sca_entry cpu[64];
} __attribute__((packed));
-#define KVM_NR_PAGE_SIZES 2
-#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
-#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
-#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
-#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
-#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -63,23 +74,51 @@ struct sca_block {
#define CPUSTAT_ZARCH 0x00000800
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
#define CPUSTAT_P 0x00000001
struct kvm_s390_sie_block {
atomic_t cpuflags; /* 0x0000 */
- __u32 prefix; /* 0x0004 */
- __u8 reserved8[32]; /* 0x0008 */
+ __u32 : 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32 : 13;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ __u8 reserved10[16]; /* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
__u8 reserved40[4]; /* 0x0040 */
#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
__u32 ictl; /* 0x0048 */
__u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
__u8 icptcode; /* 0x0050 */
__u8 reserved51; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
@@ -89,7 +128,8 @@ struct kvm_s390_sie_block {
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
__u8 ecb; /* 0x0061 */
- __u8 reserved62[2]; /* 0x0062 */
+ __u8 ecb2; /* 0x0062 */
+ __u8 reserved63[1]; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
__u32 todpr; /* 0x006c */
@@ -97,16 +137,48 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
- __u8 reservedb0[30]; /* 0x00b0 */
- __u16 iprcc; /* 0x00ce */
- __u8 reservedd0[48]; /* 0x00d0 */
+ __u8 reservedb0[20]; /* 0x00b0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ __u32 reservedc8; /* 0x00c8 */
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ __u64 tecmc; /* 0x00e8 */
+ __u8 reservedf0[16]; /* 0x00f0 */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
__u32 fac; /* 0x01a0 */
- __u8 reserved1a4[92]; /* 0x01a4 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[30]; /* 0x01c0 */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u8 reserved1f0[16]; /* 0x01f0 */
} __attribute__((packed));
+struct kvm_s390_itdb {
+ __u8 data[256];
+} __packed;
+
+struct sie_page {
+ struct kvm_s390_sie_block sie_block;
+ __u8 reserved200[1024]; /* 0x0200 */
+ struct kvm_s390_itdb itdb; /* 0x0600 */
+ __u8 reserved700[2304]; /* 0x0700 */
+} __packed;
+
struct kvm_vcpu_stat {
u32 exit_userspace;
u32 exit_null;
@@ -117,6 +189,8 @@ struct kvm_vcpu_stat {
u32 exit_instruction;
u32 instruction_lctl;
u32 instruction_lctlg;
+ u32 instruction_stctl;
+ u32 instruction_stctg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 deliver_external_call;
@@ -127,17 +201,21 @@ struct kvm_vcpu_stat {
u32 deliver_prefix_signal;
u32 deliver_restart_signal;
u32 deliver_program_int;
+ u32 deliver_io_int;
u32 exit_wait_state;
+ u32 instruction_pfmf;
u32 instruction_stidp;
u32 instruction_spx;
u32 instruction_stpx;
u32 instruction_stap;
u32 instruction_storage_key;
+ u32 instruction_ipte_interlock;
u32 instruction_stsch;
u32 instruction_chsc;
u32 instruction_stsi;
u32 instruction_stfl;
u32 instruction_tprot;
+ u32 instruction_essa;
u32 instruction_sigp_sense;
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
@@ -151,41 +229,58 @@ struct kvm_vcpu_stat {
u32 diagnose_9c;
};
-struct kvm_s390_io_info {
- __u16 subchannel_id; /* 0x0b8 */
- __u16 subchannel_nr; /* 0x0ba */
- __u32 io_int_parm; /* 0x0bc */
- __u32 io_int_word; /* 0x0c0 */
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u64 ext_params2;
-};
-
-#define PGM_OPERATION 0x01
-#define PGM_PRIVILEGED_OPERATION 0x02
-#define PGM_EXECUTE 0x03
-#define PGM_PROTECTION 0x04
-#define PGM_ADDRESSING 0x05
-#define PGM_SPECIFICATION 0x06
-#define PGM_DATA 0x07
-
-struct kvm_s390_pgm_info {
- __u16 code;
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
+#define PGM_OPERATION 0x01
+#define PGM_PRIVILEGED_OP 0x02
+#define PGM_EXECUTE 0x03
+#define PGM_PROTECTION 0x04
+#define PGM_ADDRESSING 0x05
+#define PGM_SPECIFICATION 0x06
+#define PGM_DATA 0x07
+#define PGM_FIXED_POINT_OVERFLOW 0x08
+#define PGM_FIXED_POINT_DIVIDE 0x09
+#define PGM_DECIMAL_OVERFLOW 0x0a
+#define PGM_DECIMAL_DIVIDE 0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
+#define PGM_HFP_SIGNIFICANCE 0x0e
+#define PGM_HFP_DIVIDE 0x0f
+#define PGM_SEGMENT_TRANSLATION 0x10
+#define PGM_PAGE_TRANSLATION 0x11
+#define PGM_TRANSLATION_SPEC 0x12
+#define PGM_SPECIAL_OPERATION 0x13
+#define PGM_OPERAND 0x15
+#define PGM_TRACE_TABEL 0x16
+#define PGM_SPACE_SWITCH 0x1c
+#define PGM_HFP_SQUARE_ROOT 0x1d
+#define PGM_PC_TRANSLATION_SPEC 0x1f
+#define PGM_AFX_TRANSLATION 0x20
+#define PGM_ASX_TRANSLATION 0x21
+#define PGM_LX_TRANSLATION 0x22
+#define PGM_EX_TRANSLATION 0x23
+#define PGM_PRIMARY_AUTHORITY 0x24
+#define PGM_SECONDARY_AUTHORITY 0x25
+#define PGM_LFX_TRANSLATION 0x26
+#define PGM_LSX_TRANSLATION 0x27
+#define PGM_ALET_SPECIFICATION 0x28
+#define PGM_ALEN_TRANSLATION 0x29
+#define PGM_ALE_SEQUENCE 0x2a
+#define PGM_ASTE_VALIDITY 0x2b
+#define PGM_ASTE_SEQUENCE 0x2c
+#define PGM_EXTENDED_AUTHORITY 0x2d
+#define PGM_LSTE_SEQUENCE 0x2e
+#define PGM_ASTE_INSTANCE 0x2f
+#define PGM_STACK_FULL 0x30
+#define PGM_STACK_EMPTY 0x31
+#define PGM_STACK_SPECIFICATION 0x32
+#define PGM_STACK_TYPE 0x33
+#define PGM_STACK_OPERATION 0x34
+#define PGM_ASCE_TYPE 0x38
+#define PGM_REGION_FIRST_TRANS 0x39
+#define PGM_REGION_SECOND_TRANS 0x3a
+#define PGM_REGION_THIRD_TRANS 0x3b
+#define PGM_MONITOR 0x40
+#define PGM_PER 0x80
+#define PGM_CRYPTO_OPERATION 0x119
struct kvm_s390_interrupt_info {
struct list_head list;
@@ -197,13 +292,13 @@ struct kvm_s390_interrupt_info {
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_mchk_info mchk;
};
};
/* for local_interrupt.action_flags */
#define ACTION_STORE_ON_STOP (1<<0)
#define ACTION_STOP_ON_STOP (1<<1)
-#define ACTION_RELOADVCPU_ON_STOP (1<<2)
struct kvm_s390_local_interrupt {
spinlock_t lock;
@@ -211,7 +306,7 @@ struct kvm_s390_local_interrupt {
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
int timer_due; /* event indicator for waitqueue below */
- wait_queue_head_t wq;
+ wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
};
@@ -221,11 +316,49 @@ struct kvm_s390_float_interrupt {
struct list_head list;
atomic_t active;
int next_rr_cpu;
- unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
- / sizeof(long)];
- struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+ unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int irq_count;
+};
+
+struct kvm_hw_wp_info_arch {
+ unsigned long addr;
+ unsigned long phys_addr;
+ int len;
+ char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+ unsigned long addr;
+ int len;
};
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which an be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+ (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+ unsigned long cr0;
+ unsigned long cr9;
+ unsigned long cr10;
+ unsigned long cr11;
+ struct kvm_hw_bp_info_arch *hw_bp_info;
+ struct kvm_hw_wp_info_arch *hw_wp_info;
+ int nr_hw_bp;
+ int nr_hw_wp;
+ unsigned long last_bp;
+};
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
@@ -235,11 +368,17 @@ struct kvm_vcpu_arch {
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct tasklet_struct tasklet;
+ struct kvm_s390_pgm_info pgm;
union {
struct cpuid cpu_id;
u64 stidp_data;
};
struct gmap *gmap;
+ struct kvm_guestdbg_info_arch guestdbg;
+#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
};
struct kvm_vm_stat {
@@ -249,12 +388,67 @@ struct kvm_vm_stat {
struct kvm_arch_memory_slot {
};
+struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+};
+
+struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
+ struct kvm_device *flic;
struct gmap *gmap;
+ int css_support;
+ int use_irqchip;
+ int use_cmma;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+ wait_queue_head_t ipte_wq;
+ spinlock_t start_stop_lock;
};
+#define KVM_HVA_ERR_BAD (-1UL)
+#define KVM_HVA_ERR_RO_BAD (-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+ return IS_ERR_VALUE(addr);
+}
+
+#define ASYNC_PF_PER_VCPU 64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {
+ unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index bbf8141408c..4349197ab9d 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -56,13 +56,14 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 trans_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
- __u16 per_perc_atmid; /* 0x0096 */
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
__u32 per_address; /* 0x0098 */
__u32 monitor_code; /* 0x009c */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
- __u8 ar_access_id; /* 0x00a3 */
+ __u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */
__u16 subchannel_id; /* 0x00b8 */
__u16 subchannel_nr; /* 0x00ba */
@@ -93,7 +94,9 @@ struct _lowcore {
__u32 save_area_sync[8]; /* 0x0200 */
__u32 save_area_async[8]; /* 0x0220 */
__u32 save_area_restart[1]; /* 0x0240 */
- __u8 pad_0x0244[0x0248-0x0244]; /* 0x0244 */
+
+ /* CPU flags. */
+ __u32 cpu_flags; /* 0x0244 */
/* Return psws. */
psw_t return_psw; /* 0x0248 */
@@ -139,12 +142,9 @@ struct _lowcore {
__u32 percpu_offset; /* 0x02f0 */
__u32 machine_flags; /* 0x02f4 */
__u32 ftrace_func; /* 0x02f8 */
- __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */
-
- /* Interrupt response block */
- __u8 irb[64]; /* 0x0300 */
+ __u32 spinlock_lockval; /* 0x02fc */
- __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */
+ __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */
/*
* 0xe00 contains the address of the IPL Parameter Information
@@ -196,12 +196,13 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 data_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
- __u16 per_perc_atmid; /* 0x0096 */
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
__u64 per_address; /* 0x0098 */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
- __u8 ar_access_id; /* 0x00a3 */
+ __u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
__u64 trans_exc_code; /* 0x00a8 */
__u64 monitor_code; /* 0x00b0 */
@@ -237,7 +238,9 @@ struct _lowcore {
__u64 save_area_sync[8]; /* 0x0200 */
__u64 save_area_async[8]; /* 0x0240 */
__u64 save_area_restart[1]; /* 0x0280 */
- __u8 pad_0x0288[0x0290-0x0288]; /* 0x0288 */
+
+ /* CPU flags. */
+ __u64 cpu_flags; /* 0x0288 */
/* Return psws. */
psw_t return_psw; /* 0x0290 */
@@ -285,15 +288,13 @@ struct _lowcore {
__u64 machine_flags; /* 0x0388 */
__u64 ftrace_func; /* 0x0390 */
__u64 gmap; /* 0x0398 */
- __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */
-
- /* Interrupt response block. */
- __u8 irb[64]; /* 0x0400 */
+ __u32 spinlock_lockval; /* 0x03a0 */
+ __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */
/* Per cpu primary space access list */
- __u32 paste[16]; /* 0x0440 */
+ __u32 paste[16]; /* 0x0400 */
- __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */
+ __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */
/*
* 0xe00 contains the address of the IPL Parameter Information
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 0e47a576d66..9977e08df5b 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -9,7 +9,7 @@
#include <uapi/asm/mman.h>
#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
-int s390_mmap_check(unsigned long addr, unsigned long len);
-#define arch_mmap_check(addr,len,flags) s390_mmap_check(addr,len)
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
+#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
#endif
#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6340178748b..a5e656260a7 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -1,9 +1,11 @@
#ifndef __MMU_H
#define __MMU_H
+#include <linux/cpumask.h>
#include <linux/errno.h>
typedef struct {
+ cpumask_t cpu_attach_mask;
atomic_t attach_count;
unsigned int flush_mm;
spinlock_t list_lock;
@@ -12,10 +14,10 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
- /* Cloned contexts will be created with extended page tables. */
- unsigned int alloc_pgste:1;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
+ /* The mmu context uses storage keys. */
+ unsigned int use_skey:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 084e7755ed9..3815bfea1b2 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,30 +15,15 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
- if (current->mm && current->mm->context.alloc_pgste) {
- /*
- * alloc_pgste indicates, that any NEW context will be created
- * with extended page tables. The old context is unchanged. The
- * page table allocation and the page table operations will
- * look at has_pgste to distinguish normal and extended page
- * tables. The only way to create extended page tables is to
- * set alloc_pgste and then create a new context (e.g. dup_mm).
- * The page table allocation is called after init_new_context
- * and if has_pgste is set, it will create extended page
- * tables.
- */
- mm->context.has_pgste = 1;
- mm->context.alloc_pgste = 1;
- } else {
- mm->context.has_pgste = 0;
- mm->context.alloc_pgste = 0;
- }
+ mm->context.has_pgste = 0;
+ mm->context.use_skey = 0;
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
@@ -46,39 +31,69 @@ static inline int init_new_context(struct task_struct *tsk,
#define destroy_context(mm) do { } while (0)
-#ifndef CONFIG_64BIT
-#define LCTL_OPCODE "lctl"
-#else
-#define LCTL_OPCODE "lctlg"
-#endif
+static inline void set_user_asce(struct mm_struct *mm)
+{
+ S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+ if (current->thread.mm_segment.ar4)
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ set_cpu_flag(CIF_ASCE);
+}
-static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
+static inline void clear_user_asce(void)
{
- pgd_t *pgd = mm->pgd;
-
- S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
- if (s390_user_mode != HOME_SPACE_MODE) {
- /* Load primary space page table origin. */
- asm volatile(LCTL_OPCODE" 1,1,%0\n"
- : : "m" (S390_lowcore.user_asce) );
- } else
- /* Load home space page table origin. */
- asm volatile(LCTL_OPCODE" 13,13,%0"
- : : "m" (S390_lowcore.user_asce) );
- set_fs(current->thread.mm_segment);
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+static inline void load_kernel_asce(void)
+{
+ unsigned long asce;
+
+ __ctl_store(asce, 1, 1);
+ if (asce != S390_lowcore.kernel_asce)
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE);
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
- update_mm(next, tsk);
- atomic_dec(&prev->context.attach_count);
- WARN_ON(atomic_read(&prev->context.attach_count) < 0);
+ int cpu = smp_processor_id();
+
+ if (prev == next)
+ return;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+ /* Clear old ASCE by loading the kernel ASCE. */
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
atomic_inc(&next->context.attach_count);
- /* Check for TLBs not flushed yet */
- if (next->context.flush_mm)
- __tlb_flush_mm(next);
+ atomic_dec(&prev->context.attach_count);
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+ S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ load_kernel_asce();
+ if (mm) {
+ preempt_disable();
+ while (atomic_read(&mm->context.attach_count) >> 16)
+ cpu_relax();
+
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ if (mm->context.flush_mm)
+ __tlb_flush_mm(mm);
+ preempt_enable();
+ }
+ set_fs(current->thread.mm_segment);
}
#define enter_lazy_tlb(mm,tsk) do { } while (0)
@@ -87,7 +102,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
- switch_mm(prev, next, current);
+ switch_mm(prev, next, current);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ set_user_asce(next);
}
static inline void arch_dup_mmap(struct mm_struct *oldmm,
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f5f2e..458c1f7fbc1 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
*/
#include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax() barrier()
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index a86ad408407..114258eeaac 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -30,60 +30,39 @@
#include <asm/setup.h>
#ifndef __ASSEMBLY__
-void storage_key_init_range(unsigned long start, unsigned long end);
-
-static unsigned long pfmf(unsigned long function, unsigned long address)
+static inline void storage_key_init_range(unsigned long start, unsigned long end)
{
- asm volatile(
- " .insn rre,0xb9af0000,%[function],%[address]"
- : [address] "+a" (address)
- : [function] "d" (function)
- : "memory");
- return address;
+#if PAGE_DEFAULT_KEY
+ __storage_key_init_range(start, end);
+#endif
}
static inline void clear_page(void *page)
{
- if (MACHINE_HAS_PFMF) {
- pfmf(0x10000, (unsigned long)page);
- } else {
- register unsigned long reg1 asm ("1") = 0;
- register void *reg2 asm ("2") = page;
- register unsigned long reg3 asm ("3") = 4096;
- asm volatile(
- " mvcl 2,0"
- : "+d" (reg2), "+d" (reg3) : "d" (reg1)
- : "memory", "cc");
- }
+ register unsigned long reg1 asm ("1") = 0;
+ register void *reg2 asm ("2") = page;
+ register unsigned long reg3 asm ("3") = 4096;
+ asm volatile(
+ " mvcl 2,0"
+ : "+d" (reg2), "+d" (reg3) : "d" (reg1)
+ : "memory", "cc");
}
+/*
+ * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
+ * bypass caches when copying a page. Especially when copying huge pages
+ * this keeps L1 and L2 data caches alive.
+ */
static inline void copy_page(void *to, void *from)
{
- if (MACHINE_HAS_MVPG) {
- register unsigned long reg0 asm ("0") = 0;
- asm volatile(
- " mvpg %0,%1"
- : : "a" (to), "a" (from), "d" (reg0)
- : "memory", "cc");
- } else
- asm volatile(
- " mvc 0(256,%0),0(%1)\n"
- " mvc 256(256,%0),256(%1)\n"
- " mvc 512(256,%0),512(%1)\n"
- " mvc 768(256,%0),768(%1)\n"
- " mvc 1024(256,%0),1024(%1)\n"
- " mvc 1280(256,%0),1280(%1)\n"
- " mvc 1536(256,%0),1536(%1)\n"
- " mvc 1792(256,%0),1792(%1)\n"
- " mvc 2048(256,%0),2048(%1)\n"
- " mvc 2304(256,%0),2304(%1)\n"
- " mvc 2560(256,%0),2560(%1)\n"
- " mvc 2816(256,%0),2816(%1)\n"
- " mvc 3072(256,%0),3072(%1)\n"
- " mvc 3328(256,%0),3328(%1)\n"
- " mvc 3584(256,%0),3584(%1)\n"
- " mvc 3840(256,%0),3840(%1)\n"
- : : "a" (to), "a" (from) : "memory");
+ register void *reg2 asm ("2") = to;
+ register unsigned long reg3 asm ("3") = 0x1000;
+ register void *reg4 asm ("4") = from;
+ register unsigned long reg5 asm ("5") = 0xb0001000;
+ asm volatile(
+ " mvcl 2,4"
+ : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5)
+ : : "memory", "cc");
}
#define clear_user_page(page, vaddr, pg) clear_page(page)
@@ -154,37 +133,6 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
-/*
- * Test and clear dirty bit in storage key.
- * We can't clear the changed bit atomically. This is a potential
- * race against modification of the referenced bit. This function
- * should therefore only be called if it is not mapped in any
- * address space.
- *
- * Note that the bit gets set whenever page content is changed. That means
- * also when the page is modified by DMA or from inside the kernel.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
-static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped)
-{
- unsigned char skey;
-
- skey = page_get_storage_key(pfn << PAGE_SHIFT);
- if (!(skey & _PAGE_CHANGED))
- return 0;
- page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped);
- return 1;
-}
-
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(unsigned long pfn)
-{
- return page_reset_referenced(pfn << PAGE_SHIFT);
-}
-
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b1fa93c606a..c030900320e 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,6 +6,7 @@
/* must be set before including pci_clp.h */
#define PCI_BAR_COUNT 6
+#include <linux/pci.h>
#include <asm-generic/pci.h>
#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
@@ -21,10 +22,6 @@ void pci_iounmap(struct pci_dev *, void __iomem *);
int pci_domain_nr(struct pci_bus *);
int pci_proc_domain(struct pci_bus *);
-/* MSI arch hooks */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
-#define arch_teardown_msi_irqs arch_teardown_msi_irqs
-
#define ZPCI_BUS_NR 0 /* default bus number */
#define ZPCI_DEVFN 0 /* default device number */
@@ -53,14 +50,9 @@ struct zpci_fmb {
atomic64_t unmapped_pages;
} __packed __aligned(16);
-struct msi_map {
- unsigned long irq;
- struct msi_desc *msi;
- struct hlist_node msi_chain;
-};
-
-#define ZPCI_NR_MSI_VECS 64
-#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1)
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -71,9 +63,10 @@ enum zpci_state {
};
struct zpci_bar_struct {
+ struct resource *res; /* bus resource */
u32 val; /* bar start & 3 flag bits */
- u8 size; /* order 2 exponent */
u16 map_idx; /* index into bar mapping array */
+ u8 size; /* order 2 exponent */
};
/* Private data per function */
@@ -85,14 +78,19 @@ struct zpci_dev {
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */
+ u16 vfn; /* virtual function number */
u16 pchid; /* physical channel ID */
u8 pfgid; /* function group ID */
+ u8 pft; /* pci function type */
u16 domain;
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 uid; /* user defined id */
+ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+
/* IRQ stuff */
u64 msi_addr; /* MSI address */
- struct zdev_irq_map *irq_map;
- struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+ struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
/* DMA stuff */
@@ -106,6 +104,7 @@ struct zpci_dev {
unsigned long iommu_pages;
unsigned int next_bit;
+ char res_name[16];
struct zpci_bar_struct bars[PCI_BAR_COUNT];
u64 start_dma; /* Start of available DMA addresses */
@@ -120,12 +119,6 @@ struct zpci_dev {
struct dentry *debugfs_dev;
struct dentry *debugfs_perf;
- struct dentry *debugfs_debug;
-};
-
-struct pci_hp_callback_ops {
- int (*create_slot) (struct zpci_dev *zdev);
- void (*remove_slot) (struct zpci_dev *zdev);
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -133,56 +126,58 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
return (zdev->fh & (1UL << 31)) ? true : false;
}
+extern const struct attribute_group *zpci_attr_groups[];
+
/* -----------------------------------------------------------------------------
Prototypes
----------------------------------------------------------------------------- */
/* Base stuff */
-struct zpci_dev *zpci_alloc_device(void);
int zpci_create_device(struct zpci_dev *);
int zpci_enable_device(struct zpci_dev *);
+int zpci_disable_device(struct zpci_dev *);
void zpci_stop_device(struct zpci_dev *);
-void zpci_free_device(struct zpci_dev *);
-int zpci_scan_device(struct zpci_dev *);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
/* CLP */
-int clp_find_pci_devices(void);
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
-/* MSI */
-struct msi_desc *__irq_get_msi_desc(unsigned int);
-int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
-int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
-void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
-int zpci_msihash_init(void);
-void zpci_msihash_exit(void);
-
+#ifdef CONFIG_PCI
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
+void zpci_rescan(void);
+bool zpci_is_enabled(void);
+#else /* CONFIG_PCI */
+static inline void zpci_event_error(void *e) {}
+static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+ return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
/* Helpers */
struct zpci_dev *get_zdev(struct pci_dev *);
struct zpci_dev *get_zdev_by_fid(u32);
-bool zpci_fid_present(u32);
-
-/* sysfs */
-int zpci_sysfs_add_device(struct device *);
-void zpci_sysfs_remove_device(struct device *);
/* DMA */
int zpci_dma_init(void);
void zpci_dma_exit(void);
-/* Hotplug */
-extern struct mutex zpci_list_lock;
-extern struct list_head zpci_list;
-extern struct pci_hp_callback_ops hotplug_ops;
-extern unsigned int pci_probe;
-
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d31d739f868..dd78f92f1cc 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -44,6 +44,7 @@ struct clp_fh_list_entry {
#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
#define CLP_UTIL_STR_LEN 64
+#define CLP_PFIP_NR_SEGMENTS 4
/* List PCI functions request */
struct clp_req_list_pci {
@@ -85,7 +86,7 @@ struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u32 fmt : 4; /* cmd request block format */
u32 : 28;
- u64 reserved1;
+ u64 : 64;
u16 vfn; /* virtual fn number */
u16 : 7;
u16 util_str_avail : 1; /* utility string available? */
@@ -94,10 +95,13 @@ struct clp_rsp_query_pci {
u8 bar_size[PCI_BAR_COUNT];
u16 pchid;
u32 bar[PCI_BAR_COUNT];
- u64 reserved2;
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 : 24;
+ u8 pft; /* pci function type */
u64 sdma; /* start dma as */
u64 edma; /* end dma as */
- u64 reserved3[6];
+ u32 reserved[11];
+ u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
} __packed;
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
index 6bbec4265b6..ac24b26fc06 100644
--- a/arch/s390/include/asm/pci_debug.h
+++ b/arch/s390/include/asm/pci_debug.h
@@ -6,16 +6,8 @@
extern debug_info_t *pci_debug_msg_id;
extern debug_info_t *pci_debug_err_id;
-#ifdef CONFIG_PCI_DEBUG
-#define zpci_dbg(fmt, args...) \
- do { \
- if (pci_debug_msg_id->level >= 2) \
- debug_sprintf_event(pci_debug_msg_id, 2, fmt , ## args);\
- } while (0)
-
-#else /* !CONFIG_PCI_DEBUG */
-#define zpci_dbg(fmt, args...) do { } while (0)
-#endif
+#define zpci_dbg(imp, fmt, args...) \
+ debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args)
#define zpci_err(text...) \
do { \
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 1486a98d5da..649eb62c52b 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -1,10 +1,6 @@
#ifndef _ASM_S390_PCI_INSN_H
#define _ASM_S390_PCI_INSN_H
-#include <linux/delay.h>
-
-#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
-
/* Load/Store status codes */
#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
#define ZPCI_PCI_ST_FUNC_IN_ERR 8
@@ -58,11 +54,9 @@
struct zpci_fib {
u32 fmt : 8; /* format */
u32 : 24;
- u32 reserved1;
+ u32 : 32;
u8 fc; /* function controls */
- u8 reserved2;
- u16 reserved3;
- u32 reserved4;
+ u64 : 56;
u64 pba; /* PCI base address */
u64 pal; /* PCI address limit */
u64 iota; /* I/O Translation Anchor */
@@ -74,207 +68,19 @@ struct zpci_fib {
u32 sum : 1; /* Adapter int summary bit enabled */
u32 : 1;
u32 aisbo : 6; /* Adapter int summary bit offset */
- u32 reserved5;
+ u32 : 32;
u64 aibv; /* Adapter int bit vector address */
u64 aisb; /* Adapter int summary bit address */
u64 fmb_addr; /* Function measurement block address and key */
- u64 reserved6;
- u64 reserved7;
-} __packed;
-
-/* Modify PCI Function Controls */
-static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
-{
- u8 cc;
-
- asm volatile (
- " .insn rxy,0xe300000000d0,%[req],%[fib]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
- : : "cc");
- *status = req >> 24 & 0xff;
- return cc;
-}
-
-static inline int mpcifc_instr(u64 req, struct zpci_fib *fib)
-{
- u8 cc, status;
-
- do {
- cc = __mpcifc(req, fib, &status);
- if (cc == 2)
- msleep(ZPCI_INSN_BUSY_DELAY);
- } while (cc == 2);
-
- if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d\n",
- __func__, cc, status);
- return (cc) ? -EIO : 0;
-}
-
-/* Refresh PCI Translations */
-static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
-{
- register u64 __addr asm("2") = addr;
- register u64 __range asm("3") = range;
- u8 cc;
-
- asm volatile (
- " .insn rre,0xb9d30000,%[fn],%[addr]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [fn] "+d" (fn)
- : [addr] "d" (__addr), "d" (__range)
- : "cc");
- *status = fn >> 24 & 0xff;
- return cc;
-}
-
-static inline int rpcit_instr(u64 fn, u64 addr, u64 range)
-{
- u8 cc, status;
-
- do {
- cc = __rpcit(fn, addr, range, &status);
- if (cc == 2)
- udelay(ZPCI_INSN_BUSY_DELAY);
- } while (cc == 2);
-
- if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n",
- __func__, cc, status, addr, range);
- return (cc) ? -EIO : 0;
-}
-
-/* Store PCI function controls */
-static inline u8 __stpcifc(u32 handle, u8 space, struct zpci_fib *fib, u8 *status)
-{
- u64 fn = (u64) handle << 32 | space << 16;
- u8 cc;
-
- asm volatile (
- " .insn rxy,0xe300000000d4,%[fn],%[fib]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [fn] "+d" (fn), [fib] "=m" (*fib)
- : : "cc");
- *status = fn >> 24 & 0xff;
- return cc;
-}
-
-/* Set Interruption Controls */
-static inline void sic_instr(u16 ctl, char *unused, u8 isc)
-{
- asm volatile (
- " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
- : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
-}
-
-/* PCI Load */
-static inline u8 __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
-{
- register u64 __req asm("2") = req;
- register u64 __offset asm("3") = offset;
- u64 __data;
- u8 cc;
-
- asm volatile (
- " .insn rre,0xb9d20000,%[data],%[req]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [data] "=d" (__data), [req] "+d" (__req)
- : "d" (__offset)
- : "cc");
- *status = __req >> 24 & 0xff;
- *data = __data;
- return cc;
-}
-
-static inline int pcilg_instr(u64 *data, u64 req, u64 offset)
-{
- u8 cc, status;
-
- do {
- cc = __pcilg(data, req, offset, &status);
- if (cc == 2)
- udelay(ZPCI_INSN_BUSY_DELAY);
- } while (cc == 2);
-
- if (cc) {
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
- /* TODO: on IO errors set data to 0xff...
- * here or in users of pcilg (le conversion)?
- */
- }
- return (cc) ? -EIO : 0;
-}
-
-/* PCI Store */
-static inline u8 __pcistg(u64 data, u64 req, u64 offset, u8 *status)
-{
- register u64 __req asm("2") = req;
- register u64 __offset asm("3") = offset;
- u8 cc;
-
- asm volatile (
- " .insn rre,0xb9d00000,%[data],%[req]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [req] "+d" (__req)
- : "d" (__offset), [data] "d" (data)
- : "cc");
- *status = __req >> 24 & 0xff;
- return cc;
-}
-
-static inline int pcistg_instr(u64 data, u64 req, u64 offset)
-{
- u8 cc, status;
-
- do {
- cc = __pcistg(data, req, offset, &status);
- if (cc == 2)
- udelay(ZPCI_INSN_BUSY_DELAY);
- } while (cc == 2);
-
- if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
- return (cc) ? -EIO : 0;
-}
-
-/* PCI Store Block */
-static inline u8 __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
-{
- u8 cc;
-
- asm volatile (
- " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [req] "+d" (req)
- : [offset] "d" (offset), [data] "Q" (*data)
- : "cc");
- *status = req >> 24 & 0xff;
- return cc;
-}
-
-static inline int pcistb_instr(const u64 *data, u64 req, u64 offset)
-{
- u8 cc, status;
-
- do {
- cc = __pcistb(data, req, offset, &status);
- if (cc == 2)
- udelay(ZPCI_INSN_BUSY_DELAY);
- } while (cc == 2);
-
- if (cc)
- printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
- __func__, cc, status, req, offset);
- return (cc) ? -EIO : 0;
-}
+ u32 : 32;
+ u32 gd;
+} __packed __aligned(8);
+
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 5fd81f31d6c..d194d544d69 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
u64 data; \
int rc; \
\
- rc = pcilg_instr(&data, req, ZPCI_OFFSET(addr)); \
+ rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
if (rc) \
data = -1ULL; \
return (RETTYPE) data; \
@@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \
u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data = (VALTYPE) val; \
\
- pcistg_instr(data, req, ZPCI_OFFSET(addr)); \
+ zpci_store(data, req, ZPCI_OFFSET(addr)); \
}
zpci_read(8, u64)
@@ -83,15 +83,18 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len
val = 0; /* let FW report error */
break;
}
- return pcistg_instr(val, req, offset);
+ return zpci_store(val, req, offset);
}
static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
{
u64 data;
- u8 cc;
+ int cc;
+
+ cc = zpci_load(&data, req, offset);
+ if (cc)
+ goto out;
- cc = pcilg_instr(&data, req, offset);
switch (len) {
case 1:
*((u8 *) dst) = (u8) data;
@@ -106,12 +109,13 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
*((u64 *) dst) = (u64) data;
break;
}
+out:
return cc;
}
static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
{
- return pcistb_instr(data, req, offset);
+ return zpci_store_block(data, req, offset);
}
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 86fe0ee2cee..fa91e009745 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -10,16 +10,22 @@
*/
#define __my_cpu_offset S390_lowcore.percpu_offset
+#ifdef CONFIG_64BIT
+
/*
* For 64 bit module code, the module may be more than 4G above the
* per cpu area, use weak definitions to force the compiler to
* generate external references.
*/
-#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) && defined(MODULE)
+#if defined(CONFIG_SMP) && defined(MODULE)
#define ARCH_NEEDS_WEAK_PER_CPU
#endif
-#define arch_this_cpu_to_op(pcp, val, op) \
+/*
+ * We use a compare-and-swap loop since that uses less cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op) \
({ \
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ old__, new__, prev__; \
@@ -30,42 +36,101 @@
do { \
old__ = prev__; \
new__ = old__ op (val); \
- switch (sizeof(*ptr__)) { \
- case 8: \
- prev__ = cmpxchg64(ptr__, old__, new__); \
- break; \
- default: \
- prev__ = cmpxchg(ptr__, old__, new__); \
- } \
+ prev__ = cmpxchg(ptr__, old__, new__); \
} while (prev__ != old__); \
preempt_enable(); \
new__; \
})
-#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ if (__builtin_constant_p(val__) && \
+ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
+ asm volatile( \
+ op2 " %[ptr__],%[val__]\n" \
+ : [ptr__] "+Q" (*ptr__) \
+ : [val__] "i" ((szcast)val__) \
+ : "cc"); \
+ } else { \
+ asm volatile( \
+ op1 " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ } \
+ preempt_enable(); \
+}
-#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
-#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &)
+#define arch_this_cpu_add_return(pcp, val, op) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable(); \
+ old__ + val__; \
+})
-#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
-#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
+#define arch_this_cpu_to_op(pcp, val, op) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable(); \
+}
+
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
({ \
@@ -74,13 +139,7 @@
pcp_op_T__ *ptr__; \
preempt_disable(); \
ptr__ = __this_cpu_ptr(&(pcp)); \
- switch (sizeof(*ptr__)) { \
- case 8: \
- ret__ = cmpxchg64(ptr__, oval, nval); \
- break; \
- default: \
- ret__ = cmpxchg(ptr__, oval, nval); \
- } \
+ ret__ = cmpxchg(ptr__, oval, nval); \
preempt_enable(); \
ret__; \
})
@@ -104,9 +163,7 @@
#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#ifdef CONFIG_64BIT
#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#endif
#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
({ \
@@ -124,9 +181,9 @@
})
#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
-#ifdef CONFIG_64BIT
#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
-#endif
+
+#endif /* CONFIG_64BIT */
#include <asm-generic/percpu.h>
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 5f0173a3169..159a8ec6da9 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,16 +1,96 @@
/*
* Performance event support - s390 specific definitions.
*
- * Copyright IBM Corp. 2009, 2012
+ * Copyright IBM Corp. 2009, 2013
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
-#include <asm/cpu_mf.h>
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
-/* CPU-measurement counter facility */
-#define PERF_CPUM_CF_MAX_CTR 256
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
/* Per-CPU flags for PMU states */
#define PMU_F_RESERVED 0x1000
#define PMU_F_ENABLED 0x2000
+#define PMU_F_IN_USE 0x4000
+#define PMU_F_ERR_IBE 0x0100
+#define PMU_F_ERR_LSDA 0x0200
+#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf defintions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page);
+#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id) \
+ PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name)
+
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+ unsigned char in_guest:1; /* guest sample */
+ unsigned long reserved:63; /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \
+ PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */
+
+#define REG_NONE 0
+#define REG_OVERFLOW 1
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc) ((hwc)->config)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space. Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE
+ u64 format;
+ u32 size; /* Size of sf_raw_sample */
+ u16 bsdes; /* Basic-sampling data entry size */
+ u16 dsdes; /* Diagnostic-sampling data entry size */
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+ u8 padding[]; /* Padding to next multiple of 8 */
+} __packed;
+
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
+#endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 590c3219c63..9e18a61d3df 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,6 +22,11 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
+ bool init_skey);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned long key, bool nq);
+
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
@@ -88,11 +93,22 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
unsigned long *table = crst_table_alloc(mm);
- if (table)
- crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
return (pmd_t *) table;
}
-#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd)
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
+ crst_table_free(mm, (unsigned long *) pmd);
+}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c1d7930a82f..fcba5e03839 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -29,6 +29,7 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/page-flags.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -56,6 +57,7 @@ extern unsigned long zero_page_mask;
(((unsigned long)(vaddr)) &zero_page_mask))))
#define __HAVE_COLOR_ZERO_PAGE
+/* TODO: s390 cannot support io_remap_pfn_range... */
#endif /* !__ASSEMBLY__ */
/*
@@ -215,61 +217,58 @@ extern unsigned long MODULES_END;
/* Hardware bits in the page table entry */
#define _PAGE_CO 0x100 /* HW Change-bit override */
-#define _PAGE_RO 0x200 /* HW read-only bit */
+#define _PAGE_PROTECT 0x200 /* HW read-only bit */
#define _PAGE_INVALID 0x400 /* HW invalid bit */
+#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
/* Software bits in the page table entry */
-#define _PAGE_SWT 0x001 /* SW pte type bit t */
-#define _PAGE_SWX 0x002 /* SW pte type bit x */
-#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */
-#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */
-#define _PAGE_SPECIAL 0x010 /* SW associated with special page */
+#define _PAGE_PRESENT 0x001 /* SW pte present bit */
+#define _PAGE_TYPE 0x002 /* SW pte type bit */
+#define _PAGE_YOUNG 0x004 /* SW pte young bit */
+#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
+#define _PAGE_READ 0x010 /* SW pte read bit */
+#define _PAGE_WRITE 0x020 /* SW pte write bit */
+#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
+#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
#define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
-
-/* Six different types of pages. */
-#define _PAGE_TYPE_EMPTY 0x400
-#define _PAGE_TYPE_NONE 0x401
-#define _PAGE_TYPE_SWAP 0x403
-#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
-#define _PAGE_TYPE_RO 0x200
-#define _PAGE_TYPE_RW 0x000
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
+ _PAGE_DIRTY | _PAGE_YOUNG)
/*
- * Only four types for huge pages, using the invalid bit and protection bit
- * of a segment table entry.
- */
-#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */
-#define _HPAGE_TYPE_NONE 0x220
-#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */
-#define _HPAGE_TYPE_RW 0x000
-
-/*
- * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
- * pte_none and pte_file to find out the pte type WITHOUT holding the page
- * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
- * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs
- * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards.
- * This change is done while holding the lock, but the intermediate step
- * of a previously valid pte with the hw invalid bit set can be observed by
- * handle_pte_fault. That makes it necessary that all valid pte types with
- * the hw invalid bit set must be distinguishable from the four pte types
- * empty, none, swap and file.
+ * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
+ * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
+ * is used to distinguish present from not-present ptes. It is changed only
+ * with the page table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte:
*
- * irxt ipte irxt
- * _PAGE_TYPE_EMPTY 1000 -> 1000
- * _PAGE_TYPE_NONE 1001 -> 1001
- * _PAGE_TYPE_SWAP 1011 -> 1011
- * _PAGE_TYPE_FILE 11?1 -> 11?1
- * _PAGE_TYPE_RO 0100 -> 1100
- * _PAGE_TYPE_RW 0000 -> 1000
+ * 842100000000
+ * 000084210000
+ * 000000008421
+ * .IR...wrdytp
+ * empty .10...000000
+ * swap .10...xxxx10
+ * file .11...xxxxx0
+ * prot-none, clean, old .11...000001
+ * prot-none, clean, young .11...000101
+ * prot-none, dirty, old .10...001001
+ * prot-none, dirty, young .10...001101
+ * read-only, clean, old .11...010001
+ * read-only, clean, young .01...010101
+ * read-only, dirty, old .11...011001
+ * read-only, dirty, young .01...011101
+ * read-write, clean, old .11...110001
+ * read-write, clean, young .01...110101
+ * read-write, dirty, old .10...111001
+ * read-write, dirty, young .00...111101
*
- * pte_none is true for bits combinations 1000, 1010, 1100, 1110
- * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
- * pte_file is true for bits combinations 1101, 1111
- * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
+ * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
+ * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
+ * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
*/
#ifndef CONFIG_64BIT
@@ -282,27 +281,36 @@ extern unsigned long MODULES_END;
#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+/*
+ * Segment table entry encoding (I = invalid, R = read-only bit):
+ * ..R...I.....
+ * prot-none ..1...1.....
+ * read-only ..1...0.....
+ * read-write ..0...0.....
+ * empty ..0...1.....
+ */
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf0000000UL
-#define RCP_FP_BIT 0x08000000UL
-#define RCP_PCL_BIT 0x00800000UL
-#define RCP_HR_BIT 0x00400000UL
-#define RCP_HC_BIT 0x00200000UL
-#define RCP_GR_BIT 0x00040000UL
-#define RCP_GC_BIT 0x00020000UL
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x00008000UL
-#define KVM_UC_BIT 0x00004000UL
+#define PGSTE_ACC_BITS 0xf0000000UL
+#define PGSTE_FP_BIT 0x08000000UL
+#define PGSTE_PCL_BIT 0x00800000UL
+#define PGSTE_HR_BIT 0x00400000UL
+#define PGSTE_HC_BIT 0x00200000UL
+#define PGSTE_GR_BIT 0x00040000UL
+#define PGSTE_GC_BIT 0x00020000UL
+#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
@@ -321,7 +329,8 @@ extern unsigned long MODULES_END;
/* Bits in the region table entry */
#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
-#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */
+#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
@@ -329,46 +338,71 @@ extern unsigned long MODULES_END;
#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
-#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
+#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
-#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
+#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
-#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
+#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
+#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
+#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
-#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY (0)
-#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
+#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ * ..R...I...y.
+ * prot-none, old ..0...1...1.
+ * prot-none, young ..1...1...1.
+ * read-only, old ..1...1...0.
+ * read-only, young ..1...0...1.
+ * read-write, old ..0...1...0.
+ * read-write, young ..0...0...1.
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ */
+
#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
/* Set of bits not changed in pmd_modify */
#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
| _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf000000000000000UL
-#define RCP_FP_BIT 0x0800000000000000UL
-#define RCP_PCL_BIT 0x0080000000000000UL
-#define RCP_HR_BIT 0x0040000000000000UL
-#define RCP_HC_BIT 0x0020000000000000UL
-#define RCP_GR_BIT 0x0004000000000000UL
-#define RCP_GC_BIT 0x0002000000000000UL
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x0000800000000000UL
-#define KVM_UC_BIT 0x0000400000000000UL
+#define PGSTE_ACC_BITS 0xf000000000000000UL
+#define PGSTE_FP_BIT 0x0800000000000000UL
+#define PGSTE_PCL_BIT 0x0080000000000000UL
+#define PGSTE_HR_BIT 0x0040000000000000UL
+#define PGSTE_HC_BIT 0x0020000000000000UL
+#define PGSTE_GR_BIT 0x0004000000000000UL
+#define PGSTE_GC_BIT 0x0002000000000000UL
+#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+
/*
* A user page table pointer has the space-switch-event bit, the
* private-space-control bit and the storage-alteration-event-control
@@ -380,12 +414,18 @@ extern unsigned long MODULES_END;
/*
* Page protection definitions.
*/
-#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE)
-#define PAGE_RO __pgprot(_PAGE_TYPE_RO)
-#define PAGE_RW __pgprot(_PAGE_TYPE_RW)
-
-#define PAGE_KERNEL PAGE_RW
-#define PAGE_COPY PAGE_RO
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+ _PAGE_PROTECT)
/*
* On s390 the page table entry has an invalid bit and a read-only bit.
@@ -394,28 +434,31 @@ extern unsigned long MODULES_END;
*/
/*xwr*/
#define __P000 PAGE_NONE
-#define __P001 PAGE_RO
-#define __P010 PAGE_RO
-#define __P011 PAGE_RO
-#define __P100 PAGE_RO
-#define __P101 PAGE_RO
-#define __P110 PAGE_RO
-#define __P111 PAGE_RO
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
#define __S000 PAGE_NONE
-#define __S001 PAGE_RO
-#define __S010 PAGE_RW
-#define __S011 PAGE_RW
-#define __S100 PAGE_RO
-#define __S101 PAGE_RO
-#define __S110 PAGE_RW
-#define __S111 PAGE_RW
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
-static inline int mm_exclusive(struct mm_struct *mm)
-{
- return likely(mm == current->active_mm &&
- atomic_read(&mm->context.attach_count) <= 1);
-}
+/*
+ * Segment entry (large page) protection definitions.
+ */
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_NONE)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -425,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
#endif
return 0;
}
+
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (mm->context.use_skey)
+ return 1;
+#endif
+ return 0;
+}
+
/*
* pgd/pmd/pte query functions
*/
@@ -452,7 +505,7 @@ static inline int pgd_none(pgd_t pgd)
{
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
return 0;
- return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
+ return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pgd_bad(pgd_t pgd)
@@ -463,7 +516,7 @@ static inline int pgd_bad(pgd_t pgd)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pgd_val(pgd) & mask) != 0;
}
@@ -479,7 +532,7 @@ static inline int pud_none(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
return 0;
- return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
+ return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
}
static inline int pud_large(pud_t pud)
@@ -497,7 +550,7 @@ static inline int pud_bad(pud_t pud)
* invalid for either table entry.
*/
unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
return (pud_val(pud) & mask) != 0;
}
@@ -506,30 +559,36 @@ static inline int pud_bad(pud_t pud)
static inline int pmd_present(pmd_t pmd)
{
- unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO;
- return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
- !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
+ return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_none(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) &&
- !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
+ return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
}
static inline int pmd_large(pmd_t pmd)
{
#ifdef CONFIG_64BIT
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
#else
return 0;
#endif
}
+static inline int pmd_prot_none(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+}
+
static inline int pmd_bad(pmd_t pmd)
{
- unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
- return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
+#ifdef CONFIG_64BIT
+ if (pmd_large(pmd))
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+#endif
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -548,31 +607,48 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+ if (pmd_prot_none(pmd))
+ return 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
}
static inline int pmd_young(pmd_t pmd)
{
- return 0;
+ int young = 0;
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd))
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
+ else
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+#endif
+ return young;
+}
+
+static inline int pte_present(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x001) == 0x001 */
+ return (pte_val(pte) & _PAGE_PRESENT) != 0;
}
static inline int pte_none(pte_t pte)
{
- return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
+ /* Bit pattern: pte == 0x400 */
+ return pte_val(pte) == _PAGE_INVALID;
}
-static inline int pte_present(pte_t pte)
+static inline int pte_swap(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
- return (pte_val(pte) & mask) == _PAGE_TYPE_NONE ||
- (!(pte_val(pte) & _PAGE_INVALID) &&
- !(pte_val(pte) & _PAGE_SWT));
+ /* Bit pattern: (pte & 0x603) == 0x402 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
+ _PAGE_TYPE | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_TYPE);
}
static inline int pte_file(pte_t pte)
{
- unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
- return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
+ /* Bit pattern: (pte & 0x601) == 0x600 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_PROTECT);
}
static inline int pte_special(pte_t pte)
@@ -596,12 +672,12 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
asm(
" lg %0,%2\n"
"0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */
- " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
" csg %0,%1,%2\n"
" jl 0b\n"
: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc");
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
return __pgste(new);
}
@@ -610,132 +686,177 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
asm(
- " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */
+ " nihh %1,0xff7f\n" /* clear PCL bit */
" stg %1,%0\n"
: "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
preempt_enable();
#endif
}
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_get(pte_t *ptep)
{
+ unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
- unsigned long address, bits;
- unsigned char skey;
-
- if (!pte_present(*ptep))
- return pgste;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = page_get_storage_key(address);
- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Clear page changed & referenced bit in the storage key */
- if (bits & _PAGE_CHANGED)
- page_set_storage_key(address, skey ^ bits, 1);
- else if (bits)
- page_reset_referenced(address);
- /* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
- /* Get host changed & referenced bits from pgste */
- bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
- /* Clear host bits in pgste. */
- pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
- pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
- /* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) |=
- (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
- /* Transfer changed and referenced to kvm user bits */
- pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */
- /* Transfer changed & referenced to pte sofware bits */
- pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
- return pgste;
+ return __pgste(pgste);
+}
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
}
-static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+ struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
- int young;
+ unsigned long address, bits, skey;
- if (!pte_present(*ptep))
+ if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
return pgste;
- young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
- /* Transfer page referenced bit to pte software bit (host view) */
- if (young || (pgste_val(pgste) & RCP_HR_BIT))
- pte_val(*ptep) |= _PAGE_SWR;
- /* Clear host referenced bit in pgste. */
- pgste_val(pgste) &= ~RCP_HR_BIT;
- /* Transfer page referenced bit to guest bit in pgste */
- pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */
+ address = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
return pgste;
}
-static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
unsigned long address;
- unsigned long okey, nkey;
+ unsigned long nkey;
- if (!pte_present(entry))
+ if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
address = pte_val(entry) & PAGE_MASK;
- okey = nkey = page_get_storage_key(address);
- nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
- /* Set page access key and fetch protection bit from pgste */
- nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
- if (okey != nkey)
- page_set_storage_key(address, nkey, 1);
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
#endif
}
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+ if ((pte_val(entry) & _PAGE_PRESENT) &&
+ (pte_val(entry) & _PAGE_WRITE) &&
+ !(pte_val(entry) & _PAGE_INVALID)) {
+ if (!MACHINE_HAS_ESOP) {
+ /*
+ * Without enhanced suppression-on-protection force
+ * the dirty bit on for all writable ptes.
+ */
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
+ }
+ if (!(pte_val(entry) & _PAGE_PROTECT))
+ /* This pte allows write access, set user-dirty */
+ pgste_val(pgste) |= PGSTE_UC_BIT;
+ }
+ *ptep = entry;
+ return pgste;
+}
+
/**
* struct gmap_struct - guest address space
* @mm: pointer to the parent mm_struct
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
struct mm_struct *mm;
unsigned long *table;
unsigned long asce;
+ void *private;
struct list_head crst_list;
+ bool pfault_enabled;
};
/**
* struct gmap_rmap - reverse mapping for segment table entries
- * @next: pointer to the next gmap_rmap structure in the list
+ * @gmap: pointer to the gmap_struct
* @entry: pointer to a segment table entry
+ * @vmaddr: virtual address in the guest address space
*/
struct gmap_rmap {
struct list_head list;
+ struct gmap *gmap;
unsigned long *entry;
+ unsigned long vmaddr;
};
/**
* struct gmap_pgtable - gmap information attached to a page table
* @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries maping a page table
+ * @mapper: list of segment table entries mapping a page table
*/
struct gmap_pgtable {
unsigned long vmaddr;
struct list_head mapper;
};
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+ struct list_head list;
+ void (*notifier_call)(struct gmap *gmap, unsigned long address);
+};
+
struct gmap *gmap_alloc(struct mm_struct *mm);
void gmap_free(struct gmap *gmap);
void gmap_enable(struct gmap *gmap);
void gmap_disable(struct gmap *gmap);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long length);
+ unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(unsigned long address, struct gmap *);
+unsigned long gmap_translate(unsigned long address, struct gmap *);
unsigned long __gmap_fault(unsigned long address, struct gmap *);
unsigned long gmap_fault(unsigned long address, struct gmap *);
void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
+void __gmap_zap(unsigned long address, struct gmap *);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
+ gmap_do_ipte_notify(mm, ptep);
+ }
+#endif
+ return pgste;
+}
/*
* Certain architectures need to do special things when PTEs
@@ -749,11 +870,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste_set_pte(ptep, pgste, entry);
- *ptep = entry;
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste_set_key(ptep, pgste, entry, mm);
+ pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
- } else
+ } else {
+ if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
+ pte_val(entry) |= _PAGE_CO;
*ptep = entry;
+ }
}
/*
@@ -762,25 +887,23 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
static inline int pte_write(pte_t pte)
{
- return (pte_val(pte) & _PAGE_RO) == 0;
+ return (pte_val(pte) & _PAGE_WRITE) != 0;
}
static inline int pte_dirty(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- if (pte_val(pte) & _PAGE_SWC)
- return 1;
-#endif
- return 0;
+ return (pte_val(pte) & _PAGE_DIRTY) != 0;
}
static inline int pte_young(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- if (pte_val(pte) & _PAGE_SWR)
- return 1;
-#endif
- return 0;
+ return (pte_val(pte) & _PAGE_YOUNG) != 0;
+}
+
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_UNUSED;
}
/*
@@ -805,12 +928,12 @@ static inline void pud_clear(pud_t *pud)
static inline void pmd_clear(pmd_t *pmdp)
{
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
}
/*
@@ -821,46 +944,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot);
+ /*
+ * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
+ * invalid bit set, clear it again for readable, young pages
+ */
+ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+ pte_val(pte) &= ~_PAGE_INVALID;
+ /*
+ * newprot for PAGE_READ and PAGE_WRITE has the page protection
+ * bit set, clear it again for writable, dirty pages
+ */
+ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- /* Do not clobber _PAGE_TYPE_NONE pages! */
- if (!(pte_val(pte) & _PAGE_INVALID))
- pte_val(pte) |= _PAGE_RO;
+ pte_val(pte) &= ~_PAGE_WRITE;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_RO;
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pte_val(pte) & _PAGE_DIRTY)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- pte_val(pte) &= ~_PAGE_SWC;
-#endif
+ pte_val(pte) &= ~_PAGE_DIRTY;
+ pte_val(pte) |= _PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
{
+ pte_val(pte) |= _PAGE_DIRTY;
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte_val(pte) &= ~_PAGE_PROTECT;
return pte;
}
static inline pte_t pte_mkold(pte_t pte)
{
-#ifdef CONFIG_PGSTE
- pte_val(pte) &= ~_PAGE_SWR;
-#endif
+ pte_val(pte) &= ~_PAGE_YOUNG;
+ pte_val(pte) |= _PAGE_INVALID;
return pte;
}
static inline pte_t pte_mkyoung(pte_t pte)
{
+ pte_val(pte) |= _PAGE_YOUNG;
+ if (pte_val(pte) & _PAGE_READ)
+ pte_val(pte) &= ~_PAGE_INVALID;
return pte;
}
@@ -873,68 +1013,101 @@ static inline pte_t pte_mkspecial(pte_t pte)
#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
- /*
- * PROT_NONE needs to be remapped from the pte type to the ste type.
- * The HW invalid bit is also different for pte and ste. The pte
- * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE
- * bit, so we don't have to clear it.
- */
- if (pte_val(pte) & _PAGE_INVALID) {
- if (pte_val(pte) & _PAGE_SWT)
- pte_val(pte) |= _HPAGE_TYPE_NONE;
- pte_val(pte) |= _SEGMENT_ENTRY_INV;
- }
- /*
- * Clear SW pte bits SWT and SWX, there are no SW bits in a segment
- * table entry.
- */
- pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX);
- /*
- * Also set the change-override bit because we don't need dirty bit
- * tracking for hugetlbfs pages.
- */
- pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
+ pte_val(pte) |= _PAGE_LARGE;
return pte;
}
#endif
-/*
- * Get (and clear) the user dirty bit for a pte.
- */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
- pte_t *ptep)
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{
- pgste_t pgste;
- int dirty = 0;
+ unsigned long pto = (unsigned long) ptep;
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_update_all(ptep, pgste);
- dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
- pgste_val(pgste) &= ~KVM_UC_BIT;
- pgste_set_unlock(ptep, pgste);
- return dirty;
- }
- return dirty;
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidation + global TLB flush for the pte */
+ asm volatile(
+ " ipte %2,%3"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidation + local TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%2,%3,0,1"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+static inline void ptep_flush_direct(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active, count;
+
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(address, ptep);
+ else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active, count;
+
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
/*
- * Get (and clear) the user referenced bit for a pte.
+ * Get (and clear) the user dirty bit for a pte.
*/
-static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+ unsigned long addr,
pte_t *ptep)
{
pgste_t pgste;
- int young = 0;
+ pte_t pte;
+ int dirty;
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_update_young(ptep, pgste);
- young = !!(pgste_val(pgste) & KVM_UR_BIT);
- pgste_val(pgste) &= ~KVM_UR_BIT;
- pgste_set_unlock(ptep, pgste);
+ if (!mm_has_pgste(mm))
+ return 0;
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ __ptep_ipte(addr, ptep);
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
}
- return young;
+ pgste_set_unlock(ptep, pgste);
+ return dirty;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@ -943,46 +1116,34 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
{
pgste_t pgste;
pte_t pte;
+ int young;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_update_young(ptep, pgste);
- pte = *ptep;
- *ptep = pte_mkold(pte);
- pgste_set_unlock(ptep, pgste);
- return pte_young(pte);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
}
- return 0;
+
+ pte = *ptep;
+ ptep_flush_direct(vma->vm_mm, addr, ptep);
+ young = pte_young(pte);
+ pte = pte_mkold(pte);
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+
+ return young;
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB
- * With virtualization we handle the reference bit, without we
- * we can simply return */
return ptep_test_and_clear_young(vma, address, ptep);
}
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
-{
- if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-#ifndef CONFIG_64BIT
- /* pto must point to the start of the segment table */
- pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-#else
- /* ipte in zarch mode can do the math */
- pte_t *pto = ptep;
-#endif
- asm volatile(
- " ipte %2,%3"
- : "=m" (*ptep) : "m" (*ptep),
- "a" (pto), "a" (address));
- }
-}
-
/*
* This is hard to understand. ptep_get_and_clear and ptep_clear_flush
* both clear the TLB for the unmapped pte. The reason is that
@@ -1003,17 +1164,17 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
- if (mm_has_pgste(mm))
+ if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste);
+ pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
@@ -1024,15 +1185,21 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
unsigned long address,
pte_t *ptep)
{
+ pgste_t pgste;
pte_t pte;
- mm->context.flush_mm = 1;
- if (mm_has_pgste(mm))
- pgste_get_lock(ptep);
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste_ipte_notify(mm, ptep, pgste);
+ }
pte = *ptep;
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
+ ptep_flush_lazy(mm, address, ptep);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_update_all(&pte, pgste, mm);
+ pgste_set(ptep, pgste);
+ }
return pte;
}
@@ -1040,9 +1207,15 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
unsigned long address,
pte_t *ptep, pte_t pte)
{
- *ptep = pte;
- if (mm_has_pgste(mm))
- pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
+ pgste_t pgste;
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
}
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
@@ -1052,15 +1225,20 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
pgste_t pgste;
pte_t pte;
- if (mm_has_pgste(vma->vm_mm))
+ if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ }
pte = *ptep;
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_direct(vma->vm_mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_update_all(&pte, pgste);
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(pte) |= _PAGE_UNUSED;
+ pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
@@ -1081,16 +1259,18 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pgste_t pgste;
pte_t pte;
- if (mm_has_pgste(mm))
+ if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
pte = *ptep;
if (!full)
- __ptep_ipte(address, ptep);
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste);
+ if (!full && mm_has_pgste(mm)) {
+ pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
@@ -1104,16 +1284,19 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
pte_t pte = *ptep;
if (pte_write(pte)) {
- mm->context.flush_mm = 1;
- if (mm_has_pgste(mm))
+ if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
- if (!mm_exclusive(mm))
- __ptep_ipte(address, ptep);
- *ptep = pte_wrprotect(pte);
+ ptep_flush_lazy(mm, address, ptep);
+ pte = pte_wrprotect(pte);
- if (mm_has_pgste(mm))
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
}
return pte;
}
@@ -1127,14 +1310,18 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
if (pte_same(*ptep, entry))
return 0;
- if (mm_has_pgste(vma->vm_mm))
+ if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ }
- __ptep_ipte(address, ptep);
- *ptep = entry;
+ ptep_flush_direct(vma->vm_mm, address, ptep);
- if (mm_has_pgste(vma->vm_mm))
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = entry;
return 1;
}
@@ -1146,14 +1333,17 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot);
- return __pte;
+ return pte_mkyoung(__pte);
}
static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
{
unsigned long physpage = page_to_phys(page);
+ pte_t __pte = mk_pte_phys(physpage, pgprot);
- return mk_pte_phys(physpage, pgprot);
+ if (pte_write(__pte) && PageDirty(page))
+ __pte = pte_mkdirty(__pte);
+ return __pte;
}
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -1209,100 +1399,190 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
- unsigned long sto = (unsigned long) pmdp -
- pmd_index(address) * sizeof(pmd_t);
+ /*
+ * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
+ * Convert to segment table entry format.
+ */
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+ return pgprot_val(SEGMENT_NONE);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+ return pgprot_val(SEGMENT_READ);
+ return pgprot_val(SEGMENT_WRITE);
+}
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
- asm volatile(
- " .insn rrf,0xb98e0000,%2,%3,0,0"
- : "=m" (*pmdp)
- : "m" (*pmdp), "a" (sto),
- "a" ((address & HPAGE_MASK))
- : "cc"
- );
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
}
+#endif
+ return pmd;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
+}
-#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE)
-#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO)
-#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW)
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ int young;
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+ young = pmd_young(pmd);
+ pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (young)
+ pmd = pmd_mkyoung(pmd);
+ return pmd;
+}
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return pmd_mkyoung(__pmd);
+}
-static inline int pmd_trans_splitting(pmd_t pmd)
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
- return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t entry)
+static inline void __pmdp_csp(pmd_t *pmdp)
{
- *pmdp = entry;
+ register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+ register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+ _SEGMENT_ENTRY_INVALID;
+ register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+ asm volatile(
+ " csp %1,%3"
+ : "=m" (*pmdp)
+ : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
}
-static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
{
- /*
- * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx)
- * Convert to segment table entry format.
- */
- if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
- return pgprot_val(SEGMENT_NONE);
- if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
- return pgprot_val(SEGMENT_RO);
- return pgprot_val(SEGMENT_RW);
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,0"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
}
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
{
- pmd_val(pmd) &= _SEGMENT_CHG_MASK;
- pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- return pmd;
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,1"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
}
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
+static inline void pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- return pmd;
+ int active, count;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(address, pmdp);
+ else
+ __pmdp_idte(address, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline void pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
- /* Do not clobber _HPAGE_TYPE_NONE pages! */
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
- return pmd;
+ int active, count;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(address, pmdp);
+ else
+ __pmdp_csp(pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
- return pmd;
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
}
-static inline pmd_t pmd_mkdirty(pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t entry)
{
- /* No dirty bit in the segment table entry. */
+ if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
+ pmd_val(entry) |= _SEGMENT_ENTRY_CO;
+ *pmdp = entry;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
return pmd;
}
-static inline pmd_t pmd_mkold(pmd_t pmd)
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- /* No referenced bit in the segment table entry. */
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- /* No referenced bit in the segment table entry. */
+ /* No dirty bit in the segment table entry. */
return pmd;
}
@@ -1310,34 +1590,12 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
- long tmp, rc;
- int counter;
-
- rc = 0;
- if (MACHINE_HAS_RRBM) {
- counter = PTRS_PER_PTE >> 6;
- asm volatile(
- "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
- " ogr %1,%0\n"
- " la %3,0(%4,%3)\n"
- " brct %2,0b\n"
- : "=&d" (tmp), "+&d" (rc), "+d" (counter),
- "+a" (pmd_addr)
- : "a" (64 * 4096UL) : "cc");
- rc = !!rc;
- } else {
- counter = PTRS_PER_PTE;
- asm volatile(
- "0: rrbe 0,%2\n"
- " la %2,0(%3,%2)\n"
- " brc 12,1f\n"
- " lhi %0,1\n"
- "1: brct %1,0b\n"
- : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
- : "a" (4096UL) : "cc");
- }
- return rc;
+ pmd_t pmd;
+
+ pmd = *pmdp;
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
+ *pmdp = pmd_mkold(pmd);
+ return pmd_young(pmd);
}
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
@@ -1346,7 +1604,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
{
pmd_t pmd = *pmdp;
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(mm, address, pmdp);
pmd_clear(pmdp);
return pmd;
}
@@ -1362,14 +1620,19 @@ static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
static inline void pmdp_invalidate(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
}
-static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
- pmd_t __pmd;
- pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return __pmd;
+ pmd_t pmd = *pmdp;
+
+ if (pmd_write(pmd)) {
+ pmdp_flush_direct(mm, address, pmdp);
+ set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
+ }
}
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
@@ -1398,10 +1661,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specifiation exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 21 and bit 22 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 30 and 31 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 21, 22, 30 and 31 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 1-19 and bits 24-29 to store type and offset.
* We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
* plus 24 for the offset.
@@ -1415,10 +1676,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
* exception will occur instead of a page translation exception. The
* specifiation exception has the bad habit not to store necessary
* information in the lowcore.
- * Bit 53 and bit 54 are the page invalid bit and the page protection
- * bit. We set both to indicate a swapped page.
- * Bit 62 and 63 are used to distinguish the different page types. For
- * a swapped page these bits need to be zero.
+ * Bits 53, 54, 62 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
* This leaves the bits 0-51 and bits 56-61 to store type and offset.
* We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
* plus 56 for the offset.
@@ -1435,7 +1694,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
offset &= __SWP_OFFSET_MASK;
- pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
+ pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
return pte;
}
@@ -1458,7 +1717,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define pgoff_to_pte(__off) \
((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
- | _PAGE_TYPE_FILE })
+ | _PAGE_INVALID | _PAGE_PROTECT })
#endif /* !__ASSEMBLY__ */
@@ -1467,11 +1726,13 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
+extern void s390_enable_skey(void);
/*
* No page table caches to initialise
*/
-#define pgtable_cache_init() do { } while (0)
+static inline void pgtable_cache_init(void) { }
+static inline void check_pgt_cache(void) { }
#include <asm-generic/pgtable.h>
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 94e749c9023..6f02d452bbe 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -11,6 +11,13 @@
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
+#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
+#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
+
+#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
+#define _CIF_ASCE (1<<CIF_ASCE)
+
+
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
@@ -21,6 +28,21 @@
#include <asm/setup.h>
#include <asm/runtime_instr.h>
+static inline void set_cpu_flag(int flag)
+{
+ S390_lowcore.cpu_flags |= (1U << flag);
+}
+
+static inline void clear_cpu_flag(int flag)
+{
+ S390_lowcore.cpu_flags &= ~(1U << flag);
+}
+
+static inline int test_cpu_flag(int flag)
+{
+ return !!(S390_lowcore.cpu_flags & (1U << flag));
+}
+
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -43,6 +65,7 @@ extern void execve_tail(void);
#ifndef CONFIG_64BIT
#define TASK_SIZE (1UL << 31)
+#define TASK_MAX_SIZE (1UL << 31)
#define TASK_UNMAPPED_BASE (1UL << 30)
#else /* CONFIG_64BIT */
@@ -51,6 +74,7 @@ extern void execve_tail(void);
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
+#define TASK_MAX_SIZE (1UL << 53)
#endif /* CONFIG_64BIT */
@@ -77,6 +101,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
+ unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
@@ -91,7 +116,15 @@ struct thread_struct {
#endif
};
-#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE 1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND 2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
typedef struct thread_struct thread_struct;
@@ -124,19 +157,17 @@ struct stack_frame {
* Do necessary setup to start up a new thread.
*/
#define start_thread(regs, new_psw, new_stackp) do { \
- regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \
+ regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \
regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
regs->gprs[15] = new_stackp; \
execve_tail(); \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
- regs->psw.mask = psw_user_bits | PSW_MASK_BA; \
+ regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
regs->gprs[15] = new_stackp; \
- __tlb_flush_mm(current->mm); \
crst_table_downgrade(current->mm, 1UL << 31); \
- update_mm(current->mm, current); \
execve_tail(); \
} while (0)
@@ -159,16 +190,15 @@ extern void release_thread(struct task_struct *);
*/
extern unsigned long thread_saved_pc(struct task_struct *t);
-extern void show_code(struct pt_regs *regs);
-extern void print_fn_code(unsigned char *code, unsigned long len);
-extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15])
+/* Has task runtime instrumentation enabled ? */
+#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+
static inline unsigned short stap(void)
{
unsigned short cpu_address;
@@ -187,6 +217,8 @@ static inline void cpu_relax(void)
barrier();
}
+#define arch_mutex_cpu_relax() barrier()
+
static inline void psw_set_key(unsigned int key)
{
asm volatile("spka 0(%0)" : : "d" (key));
@@ -335,9 +367,9 @@ __set_psw_mask(unsigned long mask)
}
#define local_mcck_enable() \
- __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK)
+ __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
#define local_mcck_disable() \
- __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT)
+ __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
/*
* Basic Machine Check/Program Check Handler.
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 3ee5da3bc10..55d69dd7473 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -8,12 +8,63 @@
#include <uapi/asm/ptrace.h>
+#define PIF_SYSCALL 0 /* inside a system call */
+#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
+
+#define _PIF_SYSCALL (1<<PIF_SYSCALL)
+#define _PIF_PER_TRAP (1<<PIF_PER_TRAP)
+
#ifndef __ASSEMBLY__
-#ifndef __s390x__
-#else /* __s390x__ */
-#endif /* __s390x__ */
-extern long psw_kernel_bits;
-extern long psw_user_bits;
+
+#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
+ PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+ PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
+ PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
+
+struct psw_bits {
+ unsigned long long : 1;
+ unsigned long long r : 1; /* PER-Mask */
+ unsigned long long : 3;
+ unsigned long long t : 1; /* DAT Mode */
+ unsigned long long i : 1; /* Input/Output Mask */
+ unsigned long long e : 1; /* External Mask */
+ unsigned long long key : 4; /* PSW Key */
+ unsigned long long : 1;
+ unsigned long long m : 1; /* Machine-Check Mask */
+ unsigned long long w : 1; /* Wait State */
+ unsigned long long p : 1; /* Problem State */
+ unsigned long long as : 2; /* Address Space Control */
+ unsigned long long cc : 2; /* Condition Code */
+ unsigned long long pm : 4; /* Program Mask */
+ unsigned long long ri : 1; /* Runtime Instrumentation */
+ unsigned long long : 6;
+ unsigned long long eaba : 2; /* Addressing Mode */
+#ifdef CONFIG_64BIT
+ unsigned long long : 31;
+ unsigned long long ia : 64;/* Instruction Address */
+#else
+ unsigned long long ia : 31;/* Instruction Address */
+#endif
+};
+
+enum {
+ PSW_AMODE_24BIT = 0,
+ PSW_AMODE_31BIT = 1,
+ PSW_AMODE_64BIT = 3
+};
+
+enum {
+ PSW_AS_PRIMARY = 0,
+ PSW_AS_ACCREG = 1,
+ PSW_AS_SECONDARY = 2,
+ PSW_AS_HOME = 3
+};
+
+#define psw_bits(__psw) (*({ \
+ typecheck(psw_t, __psw); \
+ &(*(struct psw_bits *)(&(__psw))); \
+}))
/*
* The pt_regs struct defines the way the registers are stored on
@@ -26,7 +77,9 @@ struct pt_regs
unsigned long gprs[NUM_GPRS];
unsigned long orig_gpr2;
unsigned int int_code;
+ unsigned int int_parm;
unsigned long int_parm_long;
+ unsigned long flags;
};
/*
@@ -77,12 +130,26 @@ struct per_struct_kernel {
#define PER_CONTROL_SUSPENSION 0x00400000UL
#define PER_CONTROL_ALTERATION 0x00200000UL
-#ifdef __s390x__
-#endif /* __s390x__ */
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ regs->flags |= (1U << flag);
+}
+
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ regs->flags &= ~(1U << flag);
+}
+
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ return !!(regs->flags & (1U << flag));
+}
+
/*
* These are defined as per linux/ptrace.h, which see.
*/
#define arch_has_single_step() (1)
+#define arch_has_block_step() (1)
#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 57d0d7e794b..d786c634e05 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
/**
- * struct qdio_initialize - qdio initalization data
+ * struct qdio_initialize - qdio initialization data
* @cdev: associated ccw device
* @q_format: queue format
* @adapter_name: name for the adapter
@@ -378,6 +378,34 @@ struct qdio_initialize {
struct qdio_outbuf_state *output_sbal_state_array;
};
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit: Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+ u64 nit;
+ struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+ u64 nit;
+ struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
@@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
extern int qdio_shutdown(struct ccw_device *, int);
extern int qdio_free(struct ccw_device *);
extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+ int cnc, u16 *response,
+ void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+ void *entry),
+ void *priv);
#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 833788693f0..1aba89b53cb 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <asm/chpid.h>
+#include <asm/cpu.h>
#define SCLP_CHP_INFO_MASK_SIZE 32
@@ -27,7 +28,11 @@ struct sclp_ipl_info {
struct sclp_cpu_entry {
u8 address;
- u8 reserved0[13];
+ u8 reserved0[2];
+ u8 : 3;
+ u8 siif : 1;
+ u8 : 4;
+ u8 reserved2[10];
u8 type;
u8 reserved1;
} __attribute__((packed));
@@ -37,25 +42,30 @@ struct sclp_cpu_info {
unsigned int standby;
unsigned int combined;
int has_cpu_type;
- struct sclp_cpu_entry cpu[255];
+ struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
};
int sclp_get_cpu_info(struct sclp_cpu_info *info);
int sclp_cpu_configure(u8 cpu);
int sclp_cpu_deconfigure(u8 cpu);
-void sclp_facilities_detect(void);
unsigned long long sclp_get_rnmax(void);
unsigned long long sclp_get_rzm(void);
-u8 sclp_get_fac85(void);
+unsigned int sclp_get_max_cpu(void);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool sclp_has_linemode(void);
-bool sclp_has_vt220(void);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
+bool sclp_has_sprp(void);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
+unsigned long sclp_get_hsa_size(void);
+void sclp_early_detect(void);
+int sclp_has_siif(void);
+unsigned int sclp_get_ibc(void);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 00000000000..5b3e48ef534
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index f69f76b3447..089a49814c5 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -9,7 +9,6 @@
#define PARMAREA 0x10400
-#define MEMORY_CHUNKS 256
#ifndef __ASSEMBLY__
@@ -31,32 +30,11 @@
#endif /* CONFIG_64BIT */
#define COMMAND_LINE ((char *) (0x10480))
-#define CHUNK_READ_WRITE 0
-#define CHUNK_READ_ONLY 1
-#define CHUNK_OLDMEM 4
-#define CHUNK_CRASHK 5
-
-struct mem_chunk {
- unsigned long addr;
- unsigned long size;
- int type;
-};
-
-extern struct mem_chunk memory_chunk[];
-extern unsigned long real_memory_size;
extern int memory_end_set;
extern unsigned long memory_end;
+extern unsigned long max_physmem_end;
-void detect_memory_layout(struct mem_chunk chunk[]);
-void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr,
- unsigned long size, int type);
-
-#define PRIMARY_SPACE_MODE 0
-#define ACCESS_REGISTER_MODE 1
-#define SECONDARY_SPACE_MODE 2
-#define HOME_SPACE_MODE 3
-
-extern unsigned int s390_user_mode;
+extern void detect_memory_memblock(void);
/*
* Machine features detected in head.S
@@ -64,26 +42,28 @@ extern unsigned int s390_user_mode;
#define MACHINE_FLAG_VM (1UL << 0)
#define MACHINE_FLAG_IEEE (1UL << 1)
-#define MACHINE_FLAG_CSP (1UL << 3)
-#define MACHINE_FLAG_MVPG (1UL << 4)
-#define MACHINE_FLAG_DIAG44 (1UL << 5)
-#define MACHINE_FLAG_IDTE (1UL << 6)
-#define MACHINE_FLAG_DIAG9C (1UL << 7)
-#define MACHINE_FLAG_MVCOS (1UL << 8)
-#define MACHINE_FLAG_KVM (1UL << 9)
+#define MACHINE_FLAG_CSP (1UL << 2)
+#define MACHINE_FLAG_MVPG (1UL << 3)
+#define MACHINE_FLAG_DIAG44 (1UL << 4)
+#define MACHINE_FLAG_IDTE (1UL << 5)
+#define MACHINE_FLAG_DIAG9C (1UL << 6)
+#define MACHINE_FLAG_KVM (1UL << 8)
+#define MACHINE_FLAG_ESOP (1UL << 9)
#define MACHINE_FLAG_EDAT1 (1UL << 10)
#define MACHINE_FLAG_EDAT2 (1UL << 11)
#define MACHINE_FLAG_LPAR (1UL << 12)
-#define MACHINE_FLAG_SPP (1UL << 13)
+#define MACHINE_FLAG_LPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
#define MACHINE_FLAG_TE (1UL << 15)
#define MACHINE_FLAG_RRBM (1UL << 16)
+#define MACHINE_FLAG_TLB_LC (1UL << 17)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
#define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1
#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1
@@ -93,31 +73,28 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_IDTE (0)
#define MACHINE_HAS_DIAG44 (1)
#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG)
-#define MACHINE_HAS_MVCOS (0)
#define MACHINE_HAS_EDAT1 (0)
#define MACHINE_HAS_EDAT2 (0)
-#define MACHINE_HAS_SPP (0)
+#define MACHINE_HAS_LPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
#define MACHINE_HAS_TE (0)
#define MACHINE_HAS_RRBM (0)
+#define MACHINE_HAS_TLB_LC (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
#define MACHINE_HAS_MVPG (1)
-#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS)
#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
+#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
+#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#endif /* CONFIG_64BIT */
-#define ZFCPDUMP_HSA_SIZE (32UL<<20)
-#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20)
-
/*
* Console mode. Override with conmode=
*/
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index db7ddfaf5b7..abf9e573594 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -21,22 +21,5 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
-struct old_sigaction {
- __sighandler_t sa_handler;
- old_sigset_t sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer)(void);
-};
-
-struct sigaction {
- __sighandler_t sa_handler;
- unsigned long sa_flags;
- void (*sa_restorer)(void);
- sigset_t sa_mask; /* mask last for extensibility */
-};
-
-struct k_sigaction {
- struct sigaction sa;
-};
-
+#define __ARCH_HAS_SA_RESTORER
#endif
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16d3e7..bf9c823d402 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
#define SIGP_SENSE 1
#define SIGP_EXTERNAL_CALL 2
#define SIGP_EMERGENCY_SIGNAL 3
+#define SIGP_START 4
#define SIGP_STOP 5
#define SIGP_RESTART 6
#define SIGP_STOP_AND_STORE_STATUS 9
@@ -12,6 +13,7 @@
#define SIGP_SET_PREFIX 13
#define SIGP_STORE_STATUS_AT_ADDRESS 14
#define SIGP_SET_ARCHITECTURE 18
+#define SIGP_COND_EMERGENCY_SIGNAL 19
#define SIGP_SENSE_RUNNING 21
/* SIGP condition codes */
@@ -29,4 +31,23 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+#ifndef __ASSEMBLY__
+
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b64f15c3b4c..4f1307962a9 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,6 +7,8 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#include <asm/sigp.h>
+
#ifdef CONFIG_SMP
#include <asm/lowcore.h>
@@ -14,7 +16,6 @@
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
extern struct mutex smp_cpu_state_mutex;
-extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
@@ -29,9 +30,9 @@ extern int smp_store_status(int cpu);
extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_yield(void);
-extern void smp_stop_cpu(void);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
#else /* CONFIG_SMP */
@@ -50,10 +51,20 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_yield(void) { }
-static inline void smp_stop_cpu(void) { }
+static inline void smp_fill_possible_mask(void) { }
#endif /* CONFIG_SMP */
+static inline void smp_stop_cpu(void)
+{
+ u16 pcpu = stap();
+
+ for (;;) {
+ __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+ cpu_relax();
+ }
+}
+
#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 701fe8c59e1..96879f7ad6d 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -11,18 +11,21 @@
#include <linux/smp.h>
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
extern int spin_retry;
static inline int
-_raw_compare_and_swap(volatile unsigned int *lock,
- unsigned int old, unsigned int new)
+_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
{
+ unsigned int old_expected = old;
+
asm volatile(
" cs %0,%3,%1"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
: "cc", "memory" );
- return old;
+ return old == old_expected;
}
/*
@@ -34,52 +37,69 @@ _raw_compare_and_swap(volatile unsigned int *lock,
* (the type definitions are in asm/spinlock_types.h)
*/
-#define arch_spin_is_locked(x) ((x)->owner_cpu != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) \
- arch_spin_relax(lock); } while (0)
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_relax(arch_spinlock_t *);
+void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
-extern void arch_spin_lock_wait(arch_spinlock_t *);
-extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
-extern int arch_spin_trylock_retry(arch_spinlock_t *);
-extern void arch_spin_relax(arch_spinlock_t *lock);
+static inline u32 arch_spin_lockval(int cpu)
+{
+ return ~cpu;
+}
-static inline void arch_spin_lock(arch_spinlock_t *lp)
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
- int old;
+ return lock.lock == 0;
+}
- old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
- if (likely(old == 0))
- return;
- arch_spin_lock_wait(lp);
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+ return ACCESS_ONCE(lp->lock) != 0;
}
-static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
- unsigned long flags)
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
{
- int old;
+ barrier();
+ return likely(arch_spin_value_unlocked(*lp) &&
+ _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
+}
- old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
- if (likely(old == 0))
- return;
- arch_spin_lock_wait_flags(lp, flags);
+static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
+{
+ return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
}
-static inline int arch_spin_trylock(arch_spinlock_t *lp)
+static inline void arch_spin_lock(arch_spinlock_t *lp)
{
- int old;
+ if (!arch_spin_trylock_once(lp))
+ arch_spin_lock_wait(lp);
+}
- old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
- if (likely(old == 0))
- return 1;
- return arch_spin_trylock_retry(lp);
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
+ unsigned long flags)
+{
+ if (!arch_spin_trylock_once(lp))
+ arch_spin_lock_wait_flags(lp, flags);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+ if (!arch_spin_trylock_once(lp))
+ return arch_spin_trylock_retry(lp);
+ return 1;
}
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
- _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0);
+ arch_spin_tryrelease_once(lp);
+}
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ while (arch_spin_is_locked(lock))
+ arch_spin_relax(lock);
}
-
+
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
@@ -110,42 +130,50 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
+static inline int arch_read_trylock_once(arch_rwlock_t *rw)
+{
+ unsigned int old = ACCESS_ONCE(rw->lock);
+ return likely((int) old >= 0 &&
+ _raw_compare_and_swap(&rw->lock, old, old + 1));
+}
+
+static inline int arch_write_trylock_once(arch_rwlock_t *rw)
+{
+ unsigned int old = ACCESS_ONCE(rw->lock);
+ return likely(old == 0 &&
+ _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+}
+
static inline void arch_read_lock(arch_rwlock_t *rw)
{
- unsigned int old;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old)
+ if (!arch_read_trylock_once(rw))
_raw_read_lock_wait(rw);
}
static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
{
- unsigned int old;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old)
+ if (!arch_read_trylock_once(rw))
_raw_read_lock_wait_flags(rw, flags);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
- unsigned int old, cmp;
+ unsigned int old;
- old = rw->lock;
do {
- cmp = old;
- old = _raw_compare_and_swap(&rw->lock, old, old - 1);
- } while (cmp != old);
+ old = ACCESS_ONCE(rw->lock);
+ } while (!_raw_compare_and_swap(&rw->lock, old, old - 1));
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
- if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
+ if (!arch_write_trylock_once(rw))
_raw_write_lock_wait(rw);
}
static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
{
- if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
+ if (!arch_write_trylock_once(rw))
_raw_write_lock_wait_flags(rw, flags);
}
@@ -156,18 +184,16 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
- unsigned int old;
- old = rw->lock & 0x7fffffffU;
- if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1) == old))
- return 1;
- return _raw_read_trylock_retry(rw);
+ if (!arch_read_trylock_once(rw))
+ return _raw_read_trylock_retry(rw);
+ return 1;
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
- if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0))
- return 1;
- return _raw_write_trylock_retry(rw);
+ if (!arch_write_trylock_once(rw))
+ return _raw_write_trylock_retry(rw);
+ return 1;
}
#define arch_read_relax(lock) cpu_relax()
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index 9c76656a0af..b2cd6ff7c2c 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,13 +6,13 @@
#endif
typedef struct {
- volatile unsigned int owner_cpu;
+ unsigned int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
typedef struct {
- volatile unsigned int lock;
+ unsigned int lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index f3a9e0f9270..18ea9e3f814 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,91 +8,130 @@
#define __ASM_SWITCH_TO_H
#include <linux/thread_info.h>
+#include <asm/ptrace.h>
extern struct task_struct *__switch_to(void *, void *);
-extern void update_per_regs(struct task_struct *task);
+extern void update_cr_regs(struct task_struct *task);
-static inline void save_fp_regs(s390_fp_regs *fpregs)
+static inline int test_fp_ctl(u32 fpc)
{
+ u32 orig_fpc;
+ int rc;
+
+ if (!MACHINE_HAS_IEEE)
+ return 0;
+
asm volatile(
- " std 0,%O0+8(%R0)\n"
- " std 2,%O0+24(%R0)\n"
- " std 4,%O0+40(%R0)\n"
- " std 6,%O0+56(%R0)"
- : "=Q" (*fpregs) : "Q" (*fpregs));
+ " efpc %1\n"
+ " sfpc %2\n"
+ "0: sfpc %1\n"
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc), "=d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+}
+
+static inline void save_fp_ctl(u32 *fpc)
+{
if (!MACHINE_HAS_IEEE)
return;
+
asm volatile(
- " stfpc %0\n"
- " std 1,%O0+16(%R0)\n"
- " std 3,%O0+32(%R0)\n"
- " std 5,%O0+48(%R0)\n"
- " std 7,%O0+64(%R0)\n"
- " std 8,%O0+72(%R0)\n"
- " std 9,%O0+80(%R0)\n"
- " std 10,%O0+88(%R0)\n"
- " std 11,%O0+96(%R0)\n"
- " std 12,%O0+104(%R0)\n"
- " std 13,%O0+112(%R0)\n"
- " std 14,%O0+120(%R0)\n"
- " std 15,%O0+128(%R0)\n"
- : "=Q" (*fpregs) : "Q" (*fpregs));
+ " stfpc %0\n"
+ : "+Q" (*fpc));
}
-static inline void restore_fp_regs(s390_fp_regs *fpregs)
+static inline int restore_fp_ctl(u32 *fpc)
{
+ int rc;
+
+ if (!MACHINE_HAS_IEEE)
+ return 0;
+
asm volatile(
- " ld 0,%O0+8(%R0)\n"
- " ld 2,%O0+24(%R0)\n"
- " ld 4,%O0+40(%R0)\n"
- " ld 6,%O0+56(%R0)"
- : : "Q" (*fpregs));
+ " lfpc %1\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
+ return rc;
+}
+
+static inline void save_fp_regs(freg_t *fprs)
+{
+ asm volatile("std 0,%0" : "=Q" (fprs[0]));
+ asm volatile("std 2,%0" : "=Q" (fprs[2]));
+ asm volatile("std 4,%0" : "=Q" (fprs[4]));
+ asm volatile("std 6,%0" : "=Q" (fprs[6]));
if (!MACHINE_HAS_IEEE)
return;
- asm volatile(
- " lfpc %0\n"
- " ld 1,%O0+16(%R0)\n"
- " ld 3,%O0+32(%R0)\n"
- " ld 5,%O0+48(%R0)\n"
- " ld 7,%O0+64(%R0)\n"
- " ld 8,%O0+72(%R0)\n"
- " ld 9,%O0+80(%R0)\n"
- " ld 10,%O0+88(%R0)\n"
- " ld 11,%O0+96(%R0)\n"
- " ld 12,%O0+104(%R0)\n"
- " ld 13,%O0+112(%R0)\n"
- " ld 14,%O0+120(%R0)\n"
- " ld 15,%O0+128(%R0)\n"
- : : "Q" (*fpregs));
+ asm volatile("std 1,%0" : "=Q" (fprs[1]));
+ asm volatile("std 3,%0" : "=Q" (fprs[3]));
+ asm volatile("std 5,%0" : "=Q" (fprs[5]));
+ asm volatile("std 7,%0" : "=Q" (fprs[7]));
+ asm volatile("std 8,%0" : "=Q" (fprs[8]));
+ asm volatile("std 9,%0" : "=Q" (fprs[9]));
+ asm volatile("std 10,%0" : "=Q" (fprs[10]));
+ asm volatile("std 11,%0" : "=Q" (fprs[11]));
+ asm volatile("std 12,%0" : "=Q" (fprs[12]));
+ asm volatile("std 13,%0" : "=Q" (fprs[13]));
+ asm volatile("std 14,%0" : "=Q" (fprs[14]));
+ asm volatile("std 15,%0" : "=Q" (fprs[15]));
+}
+
+static inline void restore_fp_regs(freg_t *fprs)
+{
+ asm volatile("ld 0,%0" : : "Q" (fprs[0]));
+ asm volatile("ld 2,%0" : : "Q" (fprs[2]));
+ asm volatile("ld 4,%0" : : "Q" (fprs[4]));
+ asm volatile("ld 6,%0" : : "Q" (fprs[6]));
+ if (!MACHINE_HAS_IEEE)
+ return;
+ asm volatile("ld 1,%0" : : "Q" (fprs[1]));
+ asm volatile("ld 3,%0" : : "Q" (fprs[3]));
+ asm volatile("ld 5,%0" : : "Q" (fprs[5]));
+ asm volatile("ld 7,%0" : : "Q" (fprs[7]));
+ asm volatile("ld 8,%0" : : "Q" (fprs[8]));
+ asm volatile("ld 9,%0" : : "Q" (fprs[9]));
+ asm volatile("ld 10,%0" : : "Q" (fprs[10]));
+ asm volatile("ld 11,%0" : : "Q" (fprs[11]));
+ asm volatile("ld 12,%0" : : "Q" (fprs[12]));
+ asm volatile("ld 13,%0" : : "Q" (fprs[13]));
+ asm volatile("ld 14,%0" : : "Q" (fprs[14]));
+ asm volatile("ld 15,%0" : : "Q" (fprs[15]));
}
static inline void save_access_regs(unsigned int *acrs)
{
- asm volatile("stam 0,15,%0" : "=Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
}
static inline void restore_access_regs(unsigned int *acrs)
{
- asm volatile("lam 0,15,%0" : : "Q" (*acrs));
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
}
#define switch_to(prev,next,last) do { \
if (prev->mm) { \
- save_fp_regs(&prev->thread.fp_regs); \
+ save_fp_ctl(&prev->thread.fp_regs.fpc); \
+ save_fp_regs(prev->thread.fp_regs.fprs); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
} \
if (next->mm) { \
- restore_fp_regs(&next->thread.fp_regs); \
+ restore_fp_ctl(&next->thread.fp_regs.fpc); \
+ restore_fp_regs(next->thread.fp_regs.fprs); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- update_per_regs(next); \
+ update_cr_regs(next); \
} \
prev = __switch_to(prev,next); \
} while (0)
-#define finish_arch_switch(prev) do { \
- set_fs(current->thread.mm_segment); \
-} while (0)
-
#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index fe7b99759e1..abad78d5b10 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -12,7 +12,7 @@
#ifndef _ASM_SYSCALL_H
#define _ASM_SYSCALL_H 1
-#include <linux/audit.h>
+#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/ptrace.h>
@@ -23,11 +23,12 @@
* type here is what we want [need] for both 32 bit and 64 bit systems.
*/
extern const unsigned int sys_call_table[];
+extern const unsigned int sys_call_table_emu[];
static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
{
- return test_tsk_thread_flag(task, TIF_SYSCALL) ?
+ return test_pt_regs_flag(regs, PIF_SYSCALL) ?
(regs->int_code & 0xffff) : -1;
}
@@ -88,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
+ if (test_tsk_thread_flag(current, TIF_31BIT))
return AUDIT_ARCH_S390;
#endif
return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 9e2cfe0349c..b833e9c0bfb 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -14,13 +14,8 @@
#define THREAD_ORDER 1
#define ASYNC_ORDER 1
#else /* CONFIG_64BIT */
-#ifndef __SMALL_STACK
#define THREAD_ORDER 2
#define ASYNC_ORDER 2
-#else
-#define THREAD_ORDER 1
-#define ASYNC_ORDER 1
-#endif
#endif /* CONFIG_64BIT */
#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
@@ -41,6 +36,7 @@ struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
unsigned long flags; /* low level flags */
+ unsigned long sys_call_table; /* System call table address */
unsigned int cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
struct restart_block restart_block;
@@ -81,27 +77,22 @@ static inline struct thread_info *current_thread_info(void)
/*
* thread information flags bit numbers
*/
-#define TIF_SYSCALL 0 /* inside a system call */
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */
-#define TIF_MCCK_PENDING 7 /* machine check handling is pending */
-#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
-#define TIF_SECCOMP 10 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
-#define TIF_31BIT 17 /* 32bit process */
-#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP 20 /* This task is single stepped */
+#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
+#define TIF_SIGPENDING 1 /* signal pending */
+#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_SECCOMP 5 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+#define TIF_31BIT 16 /* 32bit process */
+#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP 19 /* This task is single stepped */
+#define TIF_BLOCK_STEP 20 /* This task is block stepped */
-#define _TIF_SYSCALL (1<<TIF_SYSCALL)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_PER_TRAP (1<<TIF_PER_TRAP)
-#define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING)
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
@@ -115,6 +106,4 @@ static inline struct thread_info *current_thread_info(void)
#define is_32bit_task() (1)
#endif
-#define PREEMPT_ACTIVE 0x4000000
-
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 4c060bb5b8e..8beee1cceba 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -15,7 +15,7 @@
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
/* Inline functions for clock register access. */
-static inline int set_clock(__u64 time)
+static inline int set_tod_clock(__u64 time)
{
int cc;
@@ -27,7 +27,7 @@ static inline int set_clock(__u64 time)
return cc;
}
-static inline int store_clock(__u64 *time)
+static inline int store_tod_clock(__u64 *time)
{
int cc;
@@ -71,33 +71,35 @@ static inline void local_tick_enable(unsigned long long comp)
typedef unsigned long long cycles_t;
-static inline unsigned long long get_clock(void)
+static inline void get_tod_clock_ext(char clk[16])
{
- unsigned long long clk;
+ typedef struct { char _[sizeof(clk)]; } addrtype;
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
- asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc");
-#else
- asm volatile("stck %0" : "=Q" (clk) : : "cc");
-#endif
- return clk;
+ asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc");
}
-static inline void get_clock_ext(char *clk)
+static inline unsigned long long get_tod_clock(void)
{
- asm volatile("stcke %0" : "=Q" (*clk) : : "cc");
+ unsigned char clk[16];
+ get_tod_clock_ext(clk);
+ return *((unsigned long long *)&clk[1]);
}
-static inline unsigned long long get_clock_xt(void)
+static inline unsigned long long get_tod_clock_fast(void)
{
- unsigned char clk[16];
- get_clock_ext(clk);
- return *((unsigned long long *)&clk[1]);
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ unsigned long long clk;
+
+ asm volatile("stckf %0" : "=Q" (clk) : : "cc");
+ return clk;
+#else
+ return get_tod_clock();
+#endif
}
static inline cycles_t get_cycles(void)
{
- return (cycles_t) get_clock() >> 2;
+ return (cycles_t) get_tod_clock() >> 2;
}
int get_sync_clock(unsigned long long *clock);
@@ -123,9 +125,9 @@ extern u64 sched_clock_base_cc;
* function, otherwise the returned value is not guaranteed to
* be monotonic.
*/
-static inline unsigned long long get_clock_monotonic(void)
+static inline unsigned long long get_tod_clock_monotonic(void)
{
- return get_clock_xt() - sched_clock_base_cc;
+ return get_tod_clock() - sched_clock_base_cc;
}
/**
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d68668..a25f09fbaf3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
struct mm_struct *mm;
struct mmu_table_batch *batch;
unsigned int fullmm;
+ unsigned long start, end;
};
struct mmu_table_batch {
@@ -48,24 +49,37 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
static inline void tlb_gather_mmu(struct mmu_gather *tlb,
struct mm_struct *mm,
- unsigned int full_mm_flush)
+ unsigned long start,
+ unsigned long end)
{
tlb->mm = mm;
- tlb->fullmm = full_mm_flush;
+ tlb->start = start;
+ tlb->end = end;
+ tlb->fullmm = !(start | (end+1));
tlb->batch = NULL;
- if (tlb->fullmm)
- __tlb_flush_mm(mm);
}
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+ __tlb_flush_mm_lazy(tlb->mm);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
tlb_table_flush(tlb);
}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+
static inline void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
/*
@@ -91,9 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long address)
{
- if (!tlb->fullmm)
- return page_table_free_rcu(tlb, (unsigned long *) pte);
- page_table_free(tlb->mm, (unsigned long *) pte);
+ page_table_free_rcu(tlb, (unsigned long *) pte);
}
/*
@@ -109,9 +121,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
#ifdef CONFIG_64BIT
if (tlb->mm->context.asce_limit <= (1UL << 31))
return;
- if (!tlb->fullmm)
- return tlb_remove_table(tlb, pmd);
- crst_table_free(tlb->mm, (unsigned long *) pmd);
+ tlb_remove_table(tlb, pmd);
#endif
}
@@ -128,9 +138,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#ifdef CONFIG_64BIT
if (tlb->mm->context.asce_limit <= (1UL << 42))
return;
- if (!tlb->fullmm)
- return tlb_remove_table(tlb, pud);
- crst_table_free(tlb->mm, (unsigned long *) pud);
+ tlb_remove_table(tlb, pud);
#endif
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 1d8fe2b17ef..16c9c88658c 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -7,19 +7,41 @@
#include <asm/pgalloc.h>
/*
- * Flush all tlb entries on the local cpu.
+ * Flush all TLB entries on the local CPU.
*/
static inline void __tlb_flush_local(void)
{
asm volatile("ptlb" : : : "memory");
}
-#ifdef CONFIG_SMP
/*
- * Flush all tlb entries on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs
*/
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+ /* Global TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,0"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on the local CPU
+ */
+static inline void __tlb_flush_idte_local(unsigned long asce)
+{
+ /* Local TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,1"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
void smp_ptlb_all(void);
+/*
+ * Flush all TLB entries on all CPUs.
+ */
static inline void __tlb_flush_global(void)
{
register unsigned long reg2 asm("2");
@@ -42,53 +64,104 @@ static inline void __tlb_flush_global(void)
: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
}
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ */
static inline void __tlb_flush_full(struct mm_struct *mm)
{
- cpumask_t local_cpumask;
-
preempt_disable();
- /*
- * If the process only ran on the local cpu, do a local flush.
- */
- cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id()));
- if (cpumask_equal(mm_cpumask(mm), &local_cpumask))
+ atomic_add(0x10000, &mm->context.attach_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ /* Local TLB flush */
__tlb_flush_local();
- else
+ } else {
+ /* Global TLB flush */
__tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
+ preempt_enable();
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+ int active, count;
+
+ preempt_disable();
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ __tlb_flush_idte_local(asce);
+ } else {
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte(asce);
+ else
+ __tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
preempt_enable();
}
+
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_global();
+}
#else
-#define __tlb_flush_full(mm) __tlb_flush_local()
#define __tlb_flush_global() __tlb_flush_local()
-#endif
+#define __tlb_flush_full(mm) __tlb_flush_local()
/*
- * Flush all tlb entries of a page table on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs.
*/
-static inline void __tlb_flush_idte(unsigned long asce)
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
{
- asm volatile(
- " .insn rrf,0xb98e0000,0,%0,%1,0"
- : : "a" (2048), "a" (asce) : "cc" );
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local(asce);
+ else
+ __tlb_flush_local();
}
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_local();
+}
+#endif
+
static inline void __tlb_flush_mm(struct mm_struct * mm)
{
- if (unlikely(cpumask_empty(mm_cpumask(mm))))
- return;
/*
* If the machine has IDTE we prefer to do a per mm flush
* on all cpus instead of doing a local flush if the mm
* only ran on the local cpu.
*/
if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
- __tlb_flush_idte((unsigned long) mm->pgd |
+ __tlb_flush_asce(mm, (unsigned long) mm->pgd |
mm->context.asce_bits);
else
__tlb_flush_full(mm);
}
-static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
if (mm->context.flush_mm) {
__tlb_flush_mm(mm);
@@ -120,19 +193,19 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- __tlb_flush_mm_cond(mm);
+ __tlb_flush_mm_lazy(mm);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- __tlb_flush_mm_cond(vma->vm_mm);
+ __tlb_flush_mm_lazy(vma->vm_mm);
}
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
- __tlb_flush_mm(&init_mm);
+ __tlb_flush_kernel();
}
#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 05425b18c0a..56af53093d2 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -26,21 +26,12 @@ extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
#define mc_capable() 1
-static inline const struct cpumask *cpu_coregroup_mask(int cpu)
-{
- return &cpu_topology[cpu].core_mask;
-}
-
-static inline const struct cpumask *cpu_book_mask(int cpu)
-{
- return &cpu_topology[cpu].book_mask;
-}
-
int topology_cpu_init(struct cpu *);
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
void store_topology(struct sysinfo_15_1_x *info);
void topology_expect_change(void);
+const struct cpumask *cpu_coregroup_mask(int cpu);
#else /* CONFIG_SCHED_BOOK */
@@ -64,8 +55,6 @@ static inline void s390_init_cpu_topology(void)
};
#endif
-#define SD_BOOK_INIT SD_CPU_INIT
-
#include <asm-generic/topology.h>
#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 34268df959a..cd4c68e0398 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -92,39 +92,88 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
#define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE
-struct uaccess_ops {
- size_t (*copy_from_user)(size_t, const void __user *, void *);
- size_t (*copy_from_user_small)(size_t, const void __user *, void *);
- size_t (*copy_to_user)(size_t, void __user *, const void *);
- size_t (*copy_to_user_small)(size_t, void __user *, const void *);
- size_t (*copy_in_user)(size_t, void __user *, const void __user *);
- size_t (*clear_user)(size_t, void __user *);
- size_t (*strnlen_user)(size_t, const char __user *);
- size_t (*strncpy_from_user)(size_t, const char __user *, char *);
- int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
- int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
-};
+/**
+ * __copy_from_user: - Copy a block of data from user space, with less checking.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long __must_check __copy_from_user(void *to, const void __user *from,
+ unsigned long n);
-extern struct uaccess_ops uaccess;
-extern struct uaccess_ops uaccess_std;
-extern struct uaccess_ops uaccess_mvcos;
-extern struct uaccess_ops uaccess_mvcos_switch;
-extern struct uaccess_ops uaccess_pt;
+/**
+ * __copy_to_user: - Copy a block of data into user space, with less checking.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long __must_check __copy_to_user(void __user *to, const void *from,
+ unsigned long n);
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
+#define __put_get_user_asm(to, from, size, spec) \
+({ \
+ register unsigned long __reg0 asm("0") = spec; \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: mvcos %1,%3,%2\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%5\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "=Q" (*(to)) \
+ : "d" (size), "Q" (*(from)), \
+ "d" (__reg0), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL)
+#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL)
-extern int __handle_fault(unsigned long, unsigned long, int);
+#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
-static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
+static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
- size = uaccess.copy_to_user_small(size, ptr, x);
- return size ? -EFAULT : size;
+ size = __copy_to_user(ptr, x, size);
+ return size ? -EFAULT : 0;
}
-static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
- size = uaccess.copy_from_user_small(size, ptr, x);
- return size ? -EFAULT : size;
+ size = __copy_from_user(x, ptr, size);
+ return size ? -EFAULT : 0;
}
+#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
/*
* These are the main single-value transfer routines. They automatically
* use the right size if we just have the right pointer type.
@@ -139,8 +188,8 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
case 2: \
case 4: \
case 8: \
- __pu_err = __put_user_fn(sizeof (*(ptr)), \
- ptr, &__x); \
+ __pu_err = __put_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
break; \
default: \
__put_user_bad(); \
@@ -156,7 +205,7 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
})
-extern int __put_user_bad(void) __attribute__((noreturn));
+int __put_user_bad(void) __attribute__((noreturn));
#define __get_user(x, ptr) \
({ \
@@ -165,29 +214,29 @@ extern int __put_user_bad(void) __attribute__((noreturn));
switch (sizeof(*(ptr))) { \
case 1: { \
unsigned char __x; \
- __gu_err = __get_user_fn(sizeof (*(ptr)), \
- ptr, &__x); \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *) &__x; \
break; \
}; \
case 2: { \
unsigned short __x; \
- __gu_err = __get_user_fn(sizeof (*(ptr)), \
- ptr, &__x); \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *) &__x; \
break; \
}; \
case 4: { \
unsigned int __x; \
- __gu_err = __get_user_fn(sizeof (*(ptr)), \
- ptr, &__x); \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *) &__x; \
break; \
}; \
case 8: { \
unsigned long long __x; \
- __gu_err = __get_user_fn(sizeof (*(ptr)), \
- ptr, &__x); \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *) &__x; \
break; \
}; \
@@ -204,38 +253,12 @@ extern int __put_user_bad(void) __attribute__((noreturn));
__get_user(x, ptr); \
})
-extern int __get_user_bad(void) __attribute__((noreturn));
+int __get_user_bad(void) __attribute__((noreturn));
#define __put_user_unaligned __put_user
#define __get_user_unaligned __get_user
/**
- * __copy_to_user: - Copy a block of data into user space, with less checking.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from kernel space to user space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-static inline unsigned long __must_check
-__copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (__builtin_constant_p(n) && (n <= 256))
- return uaccess.copy_to_user_small(n, to, from);
- else
- return uaccess.copy_to_user(n, to, from);
-}
-
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
@@ -252,38 +275,10 @@ static inline unsigned long __must_check
copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
- if (access_ok(VERIFY_WRITE, to, n))
- n = __copy_to_user(to, from, n);
- return n;
+ return __copy_to_user(to, from, n);
}
-/**
- * __copy_from_user: - Copy a block of data from user space, with less checking.
- * @to: Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from user space to kernel space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-static inline unsigned long __must_check
-__copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- if (__builtin_constant_p(n) && (n <= 256))
- return uaccess.copy_from_user_small(n, from, to);
- else
- return uaccess.copy_from_user(n, from, to);
-}
-
-extern void copy_from_user_overflow(void)
+void copy_from_user_overflow(void)
#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
__compiletime_warning("copy_from_user() buffer size is not provably correct")
#endif
@@ -315,46 +310,38 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
copy_from_user_overflow();
return n;
}
- if (access_ok(VERIFY_READ, from, n))
- n = __copy_from_user(to, from, n);
- else
- memset(to, 0, n);
- return n;
+ return __copy_from_user(to, from, n);
}
-static inline unsigned long __must_check
-__copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
- return uaccess.copy_in_user(n, to, from);
-}
+unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n);
static inline unsigned long __must_check
copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
might_fault();
- if (__access_ok(from,n) && __access_ok(to,n))
- n = __copy_in_user(to, from, n);
- return n;
+ return __copy_in_user(to, from, n);
}
/*
* Copy a null terminated string from userspace.
*/
+
+long __strncpy_from_user(char *dst, const char __user *src, long count);
+
static inline long __must_check
strncpy_from_user(char *dst, const char __user *src, long count)
{
- long res = -EFAULT;
might_fault();
- if (access_ok(VERIFY_READ, src, 1))
- res = uaccess.strncpy_from_user(count, src, dst);
- return res;
+ return __strncpy_from_user(dst, src, count);
}
-static inline unsigned long
-strnlen_user(const char __user * src, unsigned long n)
+unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count);
+
+static inline unsigned long strnlen_user(const char __user *src, unsigned long n)
{
might_fault();
- return uaccess.strnlen_user(n, src);
+ return __strnlen_user(src, n);
}
/**
@@ -376,23 +363,14 @@ strnlen_user(const char __user * src, unsigned long n)
/*
* Zero Userspace
*/
+unsigned long __must_check __clear_user(void __user *to, unsigned long size);
-static inline unsigned long __must_check
-__clear_user(void __user *to, unsigned long n)
-{
- return uaccess.clear_user(n, to);
-}
-
-static inline unsigned long __must_check
-clear_user(void __user *to, unsigned long n)
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
{
might_fault();
- if (access_ok(VERIFY_WRITE, to, n))
- n = uaccess.clear_user(n, to);
- return n;
+ return __clear_user(to, n);
}
-extern int copy_to_user_real(void __user *dest, void *src, size_t count);
-extern int copy_from_user_real(void *dest, void __user *src, size_t count);
+int copy_to_user_real(void __user *dest, void *src, unsigned long count);
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 63653087251..65188635355 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -43,26 +43,15 @@
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
# ifndef CONFIG_64BIT
# define __ARCH_WANT_STAT64
# define __ARCH_WANT_SYS_TIME
# endif
# ifdef CONFIG_COMPAT
# define __ARCH_WANT_COMPAT_SYS_TIME
-# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
# endif
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
-/*
- * "Conditional" syscalls
- *
- * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
- * but it doesn't work on all toolchains, so we just do it by hand
- */
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-
#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a73eb2e1e91..bc9746a7d47 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -26,8 +26,9 @@ struct vdso_data {
__u64 wtom_clock_nsec; /* 0x28 */
__u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
__u32 tz_dsttime; /* Type of dst correction 0x34 */
- __u32 ectg_available;
- __u32 ntp_mult; /* NTP adjusted multiplier 0x3C */
+ __u32 ectg_available; /* ECTG instruction present 0x38 */
+ __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */
+ __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */
};
struct vdso_per_cpu_data {
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 00000000000..af9896c53eb
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 7bf68fff7c5..736637363d3 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -35,6 +35,8 @@ header-y += siginfo.h
header-y += signal.h
header-y += socket.h
header-y += sockios.h
+header-y += sclp_ctl.h
+header-y += sie.h
header-y += stat.h
header-y += statfs.h
header-y += swab.h
@@ -44,5 +46,6 @@ header-y += termios.h
header-y += types.h
header-y += ucontext.h
header-y += unistd.h
+header-y += virtio-ccw.h
header-y += vtoc.h
header-y += zcrypt.h
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index 1c6a7f85a58..65dc694725a 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -29,6 +29,16 @@ struct chsc_async_area {
__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
} __attribute__ ((packed));
+struct chsc_header {
+ __u16 length;
+ __u16 code;
+} __attribute__ ((packed));
+
+struct chsc_sync_area {
+ struct chsc_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+} __attribute__ ((packed));
+
struct chsc_response_struct {
__u16 length;
__u16 code;
@@ -126,5 +136,8 @@ struct chsc_cpd_info {
#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
#endif
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 38eca3ba40e..5812a3b2df9 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data {
#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6)
/* Resume IO on device */
#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7)
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
/* retrieve API version number */
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 00000000000..37998b44953
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,25 @@
+/*
+ * IOCTL interface for hypfs
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_CTL_H
+#define _ASM_HYPFS_CTL_H
+
+#include <linux/types.h>
+
+struct hypfs_diag304 {
+ __u32 args[2];
+ __u64 data;
+ __u64 rc;
+} __attribute__((packed));
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+ _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da598ec6..0fc26430a1e 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -15,6 +15,52 @@
#include <linux/types.h>
#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS 1
+#define KVM_DEV_FLIC_ENQUEUE 2
+#define KVM_DEV_FLIC_CLEAR_IRQS 3
+#define KVM_DEV_FLIC_APF_ENABLE 4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Lets round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS 266250
+#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+/* kvm attr_group on vm fd */
+#define KVM_S390_VM_MEM_CTRL 0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA 0
+#define KVM_S390_VM_MEM_CLR_CMMA 1
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -34,11 +80,31 @@ struct kvm_fpu {
__u64 fprs[16];
};
+#define KVM_GUESTDBG_USE_HW_BP 0x00010000
+
+#define KVM_HW_BP 1
+#define KVM_HW_WP_WRITE 2
+#define KVM_SINGLESTEP 4
+
struct kvm_debug_exit_arch {
+ __u64 addr;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+ __u64 addr;
+ __u64 phys_addr;
+ __u64 len;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
};
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
+ __u32 nr_hw_bp;
+ __u32 pad; /* Should be set to 0 */
+ struct kvm_hw_breakpoint __user *hw_bp;
};
#define KVM_SYNC_PREFIX (1UL << 0)
@@ -57,4 +123,9 @@ struct kvm_sync_regs {
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
#endif
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index a5ca214b34f..a150f4fabe4 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -199,6 +199,7 @@ typedef union
typedef struct
{
__u32 fpc;
+ __u32 pad;
freg_t fprs[NUM_FPRS];
} s390_fp_regs;
@@ -206,7 +207,6 @@ typedef struct
#define FPC_FLAGS_MASK 0x00F80000
#define FPC_DXC_MASK 0x0000FF00
#define FPC_RM_MASK 0x00000003
-#define FPC_VALID_MASK 0xF8F8FF03
/* this typedef defines how a Program Status Word looks like */
typedef struct
@@ -215,12 +215,6 @@ typedef struct
unsigned long addr;
} __attribute__ ((aligned(8))) psw_t;
-typedef struct
-{
- __u32 mask;
- __u32 addr;
-} __attribute__ ((aligned(8))) psw_compat_t;
-
#ifndef __s390x__
#define PSW_MASK_PER 0x40000000UL
@@ -269,7 +263,7 @@ typedef struct
#define PSW_MASK_EA 0x0000000100000000UL
#define PSW_MASK_BA 0x0000000080000000UL
-#define PSW_MASK_USER 0x0000FF8180000000UL
+#define PSW_MASK_USER 0x0000FF0180000000UL
#define PSW_ADDR_AMODE 0x0000000000000000UL
#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
@@ -295,20 +289,6 @@ typedef struct
unsigned long orig_gpr2;
} s390_regs;
-typedef struct
-{
- psw_compat_t psw;
- __u32 gprs[NUM_GPRS];
- __u32 acrs[NUM_ACRS];
- __u32 orig_gpr2;
-} s390_compat_regs;
-
-typedef struct
-{
- __u32 gprs_high[NUM_GPRS];
-} s390_compat_regs_high;
-
-
/*
* Now for the user space program event recording (trace) definitions.
* The following structures are used only for the ptrace interface, don't
@@ -420,6 +400,13 @@ typedef struct
#define PTRACE_POKE_SYSTEM_CALL 0x5008
#define PTRACE_ENABLE_TE 0x5009
#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
/*
* PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 00000000000..f2818613ee4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,24 @@
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+ __u32 cmdw;
+ __u64 sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+ _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 00000000000..5d9cc19462c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,243 @@
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes \
+ { 0x10, "DIAG (0x10) release pages" }, \
+ { 0x44, "DIAG (0x44) time slice end" }, \
+ { 0x9c, "DIAG (0x9c) time slice end directed" }, \
+ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
+ { 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x308, "DIAG (0x308) ipl functions" }, \
+ { 0x500, "DIAG (0x500) KVM virtio functions" }, \
+ { 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes \
+ { 0x01, "SIGP sense" }, \
+ { 0x02, "SIGP external call" }, \
+ { 0x03, "SIGP emergency signal" }, \
+ { 0x05, "SIGP stop" }, \
+ { 0x06, "SIGP restart" }, \
+ { 0x09, "SIGP stop and store status" }, \
+ { 0x0b, "SIGP initial cpu reset" }, \
+ { 0x0d, "SIGP set prefix" }, \
+ { 0x0e, "SIGP store status at address" }, \
+ { 0x12, "SIGP set architecture" }, \
+ { 0x15, "SIGP sense running" }
+
+#define icpt_prog_codes \
+ { 0x0001, "Prog Operation" }, \
+ { 0x0002, "Prog Privileged Operation" }, \
+ { 0x0003, "Prog Execute" }, \
+ { 0x0004, "Prog Protection" }, \
+ { 0x0005, "Prog Addressing" }, \
+ { 0x0006, "Prog Specification" }, \
+ { 0x0007, "Prog Data" }, \
+ { 0x0008, "Prog Fixedpoint overflow" }, \
+ { 0x0009, "Prog Fixedpoint divide" }, \
+ { 0x000A, "Prog Decimal overflow" }, \
+ { 0x000B, "Prog Decimal divide" }, \
+ { 0x000C, "Prog HFP exponent overflow" }, \
+ { 0x000D, "Prog HFP exponent underflow" }, \
+ { 0x000E, "Prog HFP significance" }, \
+ { 0x000F, "Prog HFP divide" }, \
+ { 0x0010, "Prog Segment translation" }, \
+ { 0x0011, "Prog Page translation" }, \
+ { 0x0012, "Prog Translation specification" }, \
+ { 0x0013, "Prog Special operation" }, \
+ { 0x0015, "Prog Operand" }, \
+ { 0x0016, "Prog Trace table" }, \
+ { 0x0017, "Prog ASNtranslation specification" }, \
+ { 0x001C, "Prog Spaceswitch event" }, \
+ { 0x001D, "Prog HFP square root" }, \
+ { 0x001F, "Prog PCtranslation specification" }, \
+ { 0x0020, "Prog AFX translation" }, \
+ { 0x0021, "Prog ASX translation" }, \
+ { 0x0022, "Prog LX translation" }, \
+ { 0x0023, "Prog EX translation" }, \
+ { 0x0024, "Prog Primary authority" }, \
+ { 0x0025, "Prog Secondary authority" }, \
+ { 0x0026, "Prog LFXtranslation exception" }, \
+ { 0x0027, "Prog LSXtranslation exception" }, \
+ { 0x0028, "Prog ALET specification" }, \
+ { 0x0029, "Prog ALEN translation" }, \
+ { 0x002A, "Prog ALE sequence" }, \
+ { 0x002B, "Prog ASTE validity" }, \
+ { 0x002C, "Prog ASTE sequence" }, \
+ { 0x002D, "Prog Extended authority" }, \
+ { 0x002E, "Prog LSTE sequence" }, \
+ { 0x002F, "Prog ASTE instance" }, \
+ { 0x0030, "Prog Stack full" }, \
+ { 0x0031, "Prog Stack empty" }, \
+ { 0x0032, "Prog Stack specification" }, \
+ { 0x0033, "Prog Stack type" }, \
+ { 0x0034, "Prog Stack operation" }, \
+ { 0x0039, "Prog Region first translation" }, \
+ { 0x003A, "Prog Region second translation" }, \
+ { 0x003B, "Prog Region third translation" }, \
+ { 0x0040, "Prog Monitor event" }, \
+ { 0x0080, "Prog PER event" }, \
+ { 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic) \
+ { (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic) \
+ { opcode, mnemonic }
+
+#define icpt_insn_codes \
+ exit_code_ipa0(0x01, 0x01, "PR"), \
+ exit_code_ipa0(0x01, 0x04, "PTFF"), \
+ exit_code_ipa0(0x01, 0x07, "SCKPF"), \
+ exit_code_ipa0(0xAA, 0x00, "RINEXT"), \
+ exit_code_ipa0(0xAA, 0x01, "RION"), \
+ exit_code_ipa0(0xAA, 0x02, "TRIC"), \
+ exit_code_ipa0(0xAA, 0x03, "RIOFF"), \
+ exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \
+ exit_code_ipa0(0xB2, 0x02, "STIDP"), \
+ exit_code_ipa0(0xB2, 0x04, "SCK"), \
+ exit_code_ipa0(0xB2, 0x05, "STCK"), \
+ exit_code_ipa0(0xB2, 0x06, "SCKC"), \
+ exit_code_ipa0(0xB2, 0x07, "STCKC"), \
+ exit_code_ipa0(0xB2, 0x08, "SPT"), \
+ exit_code_ipa0(0xB2, 0x09, "STPT"), \
+ exit_code_ipa0(0xB2, 0x0d, "PTLB"), \
+ exit_code_ipa0(0xB2, 0x10, "SPX"), \
+ exit_code_ipa0(0xB2, 0x11, "STPX"), \
+ exit_code_ipa0(0xB2, 0x12, "STAP"), \
+ exit_code_ipa0(0xB2, 0x14, "SIE"), \
+ exit_code_ipa0(0xB2, 0x16, "SETR"), \
+ exit_code_ipa0(0xB2, 0x17, "STETR"), \
+ exit_code_ipa0(0xB2, 0x18, "PC"), \
+ exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x28, "PT"), \
+ exit_code_ipa0(0xB2, 0x29, "ISKE"), \
+ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
+ exit_code_ipa0(0xB2, 0x2b, "SSKE"), \
+ exit_code_ipa0(0xB2, 0x2c, "TB"), \
+ exit_code_ipa0(0xB2, 0x2e, "PGIN"), \
+ exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \
+ exit_code_ipa0(0xB2, 0x30, "CSCH"), \
+ exit_code_ipa0(0xB2, 0x31, "HSCH"), \
+ exit_code_ipa0(0xB2, 0x32, "MSCH"), \
+ exit_code_ipa0(0xB2, 0x33, "SSCH"), \
+ exit_code_ipa0(0xB2, 0x34, "STSCH"), \
+ exit_code_ipa0(0xB2, 0x35, "TSCH"), \
+ exit_code_ipa0(0xB2, 0x36, "TPI"), \
+ exit_code_ipa0(0xB2, 0x37, "SAL"), \
+ exit_code_ipa0(0xB2, 0x38, "RSCH"), \
+ exit_code_ipa0(0xB2, 0x39, "STCRW"), \
+ exit_code_ipa0(0xB2, 0x3a, "STCPS"), \
+ exit_code_ipa0(0xB2, 0x3b, "RCHP"), \
+ exit_code_ipa0(0xB2, 0x3c, "SCHM"), \
+ exit_code_ipa0(0xB2, 0x40, "BAKR"), \
+ exit_code_ipa0(0xB2, 0x48, "PALB"), \
+ exit_code_ipa0(0xB2, 0x4c, "TAR"), \
+ exit_code_ipa0(0xB2, 0x50, "CSP"), \
+ exit_code_ipa0(0xB2, 0x54, "MVPG"), \
+ exit_code_ipa0(0xB2, 0x58, "BSG"), \
+ exit_code_ipa0(0xB2, 0x5a, "BSA"), \
+ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
+ exit_code_ipa0(0xB2, 0x74, "SIGA"), \
+ exit_code_ipa0(0xB2, 0x76, "XSCH"), \
+ exit_code_ipa0(0xB2, 0x78, "STCKE"), \
+ exit_code_ipa0(0xB2, 0x7c, "STCKF"), \
+ exit_code_ipa0(0xB2, 0x7d, "STSI"), \
+ exit_code_ipa0(0xB2, 0xb0, "STFLE"), \
+ exit_code_ipa0(0xB2, 0xb1, "STFL"), \
+ exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \
+ exit_code_ipa0(0xB2, 0xf8, "TEND"), \
+ exit_code_ipa0(0xB2, 0xfc, "TABORT"), \
+ exit_code_ipa0(0xB9, 0x1e, "KMAC"), \
+ exit_code_ipa0(0xB9, 0x28, "PCKMO"), \
+ exit_code_ipa0(0xB9, 0x2a, "KMF"), \
+ exit_code_ipa0(0xB9, 0x2b, "KMO"), \
+ exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \
+ exit_code_ipa0(0xB9, 0x2e, "KM"), \
+ exit_code_ipa0(0xB9, 0x2f, "KMC"), \
+ exit_code_ipa0(0xB9, 0x3e, "KIMD"), \
+ exit_code_ipa0(0xB9, 0x3f, "KLMD"), \
+ exit_code_ipa0(0xB9, 0x8a, "CSPG"), \
+ exit_code_ipa0(0xB9, 0x8d, "EPSW"), \
+ exit_code_ipa0(0xB9, 0x8e, "IDTE"), \
+ exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \
+ exit_code_ipa0(0xB9, 0x9c, "EQBS"), \
+ exit_code_ipa0(0xB9, 0xa2, "PTF"), \
+ exit_code_ipa0(0xB9, 0xab, "ESSA"), \
+ exit_code_ipa0(0xB9, 0xae, "RRBM"), \
+ exit_code_ipa0(0xB9, 0xaf, "PFMF"), \
+ exit_code_ipa0(0xE3, 0x03, "LRAG"), \
+ exit_code_ipa0(0xE3, 0x13, "LRAY"), \
+ exit_code_ipa0(0xE3, 0x25, "NTSTG"), \
+ exit_code_ipa0(0xE5, 0x00, "LASP"), \
+ exit_code_ipa0(0xE5, 0x01, "TPROT"), \
+ exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \
+ exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \
+ exit_code_ipa0(0xEB, 0x25, "STCTG"), \
+ exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \
+ exit_code_ipa0(0xEB, 0x60, "LRIC"), \
+ exit_code_ipa0(0xEB, 0x61, "STRIC"), \
+ exit_code_ipa0(0xEB, 0x62, "MRIC"), \
+ exit_code_ipa0(0xEB, 0x8a, "SQBS"), \
+ exit_code_ipa0(0xC8, 0x01, "ECTG"), \
+ exit_code(0x0a, "SVC"), \
+ exit_code(0x80, "SSM"), \
+ exit_code(0x82, "LPSW"), \
+ exit_code(0x83, "DIAG"), \
+ exit_code(0xae, "SIGP"), \
+ exit_code(0xac, "STNSM"), \
+ exit_code(0xad, "STOSM"), \
+ exit_code(0xb1, "LRA"), \
+ exit_code(0xb6, "STCTL"), \
+ exit_code(0xb7, "LCTL"), \
+ exit_code(0xee, "PLO")
+
+#define sie_intercept_code \
+ { 0x00, "Host interruption" }, \
+ { 0x04, "Instruction" }, \
+ { 0x08, "Program interruption" }, \
+ { 0x0c, "Instruction and program interruption" }, \
+ { 0x10, "External request" }, \
+ { 0x14, "External interruption" }, \
+ { 0x18, "I/O request" }, \
+ { 0x1c, "Wait state" }, \
+ { 0x20, "Validity" }, \
+ { 0x28, "Stop request" }, \
+ { 0x2c, "Operation exception" }, \
+ { 0x38, "Partial-execution" }, \
+ { 0x3c, "I/O interruption" }, \
+ { 0x40, "I/O instruction" }, \
+ { 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as userspace interface and it can be used in places
+ * that does not allow to use general decoder functions,
+ * such as trace events declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \
+ (insn >> 56) == (ipa0) ? \
+ ((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) \
+ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
+ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
+ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
+ INSN_DECODE(insn)
+
+#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
index 584787f6ce4..b30de9c01bb 100644
--- a/arch/s390/include/uapi/asm/sigcontext.h
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -49,6 +49,7 @@ typedef struct
typedef struct
{
unsigned int fpc;
+ unsigned int pad;
double fprs[__NUM_FPRS];
} _s390_fp_regs;
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 436d07c23be..e031332096d 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -28,7 +28,7 @@
#define SO_PRIORITY 12
#define SO_LINGER 13
#define SO_BSDCOMPAT 14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT 15
#define SO_PASSCRED 16
#define SO_PEERCRED 17
#define SO_RCVLOWAT 18
@@ -76,4 +76,14 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
+#define SO_LOCK_FILTER 44
+
+#define SO_SELECT_ERR_QUEUE 45
+
+#define SO_BUSY_POLL 46
+
+#define SO_MAX_PACING_RATE 47
+
+#define SO_BPF_EXTENSIONS 48
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
index 5acca0a34c2..471eb09184d 100644
--- a/arch/s390/include/uapi/asm/statfs.h
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -7,9 +7,6 @@
#ifndef _S390_STATFS_H
#define _S390_STATFS_H
-#ifndef __s390x__
-#include <asm-generic/statfs.h>
-#else
/*
* We can't use <asm-generic/statfs.h> because in 64-bit mode
* we mix ints of different sizes in our struct statfs.
@@ -21,49 +18,33 @@ typedef __kernel_fsid_t fsid_t;
#endif
struct statfs {
- int f_type;
- int f_bsize;
- long f_blocks;
- long f_bfree;
- long f_bavail;
- long f_files;
- long f_ffree;
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long f_blocks;
+ unsigned long f_bfree;
+ unsigned long f_bavail;
+ unsigned long f_files;
+ unsigned long f_ffree;
__kernel_fsid_t f_fsid;
- int f_namelen;
- int f_frsize;
- int f_flags;
- int f_spare[4];
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4];
};
struct statfs64 {
- int f_type;
- int f_bsize;
- long f_blocks;
- long f_bfree;
- long f_bavail;
- long f_files;
- long f_ffree;
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long long f_blocks;
+ unsigned long long f_bfree;
+ unsigned long long f_bavail;
+ unsigned long long f_files;
+ unsigned long long f_ffree;
__kernel_fsid_t f_fsid;
- int f_namelen;
- int f_frsize;
- int f_flags;
- int f_spare[4];
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4];
};
-struct compat_statfs64 {
- __u32 f_type;
- __u32 f_bsize;
- __u64 f_blocks;
- __u64 f_bfree;
- __u64 f_bavail;
- __u64 f_files;
- __u64 f_ffree;
- __kernel_fsid_t f_fsid;
- __u32 f_namelen;
- __u32 f_frsize;
- __u32 f_flags;
- __u32 f_spare[4];
-};
-
-#endif /* __s390x__ */
#endif
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
index 200e06325c6..3e077b2a470 100644
--- a/arch/s390/include/uapi/asm/ucontext.h
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -16,7 +16,9 @@ struct ucontext_extended {
struct ucontext *uc_link;
stack_t uc_stack;
_sigregs uc_mcontext;
- unsigned long uc_sigmask[2];
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)];
unsigned long uc_gprs_high[16];
};
@@ -27,7 +29,9 @@ struct ucontext {
struct ucontext *uc_link;
stack_t uc_stack;
_sigregs uc_mcontext;
- sigset_t uc_sigmask; /* mask last for extensibility */
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)];
};
#endif /* !_ASM_S390_UCONTEXT_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 864f693c237..3802d2d3a18 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -280,7 +280,10 @@
#define __NR_s390_runtime_instr 342
#define __NR_kcmp 343
#define __NR_finit_module 344
-#define NR_syscalls 345
+#define __NR_sched_setattr 345
+#define __NR_sched_getattr 346
+#define __NR_renameat2 347
+#define NR_syscalls 348
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 00000000000..a9a4ebf79fa
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index e83fc116f5b..f2b18eacaca 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -154,6 +154,67 @@ struct ica_xcRB {
unsigned short priority_window;
unsigned int status;
} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len: CPRB header length [0x0020]
+ * @cprb_ver_id: CPRB version id. [0x04]
+ * @pad_000: Alignment pad bytes
+ * @flags: Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id: Function id / subtype [0x5434]
+ * @source_id: Source id [originator id]
+ * @target_id: Target id [usage/ctrl domain id]
+ * @ret_code: Return code
+ * @reserved1: Reserved
+ * @reserved2: Reserved
+ * @payload_len: Payload length
+ */
+struct ep11_cprb {
+ uint16_t cprb_len;
+ unsigned char cprb_ver_id;
+ unsigned char pad_000[2];
+ unsigned char flags;
+ unsigned char func_id[2];
+ uint32_t source_id;
+ uint32_t target_id;
+ uint32_t ret_code;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id: AP device id
+ * @dom_id: Usage domain id
+ */
+struct ep11_target_dev {
+ uint16_t ap_id;
+ uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num: Number of target adapters
+ * @targets: Addr to target adapter list
+ * @weight: Level of request priority
+ * @req_no: Request id/number
+ * @req_len: Request length
+ * @req: Addr to request block
+ * @resp_len: Response length
+ * @resp: Addr to response block
+ */
+struct ep11_urb {
+ uint16_t targets_num;
+ uint64_t targets;
+ uint64_t weight;
+ uint64_t req_no;
+ uint64_t req_len;
+ uint64_t req;
+ uint64_t resp_len;
+ uint64_t resp;
+} __attribute__((packed));
+
#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
#define ZCRYPT_IOCTL_MAGIC 'z'
@@ -183,6 +244,9 @@ struct ica_xcRB {
* ZSECSENDCPRB
* Send an arbitrary CPRB to a crypto card.
*
+ * ZSENDEP11CPRB
+ * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
* Z90STAT_STATUS_MASK
* Return an 64 element array of unsigned chars for the status of
* all devices.
@@ -256,6 +320,7 @@ struct ica_xcRB {
#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
/* New status calls */
#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2ac311ef5c9..a95c4ca9961 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -14,16 +14,25 @@ endif
CFLAGS_smp.o := -Wno-nonnull
#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+
+#
# Pass UTS_MACHINE for user_regset definition
#
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
-obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \
- processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \
- debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \
- sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o
+obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y += dumpstack.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -38,9 +47,8 @@ obj-$(CONFIG_SCHED_BOOK) += topology.o
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
-obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
- compat_wrapper.o compat_exec_domain.o \
- $(compat-obj-y)
+obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
@@ -51,7 +59,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
+ perf_cpum_cf_events.o
obj-y += runtime_instr.o cache.o
endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fface87056e..afe1715a4eb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C
#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <asm/cputime.h>
#include <asm/vdso.h>
@@ -35,6 +36,7 @@ int main(void)
DEFINE(__TI_task, offsetof(struct thread_info, task));
DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+ DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
@@ -46,7 +48,9 @@ int main(void)
DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+ DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+ DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
@@ -62,12 +66,14 @@ int main(void)
DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
- DEFINE(__VDSO_NTP_MULT, offsetof(struct vdso_data, ntp_mult));
+ DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
+ DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
BLANK();
/* idle data offsets */
@@ -84,16 +90,22 @@ int main(void)
DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
- DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
+ DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+ DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+ DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
- DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
- DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
+ DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+ DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+ DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+ DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+ DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+ DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
@@ -110,6 +122,7 @@ int main(void)
DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
+ DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
@@ -131,12 +144,12 @@ int main(void)
DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
+ DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
- DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
BLANK();
DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
@@ -150,6 +163,8 @@ int main(void)
#ifdef CONFIG_32BIT
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
#else /* CONFIG_32BIT */
+ DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+ DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
@@ -160,6 +175,8 @@ int main(void)
DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+ DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+ DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
#endif /* CONFIG_32BIT */
return 0;
}
diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c
deleted file mode 100644
index 102da5e2303..00000000000
--- a/arch/s390/kernel/bitmap.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Bitmaps for set_bit, clear_bit, test_and_set_bit, ...
- * See include/asm/{bitops.h|posix_types.h} for details
- *
- * Copyright IBM Corp. 1999, 2009
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
- */
-
-#include <linux/bitops.h>
-#include <linux/module.h>
-
-const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
-EXPORT_SYMBOL(_oi_bitmap);
-
-const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f };
-EXPORT_SYMBOL(_ni_bitmap);
-
-const char _zb_findmap[] = {
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
- 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 };
-EXPORT_SYMBOL(_zb_findmap);
-
-const char _sb_findmap[] = {
- 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
- 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 };
-EXPORT_SYMBOL(_sb_findmap);
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 64b24650e4f..c0b03c28d15 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -146,15 +146,14 @@ static void __init cache_build_info(void)
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
for (level = 0; level < CACHE_MAX_LEVEL; level++) {
switch (ct.ci[level].scope) {
- case CACHE_SCOPE_NOTEXISTS:
- case CACHE_SCOPE_RESERVED:
- return;
case CACHE_SCOPE_SHARED:
private = 0;
break;
case CACHE_SCOPE_PRIVATE:
private = 1;
break;
+ default:
+ return;
}
if (ct.ci[level].type == CACHE_TYPE_SEPARATE) {
rc = cache_add(level, private, CACHE_TYPE_DATA);
@@ -173,7 +172,7 @@ error:
}
}
-static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu)
+static struct cache_dir *cache_create_cache_dir(int cpu)
{
struct cache_dir *cache_dir;
struct kobject *kobj = NULL;
@@ -289,9 +288,8 @@ static struct kobj_type cache_index_type = {
.default_attrs = cache_index_default_attrs,
};
-static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir,
- struct cache *cache, int index,
- int cpu)
+static int cache_create_index_dir(struct cache_dir *cache_dir,
+ struct cache *cache, int index, int cpu)
{
struct cache_index_dir *index_dir;
int rc;
@@ -313,7 +311,7 @@ out:
return rc;
}
-static int __cpuinit cache_add_cpu(int cpu)
+static int cache_add_cpu(int cpu)
{
struct cache_dir *cache_dir;
struct cache *cache;
@@ -335,7 +333,7 @@ static int __cpuinit cache_add_cpu(int cpu)
return 0;
}
-static void __cpuinit cache_remove_cpu(int cpu)
+static void cache_remove_cpu(int cpu)
{
struct cache_index_dir *index, *next;
struct cache_dir *cache_dir;
@@ -354,8 +352,8 @@ static void __cpuinit cache_remove_cpu(int cpu)
cache_dir_cpu[cpu] = NULL;
}
-static int __cpuinit cache_hotplug(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cache_hotplug(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
int cpu = (long)hcpu;
int rc = 0;
@@ -380,9 +378,12 @@ static int __init cache_init(void)
if (!test_facility(34))
return 0;
cache_build_info();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
cache_add_cpu(cpu);
- hotcpu_notifier(cache_hotplug, 0);
+ __hotcpu_notifier(cache_hotplug, 0);
+ cpu_notifier_register_done();
return 0;
}
device_initcall(cache_init);
diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c
deleted file mode 100644
index 765fabdada9..00000000000
--- a/arch/s390/kernel/compat_exec_domain.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 personality.
- *
- * Copyright IBM Corp. 2000
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/personality.h>
-#include <linux/sched.h>
-
-static struct exec_domain s390_exec_domain;
-
-static int __init s390_init (void)
-{
- s390_exec_domain.name = "Linux/s390";
- s390_exec_domain.handler = NULL;
- s390_exec_domain.pers_low = PER_LINUX32;
- s390_exec_domain.pers_high = PER_LINUX32;
- s390_exec_domain.signal_map = default_exec_domain.signal_map;
- s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
- register_exec_domain(&s390_exec_domain);
- return 0;
-}
-
-__initcall(s390_init);
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 65cca95843e..ca38139423a 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -58,10 +58,6 @@
#include "compat_linux.h"
-u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT |
- PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK |
- PSW32_MASK_PSTATE | PSW32_ASC_HOME;
-
/* For this source file, we want overflow handling. */
#undef high2lowuid
@@ -90,48 +86,51 @@ u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT |
#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
-asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
+ u16, user, u16, group)
{
return sys_chown(filename, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *,
+ filename, u16, user, u16, group)
{
return sys_lchown(filename, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
{
return sys_fchown(fd, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_setregid16(u16 rgid, u16 egid)
+COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
{
return sys_setregid(low2highgid(rgid), low2highgid(egid));
}
-asmlinkage long sys32_setgid16(u16 gid)
+COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
{
return sys_setgid((gid_t)gid);
}
-asmlinkage long sys32_setreuid16(u16 ruid, u16 euid)
+COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
{
return sys_setreuid(low2highuid(ruid), low2highuid(euid));
}
-asmlinkage long sys32_setuid16(u16 uid)
+COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
{
return sys_setuid((uid_t)uid);
}
-asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid)
+COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
{
return sys_setresuid(low2highuid(ruid), low2highuid(euid),
- low2highuid(suid));
+ low2highuid(suid));
}
-asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp)
+COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
+ u16 __user *, euidp, u16 __user *, suidp)
{
const struct cred *cred = current_cred();
int retval;
@@ -148,13 +147,14 @@ asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __us
return retval;
}
-asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid)
+COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
{
return sys_setresgid(low2highgid(rgid), low2highgid(egid),
- low2highgid(sgid));
+ low2highgid(sgid));
}
-asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp)
+COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
+ u16 __user *, egidp, u16 __user *, sgidp)
{
const struct cred *cred = current_cred();
int retval;
@@ -171,12 +171,12 @@ asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __us
return retval;
}
-asmlinkage long sys32_setfsuid16(u16 uid)
+COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
{
return sys_setfsuid((uid_t)uid);
}
-asmlinkage long sys32_setfsgid16(u16 gid)
+COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
{
return sys_setfsgid((gid_t)gid);
}
@@ -219,31 +219,32 @@ static int groups16_from_user(struct group_info *group_info, u16 __user *groupli
return 0;
}
-asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist)
+COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
{
+ const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
- get_group_info(current->cred->group_info);
- i = current->cred->group_info->ngroups;
+ get_group_info(cred->group_info);
+ i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
- if (groups16_to_user(grouplist, current->cred->group_info)) {
+ if (groups16_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
- put_group_info(current->cred->group_info);
+ put_group_info(cred->group_info);
return i;
}
-asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
+COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
{
struct group_info *group_info;
int retval;
@@ -268,231 +269,65 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
return retval;
}
-asmlinkage long sys32_getuid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_getuid16)
{
return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
}
-asmlinkage long sys32_geteuid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
{
return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
}
-asmlinkage long sys32_getgid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_getgid16)
{
return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
}
-asmlinkage long sys32_getegid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_getegid16)
{
return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
}
-/*
- * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation.
- *
- * This is really horribly ugly.
- */
#ifdef CONFIG_SYSVIPC
-asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr)
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+ compat_ulong_t, third, compat_uptr_t, ptr)
{
if (call >> 16) /* hack for backward compatibility */
return -EINVAL;
- switch (call) {
- case SEMTIMEDOP:
- return compat_sys_semtimedop(first, compat_ptr(ptr),
- second, compat_ptr(third));
- case SEMOP:
- /* struct sembuf is the same on 32 and 64bit :)) */
- return sys_semtimedop(first, compat_ptr(ptr),
- second, NULL);
- case SEMGET:
- return sys_semget(first, second, third);
- case SEMCTL:
- return compat_sys_semctl(first, second, third,
- compat_ptr(ptr));
- case MSGSND:
- return compat_sys_msgsnd(first, second, third,
- compat_ptr(ptr));
- case MSGRCV:
- return compat_sys_msgrcv(first, second, 0, third,
- 0, compat_ptr(ptr));
- case MSGGET:
- return sys_msgget((key_t) first, second);
- case MSGCTL:
- return compat_sys_msgctl(first, second, compat_ptr(ptr));
- case SHMAT:
- return compat_sys_shmat(first, second, third,
- 0, compat_ptr(ptr));
- case SHMDT:
- return sys_shmdt(compat_ptr(ptr));
- case SHMGET:
- return sys_shmget(first, (unsigned)second, third);
- case SHMCTL:
- return compat_sys_shmctl(first, second, compat_ptr(ptr));
- }
-
- return -ENOSYS;
+ return compat_sys_ipc(call, first, second, third, ptr, third);
}
#endif
-asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low)
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_truncate(path, (high << 32) | low);
-}
-
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low)
-{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_ftruncate(fd, (high << 32) | low);
-}
-
-asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs ();
-
- set_fs (KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid,
- (struct timespec __force __user *) &t);
- set_fs (old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
-}
-
-asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (set) {
- if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
- return -EFAULT;
- s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
- }
- set_fs (KERNEL_DS);
- ret = sys_rt_sigprocmask(how,
- set ? (sigset_t __force __user *) &s : NULL,
- oset ? (sigset_t __force __user *) &s : NULL,
- sigsetsize);
- set_fs (old_fs);
- if (ret) return ret;
- if (oset) {
- s32.sig[1] = (s.sig[0] >> 32);
- s32.sig[0] = s.sig[0];
- if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return 0;
-}
-
-asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
- size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs (KERNEL_DS);
- ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize);
- set_fs (old_fs);
- if (!ret) {
- s32.sig[1] = (s.sig[0] >> 32);
- s32.sig[0] = s.sig[0];
- if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return ret;
+ return sys_truncate(path, (unsigned long)high << 32 | low);
}
-asmlinkage long
-sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
{
- siginfo_t info;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- set_fs (KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *) &info);
- set_fs (old_fs);
- return ret;
+ return sys_ftruncate(fd, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
- return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
+ return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
- return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count)
-{
- return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count);
+ return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count)
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
{
- mm_segment_t old_fs = get_fs();
- int ret;
- off_t of;
-
- if (offset && get_user(of, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile(out_fd, in_fd,
- offset ? (off_t __force __user *) &of : NULL, count);
- set_fs(old_fs);
-
- if (offset && put_user(of, offset))
- return -EFAULT;
-
- return ret;
-}
-
-asmlinkage long sys32_sendfile64(int out_fd, int in_fd,
- compat_loff_t __user *offset, s32 count)
-{
- mm_segment_t old_fs = get_fs();
- int ret;
- loff_t lof;
-
- if (offset && get_user(lof, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile64(out_fd, in_fd,
- offset ? (loff_t __force __user *) &lof : NULL,
- count);
- set_fs(old_fs);
-
- if (offset && put_user(lof, offset))
- return -EFAULT;
-
- return ret;
+ return sys_readahead(fd, (unsigned long)high << 32 | low, count);
}
struct stat64_emu31 {
@@ -544,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
}
-asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
@@ -553,7 +388,7 @@ asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 _
return ret;
}
-asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -562,7 +397,7 @@ asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31
return ret;
}
-asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -571,8 +406,8 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta
return ret;
}
-asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+ struct stat64_emu31 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -598,7 +433,7 @@ struct mmap_arg_struct_emu31 {
compat_ulong_t offset;
};
-asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
{
struct mmap_arg_struct_emu31 a;
@@ -610,7 +445,7 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
a.offset >> PAGE_SHIFT);
}
-asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg)
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
{
struct mmap_arg_struct_emu31 a;
@@ -619,7 +454,7 @@ asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg)
return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
}
-asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count)
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
@@ -627,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count)
return sys_read(fd, buf, count);
}
-asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count)
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
@@ -641,14 +476,13 @@ asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t cou
* because the 31 bit values differ from the 64 bit values.
*/
-asmlinkage long
-sys32_fadvise64(int fd, loff_t offset, size_t len, int advise)
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
{
if (advise == 4)
advise = POSIX_FADV_DONTNEED;
else if (advise == 5)
advise = POSIX_FADV_NOREUSE;
- return sys_fadvise64(fd, offset, len, advise);
+ return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise);
}
struct fadvise64_64_args {
@@ -658,8 +492,7 @@ struct fadvise64_64_args {
int advice;
};
-asmlinkage long
-sys32_fadvise64_64(struct fadvise64_64_args __user *args)
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
{
struct fadvise64_64_args a;
@@ -671,3 +504,17 @@ sys32_fadvise64_64(struct fadvise64_64_args __user *args)
a.advice = POSIX_FADV_NOREUSE;
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+ u32, nhigh, u32, nlow, unsigned int, flags)
+{
+ return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+ ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+ u32, lenhigh, u32, lenlow)
+{
+ return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+ ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index d4d0239970a..70d4b7c4bea 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -17,13 +17,6 @@ struct ipc_kludge_32 {
__s32 msgtyp;
};
-struct old_sigaction32 {
- __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */
- compat_old_sigset_t sa_mask; /* A 32 bit mask */
- __u32 sa_flags;
- __u32 sa_restorer; /* Another 32 bit pointer */
-};
-
/* asm/sigcontext.h */
typedef union
{
@@ -34,6 +27,7 @@ typedef union
typedef struct
{
unsigned int fpc;
+ unsigned int pad;
freg_t32 fprs[__NUM_FPRS];
} _s390_fp_regs32;
@@ -68,90 +62,59 @@ struct sigcontext32
};
/* asm/signal.h */
-struct sigaction32 {
- __u32 sa_handler; /* pointer */
- __u32 sa_flags;
- __u32 sa_restorer; /* pointer */
- compat_sigset_t sa_mask; /* mask last for extensibility */
-};
-
-typedef struct {
- __u32 ss_sp; /* pointer */
- int ss_flags;
- compat_size_t ss_size;
-} stack_t32;
/* asm/ucontext.h */
struct ucontext32 {
__u32 uc_flags;
__u32 uc_link; /* pointer */
- stack_t32 uc_stack;
+ compat_stack_t uc_stack;
_sigregs32 uc_mcontext;
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
+ compat_sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(compat_sigset_t)];
};
struct stat64_emu31;
struct mmap_arg_struct_emu31;
struct fadvise64_64_args;
-struct old_sigaction32;
-struct old_sigaction32;
-long sys32_chown16(const char __user * filename, u16 user, u16 group);
-long sys32_lchown16(const char __user * filename, u16 user, u16 group);
-long sys32_fchown16(unsigned int fd, u16 user, u16 group);
-long sys32_setregid16(u16 rgid, u16 egid);
-long sys32_setgid16(u16 gid);
-long sys32_setreuid16(u16 ruid, u16 euid);
-long sys32_setuid16(u16 uid);
-long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
-long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long sys32_setfsuid16(u16 uid);
-long sys32_setfsgid16(u16 gid);
-long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_getuid16(void);
-long sys32_geteuid16(void);
-long sys32_getgid16(void);
-long sys32_getegid16(void);
-long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
-long sys32_truncate64(const char __user * path, unsigned long high,
- unsigned long low);
-long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
-long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval);
-long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, size_t sigsetsize);
-long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
-long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs);
-long sys32_delete_module(const char __user *name_user, unsigned int flags);
-long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
- u32 poshi, u32 poslo);
-long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo);
-compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
-long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
- size_t count);
-long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
- s32 count);
-long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf);
-long sys32_lstat64(const char __user * filename,
- struct stat64_emu31 __user * statbuf);
-long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
-long sys32_fstatat64(unsigned int dfd, const char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag);
-unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_read(unsigned int fd, char __user * buf, size_t count);
-long sys32_write(unsigned int fd, const char __user * buf, size_t count);
-long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
-long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact);
-long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
- struct sigaction32 __user *oact, size_t sigsetsize);
-long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
+long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
+long compat_sys_s390_setregid16(u16 rgid, u16 egid);
+long compat_sys_s390_setgid16(u16 gid);
+long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
+long compat_sys_s390_setuid16(u16 uid);
+long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
+long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long compat_sys_s390_setfsuid16(u16 uid);
+long compat_sys_s390_setfsgid16(u16 gid);
+long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_getuid16(void);
+long compat_sys_s390_geteuid16(void);
+long compat_sys_s390_getgid16(void);
+long compat_sys_s390_getegid16(void);
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 593fcc9253f..f204d692036 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -49,13 +49,10 @@ typedef struct
__u32 gprs_high[NUM_GPRS];
} rt_sigframe32;
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
int err;
- if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
/* If you change siginfo_t structure, please be sure
this code is fixed accordingly.
It should never copy any pad contained in the structure
@@ -102,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
break;
}
}
- return err;
+ return err ? -EFAULT : 0;
}
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
@@ -110,9 +107,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
int err;
u32 tmp;
- if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
err = __get_user(to->si_signo, &from->si_signo);
err |= __get_user(to->si_errno, &from->si_errno);
err |= __get_user(to->si_code, &from->si_code);
@@ -154,179 +148,72 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
break;
}
}
- return err;
-}
-
-asmlinkage long
-sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- unsigned long sa_handler, sa_restorer;
- int ret;
-
- if (act) {
- compat_old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(sa_handler, &act->sa_handler) ||
- __get_user(sa_restorer, &act->sa_restorer) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
- new_ka.sa.sa_handler = (__sighandler_t) sa_handler;
- new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- sa_handler = (unsigned long) old_ka.sa.sa_handler;
- sa_restorer = (unsigned long) old_ka.sa.sa_restorer;
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(sa_handler, &oact->sa_handler) ||
- __put_user(sa_restorer, &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
-
-asmlinkage long
-sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
- struct sigaction32 __user *oact, size_t sigsetsize)
-{
- struct k_sigaction new_ka, old_ka;
- unsigned long sa_handler;
- int ret;
- compat_sigset_t set32;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
-
- if (act) {
- ret = get_user(sa_handler, &act->sa_handler);
- ret |= __copy_from_user(&set32, &act->sa_mask,
- sizeof(compat_sigset_t));
- new_ka.sa.sa_mask.sig[0] =
- set32.sig[0] | (((long)set32.sig[1]) << 32);
- ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
-
- if (ret)
- return -EFAULT;
- new_ka.sa.sa_handler = (__sighandler_t) sa_handler;
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
- set32.sig[0] = old_ka.sa.sa_mask.sig[0];
- ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler);
- ret |= __copy_to_user(&oact->sa_mask, &set32,
- sizeof(compat_sigset_t));
- ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- }
-
- return ret;
-}
-
-asmlinkage long
-sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss)
-{
- struct pt_regs *regs = task_pt_regs(current);
- stack_t kss, koss;
- unsigned long ss_sp;
- int ret, err = 0;
- mm_segment_t old_fs = get_fs();
-
- if (uss) {
- if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
- return -EFAULT;
- err |= __get_user(ss_sp, &uss->ss_sp);
- err |= __get_user(kss.ss_size, &uss->ss_size);
- err |= __get_user(kss.ss_flags, &uss->ss_flags);
- if (err)
- return -EFAULT;
- kss.ss_sp = (void __user *) ss_sp;
- }
-
- set_fs (KERNEL_DS);
- ret = do_sigaltstack((stack_t __force __user *) (uss ? &kss : NULL),
- (stack_t __force __user *) (uoss ? &koss : NULL),
- regs->gprs[15]);
- set_fs (old_fs);
-
- if (!ret && uoss) {
- if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
- return -EFAULT;
- ss_sp = (unsigned long) koss.ss_sp;
- err |= __put_user(ss_sp, &uoss->ss_sp);
- err |= __put_user(koss.ss_size, &uoss->ss_size);
- err |= __put_user(koss.ss_flags, &uoss->ss_flags);
- if (err)
- return -EFAULT;
- }
- return ret;
+ return err ? -EFAULT : 0;
}
static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
{
- _s390_regs_common32 regs32;
- int err, i;
+ _sigregs32 user_sregs;
+ int i;
- regs32.psw.mask = psw32_user_bits |
- ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER);
- regs32.psw.addr = (__u32) regs->psw.addr |
+ user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+ user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+ user_sregs.regs.psw.mask |= PSW32_USER_BITS;
+ user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
(__u32)(regs->psw.mask & PSW_MASK_BA);
for (i = 0; i < NUM_GPRS; i++)
- regs32.gprs[i] = (__u32) regs->gprs[i];
+ user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
save_access_regs(current->thread.acrs);
- memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs));
- err = __copy_to_user(&sregs->regs, &regs32, sizeof(regs32));
- if (err)
- return err;
- save_fp_regs(&current->thread.fp_regs);
- /* s390_fp_regs and _s390_fp_regs32 are the same ! */
- return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs,
- sizeof(_s390_fp_regs32));
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
+ memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
+ return -EFAULT;
+ return 0;
}
static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
{
- _s390_regs_common32 regs32;
- int err, i;
+ _sigregs32 user_sregs;
+ int i;
/* Alwys make any pending restarted system call return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
- err = __copy_from_user(&regs32, &sregs->regs, sizeof(regs32));
- if (err)
- return err;
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
- (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
+ if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+ return -EINVAL;
+
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+ (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
/* Check for invalid user address space control. */
- if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
- regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
+ regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = (__u64) regs32.gprs[i];
- memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs));
+ regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
restore_access_regs(current->thread.acrs);
- err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
- sizeof(_s390_fp_regs32));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
- if (err)
- return err;
+ memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+ sizeof(current->thread.fp_regs));
- restore_fp_regs(&current->thread.fp_regs);
- clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */
+ restore_fp_regs(current->thread.fp_regs.fprs);
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
}
@@ -337,31 +224,29 @@ static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
for (i = 0; i < NUM_GPRS; i++)
gprs_high[i] = regs->gprs[i] >> 32;
-
- return __copy_to_user(uregs, &gprs_high, sizeof(gprs_high));
+ if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high)))
+ return -EFAULT;
+ return 0;
}
static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
{
__u32 gprs_high[NUM_GPRS];
- int err, i;
+ int i;
- err = __copy_from_user(&gprs_high, uregs, sizeof(gprs_high));
- if (err)
- return err;
+ if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high)))
+ return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
*(__u32 *)&regs->gprs[i] = gprs_high[i];
return 0;
}
-asmlinkage long sys32_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
set_current_blocked(&set);
@@ -375,18 +260,12 @@ badframe:
return 0;
}
-asmlinkage long sys32_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
sigset_t set;
- stack_t st;
- __u32 ss_sp;
- int err;
- mm_segment_t old_fs = get_fs();
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
@@ -394,15 +273,8 @@ asmlinkage long sys32_rt_sigreturn(void)
goto badframe;
if (restore_sigregs_gprs_high(regs, frame->gprs_high))
goto badframe;
- err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp);
- st.ss_sp = compat_ptr(ss_sp);
- err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size);
- err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags);
- if (err)
+ if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
- set_fs (KERNEL_DS);
- do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]);
- set_fs (old_fs);
return regs->gprs[2];
badframe:
force_sig(SIGSEGV, current);
@@ -452,8 +324,6 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -471,9 +341,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
(u16 __force __user *)(frame->retcode)))
goto give_sigsegv;
@@ -487,7 +357,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
regs->gprs[15] = (__force __u64) frame;
/* Force 31 bit amode and default user address space control. */
regs->psw.mask = PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (__force __u64) ka->sa.sa_handler;
@@ -501,6 +371,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
/* set extra registers only for synchronous signals */
regs->gprs[4] = regs->int_code & 127;
regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = task_thread_info(current)->last_break;
}
/* Place signal number on stack to allow backtrace from handler. */
@@ -518,8 +389,6 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
{
int err = 0;
rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -530,10 +399,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Create the ucontext. */
err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->gprs[15]),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
err |= save_sigregs32(regs, &frame->uc.uc_mcontext);
err |= save_sigregs_gprs_high(regs, frame->gprs_high);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -543,11 +409,12 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
- err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __force __user *)(frame->retcode));
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+ (u16 __force __user *)(frame->retcode)))
+ goto give_sigsegv;
}
/* Set up backchain. */
@@ -558,13 +425,14 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->gprs[15] = (__force __u64) frame;
/* Force 31 bit amode and default user address space control. */
regs->psw.mask = PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64) ka->sa.sa_handler;
+ regs->psw.addr = (__u64 __force) ka->sa.sa_handler;
regs->gprs[2] = map_signal(sig);
regs->gprs[3] = (__force __u64) &frame->info;
regs->gprs[4] = (__force __u64) &frame->uc;
+ regs->gprs[5] = task_thread_info(current)->last_break;
return 0;
give_sigsegv:
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
deleted file mode 100644
index 9b9a805656b..00000000000
--- a/arch/s390/kernel/compat_wrapper.S
+++ /dev/null
@@ -1,1667 +0,0 @@
-/*
-* wrapper for 31 bit compatible system calls.
-*
-* Copyright IBM Corp. 2000, 2006
-* Author(s): Gerhard Tonn (ton@de.ibm.com),
-* Thomas Spatzier (tspat@de.ibm.com)
-*/
-
-#include <linux/linkage.h>
-
-ENTRY(sys32_exit_wrapper)
- lgfr %r2,%r2 # int
- jg sys_exit # branch to sys_exit
-
-ENTRY(sys32_read_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys32_read # branch to sys_read
-
-ENTRY(sys32_write_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- jg sys32_write # branch to system call
-
-ENTRY(sys32_open_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg compat_sys_open # branch to system call
-
-ENTRY(sys32_close_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_close # branch to system call
-
-ENTRY(sys32_creat_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_creat # branch to system call
-
-ENTRY(sys32_link_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_link # branch to system call
-
-ENTRY(sys32_unlink_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_unlink # branch to system call
-
-ENTRY(sys32_chdir_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_chdir # branch to system call
-
-ENTRY(sys32_time_wrapper)
- llgtr %r2,%r2 # int *
- jg compat_sys_time # branch to system call
-
-ENTRY(sys32_mknod_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # dev
- jg sys_mknod # branch to system call
-
-ENTRY(sys32_chmod_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # mode_t
- jg sys_chmod # branch to system call
-
-ENTRY(sys32_lchown16_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_lchown16 # branch to system call
-
-ENTRY(sys32_lseek_wrapper)
- llgfr %r2,%r2 # unsigned int
- lgfr %r3,%r3 # off_t
- llgfr %r4,%r4 # unsigned int
- jg sys_lseek # branch to system call
-
-#sys32_getpid_wrapper # void
-
-ENTRY(sys32_mount_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # char *
- llgfr %r5,%r5 # unsigned long
- llgtr %r6,%r6 # void *
- jg compat_sys_mount # branch to system call
-
-ENTRY(sys32_oldumount_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_oldumount # branch to system call
-
-ENTRY(sys32_setuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setuid16 # branch to system call
-
-#sys32_getuid16_wrapper # void
-
-ENTRY(sys32_ptrace_wrapper)
- lgfr %r2,%r2 # long
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # long
- llgfr %r5,%r5 # long
- jg compat_sys_ptrace # branch to system call
-
-ENTRY(sys32_alarm_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_alarm # branch to system call
-
-ENTRY(compat_sys_utime_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_utimbuf *
- jg compat_sys_utime # branch to system call
-
-ENTRY(sys32_access_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_access # branch to system call
-
-ENTRY(sys32_nice_wrapper)
- lgfr %r2,%r2 # int
- jg sys_nice # branch to system call
-
-#sys32_sync_wrapper # void
-
-ENTRY(sys32_kill_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_kill # branch to system call
-
-ENTRY(sys32_rename_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_rename # branch to system call
-
-ENTRY(sys32_mkdir_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_mkdir # branch to system call
-
-ENTRY(sys32_rmdir_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_rmdir # branch to system call
-
-ENTRY(sys32_dup_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_dup # branch to system call
-
-ENTRY(sys32_pipe_wrapper)
- llgtr %r2,%r2 # u32 *
- jg sys_pipe # branch to system call
-
-ENTRY(compat_sys_times_wrapper)
- llgtr %r2,%r2 # struct compat_tms *
- jg compat_sys_times # branch to system call
-
-ENTRY(sys32_brk_wrapper)
- llgtr %r2,%r2 # unsigned long
- jg sys_brk # branch to system call
-
-ENTRY(sys32_setgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setgid16 # branch to system call
-
-#sys32_getgid16_wrapper # void
-
-ENTRY(sys32_signal_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __sighandler_t
- jg sys_signal
-
-#sys32_geteuid16_wrapper # void
-
-#sys32_getegid16_wrapper # void
-
-ENTRY(sys32_acct_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_acct # branch to system call
-
-ENTRY(sys32_umount_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_umount # branch to system call
-
-ENTRY(compat_sys_ioctl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_ioctl # branch to system call
-
-ENTRY(compat_sys_fcntl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl # branch to system call
-
-ENTRY(sys32_setpgid_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- jg sys_setpgid # branch to system call
-
-ENTRY(sys32_umask_wrapper)
- lgfr %r2,%r2 # int
- jg sys_umask # branch to system call
-
-ENTRY(sys32_chroot_wrapper)
- llgtr %r2,%r2 # char *
- jg sys_chroot # branch to system call
-
-ENTRY(sys32_ustat_wrapper)
- llgfr %r2,%r2 # dev_t
- llgtr %r3,%r3 # struct ustat *
- jg compat_sys_ustat
-
-ENTRY(sys32_dup2_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_dup2 # branch to system call
-
-#sys32_getppid_wrapper # void
-
-#sys32_getpgrp_wrapper # void
-
-#sys32_setsid_wrapper # void
-
-ENTRY(sys32_sigaction_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct old_sigaction *
- llgtr %r4,%r4 # struct old_sigaction32 *
- jg sys32_sigaction # branch to system call
-
-ENTRY(sys32_setreuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- jg sys32_setreuid16 # branch to system call
-
-ENTRY(sys32_setregid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- jg sys32_setregid16 # branch to system call
-
-ENTRY(sys_sigsuspend_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # old_sigset_t
- jg sys_sigsuspend
-
-ENTRY(compat_sys_sigpending_wrapper)
- llgtr %r2,%r2 # compat_old_sigset_t *
- jg compat_sys_sigpending # branch to system call
-
-ENTRY(sys32_sethostname_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_sethostname # branch to system call
-
-ENTRY(compat_sys_setrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_setrlimit # branch to system call
-
-ENTRY(compat_sys_old_getrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_old_getrlimit # branch to system call
-
-ENTRY(compat_sys_getrlimit_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_getrlimit # branch to system call
-
-ENTRY(sys32_mmap2_wrapper)
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg sys32_mmap2 # branch to system call
-
-ENTRY(compat_sys_getrusage_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct rusage_emu31 *
- jg compat_sys_getrusage # branch to system call
-
-ENTRY(compat_sys_gettimeofday_wrapper)
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg compat_sys_gettimeofday # branch to system call
-
-ENTRY(compat_sys_settimeofday_wrapper)
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg compat_sys_settimeofday # branch to system call
-
-ENTRY(sys32_getgroups16_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_getgroups16 # branch to system call
-
-ENTRY(sys32_setgroups16_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_setgroups16 # branch to system call
-
-ENTRY(sys32_symlink_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_symlink # branch to system call
-
-ENTRY(sys32_readlink_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_readlink # branch to system call
-
-ENTRY(sys32_uselib_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_uselib # branch to system call
-
-ENTRY(sys32_swapon_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_swapon # branch to system call
-
-ENTRY(sys32_reboot_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # unsigned int
- llgtr %r5,%r5 # void *
- jg sys_reboot # branch to system call
-
-ENTRY(old32_readdir_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_old_readdir # branch to system call
-
-ENTRY(old32_mmap_wrapper)
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg old32_mmap # branch to system call
-
-ENTRY(sys32_munmap_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munmap # branch to system call
-
-ENTRY(sys32_truncate_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # long
- jg sys_truncate # branch to system call
-
-ENTRY(sys32_ftruncate_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- jg sys_ftruncate # branch to system call
-
-ENTRY(sys32_fchmod_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # mode_t
- jg sys_fchmod # branch to system call
-
-ENTRY(sys32_fchown16_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # compat_uid_t
- llgfr %r4,%r4 # compat_uid_t
- jg sys32_fchown16 # branch to system call
-
-ENTRY(sys32_getpriority_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_getpriority # branch to system call
-
-ENTRY(sys32_setpriority_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_setpriority # branch to system call
-
-ENTRY(compat_sys_statfs_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_statfs # branch to system call
-
-ENTRY(compat_sys_fstatfs_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_fstatfs # branch to system call
-
-ENTRY(compat_sys_socketcall_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_socketcall # branch to system call
-
-ENTRY(sys32_syslog_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_syslog # branch to system call
-
-ENTRY(compat_sys_setitimer_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct itimerval_emu31 *
- llgtr %r4,%r4 # struct itimerval_emu31 *
- jg compat_sys_setitimer # branch to system call
-
-ENTRY(compat_sys_getitimer_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct itimerval_emu31 *
- jg compat_sys_getitimer # branch to system call
-
-ENTRY(compat_sys_newstat_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newstat # branch to system call
-
-ENTRY(compat_sys_newlstat_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newlstat # branch to system call
-
-ENTRY(compat_sys_newfstat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newfstat # branch to system call
-
-#sys32_vhangup_wrapper # void
-
-ENTRY(compat_sys_wait4_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # unsigned int *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct rusage *
- jg compat_sys_wait4 # branch to system call
-
-ENTRY(sys32_swapoff_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_swapoff # branch to system call
-
-ENTRY(compat_sys_sysinfo_wrapper)
- llgtr %r2,%r2 # struct sysinfo_emu31 *
- jg compat_sys_sysinfo # branch to system call
-
-ENTRY(sys32_ipc_wrapper)
- llgfr %r2,%r2 # uint
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgfr %r6,%r6 # u32
- jg sys32_ipc # branch to system call
-
-ENTRY(sys32_fsync_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fsync # branch to system call
-
-#sys32_sigreturn_wrapper # done in sigreturn_glue
-
-#sys32_clone_wrapper # done in clone_glue
-
-ENTRY(sys32_setdomainname_wrapper)
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_setdomainname # branch to system call
-
-ENTRY(sys32_newuname_wrapper)
- llgtr %r2,%r2 # struct new_utsname *
- jg sys_newuname # branch to system call
-
-ENTRY(compat_sys_adjtimex_wrapper)
- llgtr %r2,%r2 # struct compat_timex *
- jg compat_sys_adjtimex # branch to system call
-
-ENTRY(sys32_mprotect_wrapper)
- llgtr %r2,%r2 # unsigned long (actually pointer
- llgfr %r3,%r3 # size_t
- llgfr %r4,%r4 # unsigned long
- jg sys_mprotect # branch to system call
-
-ENTRY(compat_sys_sigprocmask_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_old_sigset_t *
- llgtr %r4,%r4 # compat_old_sigset_t *
- jg compat_sys_sigprocmask # branch to system call
-
-ENTRY(sys_init_module_wrapper)
- llgtr %r2,%r2 # void *
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # char *
- jg sys_init_module # branch to system call
-
-ENTRY(sys_delete_module_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned int
- jg sys_delete_module # branch to system call
-
-ENTRY(sys32_quotactl_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # qid_t
- llgtr %r5,%r5 # caddr_t
- jg sys_quotactl # branch to system call
-
-ENTRY(sys32_getpgid_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_getpgid # branch to system call
-
-ENTRY(sys32_fchdir_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fchdir # branch to system call
-
-ENTRY(sys32_bdflush_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # long
- jg sys_bdflush # branch to system call
-
-ENTRY(sys32_sysfs_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys_sysfs # branch to system call
-
-ENTRY(sys32_personality_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_s390_personality # branch to system call
-
-ENTRY(sys32_setfsuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setfsuid16 # branch to system call
-
-ENTRY(sys32_setfsgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setfsgid16 # branch to system call
-
-ENTRY(sys32_llseek_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # unsigned int
- jg sys_llseek # branch to system call
-
-ENTRY(sys32_getdents_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_getdents # branch to system call
-
-ENTRY(compat_sys_select_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_fd_set *
- llgtr %r4,%r4 # compat_fd_set *
- llgtr %r5,%r5 # compat_fd_set *
- llgtr %r6,%r6 # struct compat_timeval *
- jg compat_sys_select # branch to system call
-
-ENTRY(sys32_flock_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_flock # branch to system call
-
-ENTRY(sys32_msync_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_msync # branch to system call
-
-ENTRY(compat_sys_readv_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_readv # branch to system call
-
-ENTRY(compat_sys_writev_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_writev # branch to system call
-
-ENTRY(sys32_getsid_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_getsid # branch to system call
-
-ENTRY(sys32_fdatasync_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_fdatasync # branch to system call
-
-ENTRY(sys32_mlock_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_mlock # branch to system call
-
-ENTRY(sys32_munlock_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munlock # branch to system call
-
-ENTRY(sys32_mlockall_wrapper)
- lgfr %r2,%r2 # int
- jg sys_mlockall # branch to system call
-
-#sys32_munlockall_wrapper # void
-
-ENTRY(sys32_sched_setparam_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_setparam # branch to system call
-
-ENTRY(sys32_sched_getparam_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_getparam # branch to system call
-
-ENTRY(sys32_sched_setscheduler_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct sched_param *
- jg sys_sched_setscheduler # branch to system call
-
-ENTRY(sys32_sched_getscheduler_wrapper)
- lgfr %r2,%r2 # pid_t
- jg sys_sched_getscheduler # branch to system call
-
-#sys32_sched_yield_wrapper # void
-
-ENTRY(sys32_sched_get_priority_max_wrapper)
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_max # branch to system call
-
-ENTRY(sys32_sched_get_priority_min_wrapper)
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_min # branch to system call
-
-ENTRY(sys32_sched_rr_get_interval_wrapper)
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct compat_timespec *
- jg sys32_sched_rr_get_interval # branch to system call
-
-ENTRY(compat_sys_nanosleep_wrapper)
- llgtr %r2,%r2 # struct compat_timespec *
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_nanosleep # branch to system call
-
-ENTRY(sys32_mremap_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_mremap # branch to system call
-
-ENTRY(sys32_setresuid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_setresuid16 # branch to system call
-
-ENTRY(sys32_getresuid16_wrapper)
- llgtr %r2,%r2 # __kernel_old_uid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_uid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_uid_emu31_t *
- jg sys32_getresuid16 # branch to system call
-
-ENTRY(sys32_poll_wrapper)
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- lgfr %r4,%r4 # int
- jg sys_poll # branch to system call
-
-ENTRY(sys32_setresgid16_wrapper)
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_setresgid16 # branch to system call
-
-ENTRY(sys32_getresgid16_wrapper)
- llgtr %r2,%r2 # __kernel_old_gid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_gid_emu31_t *
- jg sys32_getresgid16 # branch to system call
-
-ENTRY(sys32_prctl_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_prctl # branch to system call
-
-#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue
-
-ENTRY(sys32_rt_sigaction_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct sigaction_emu31 *
- llgtr %r4,%r4 # const struct sigaction_emu31 *
- llgfr %r5,%r5 # size_t
- jg sys32_rt_sigaction # branch to system call
-
-ENTRY(sys32_rt_sigprocmask_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # old_sigset_emu31 *
- llgtr %r4,%r4 # old_sigset_emu31 *
- llgfr %r5,%r5 # size_t
- jg sys32_rt_sigprocmask # branch to system call
-
-ENTRY(sys32_rt_sigpending_wrapper)
- llgtr %r2,%r2 # sigset_emu31 *
- llgfr %r3,%r3 # size_t
- jg sys32_rt_sigpending # branch to system call
-
-ENTRY(compat_sys_rt_sigtimedwait_wrapper)
- llgtr %r2,%r2 # const sigset_emu31_t *
- llgtr %r3,%r3 # siginfo_emu31_t *
- llgtr %r4,%r4 # const struct compat_timespec *
- llgfr %r5,%r5 # size_t
- jg compat_sys_rt_sigtimedwait # branch to system call
-
-ENTRY(sys32_rt_sigqueueinfo_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # siginfo_emu31_t *
- jg sys32_rt_sigqueueinfo # branch to system call
-
-ENTRY(compat_sys_rt_sigsuspend_wrapper)
- llgtr %r2,%r2 # compat_sigset_t *
- llgfr %r3,%r3 # compat_size_t
- jg compat_sys_rt_sigsuspend
-
-ENTRY(sys32_pread64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pread64 # branch to system call
-
-ENTRY(sys32_pwrite64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pwrite64 # branch to system call
-
-ENTRY(sys32_chown16_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_chown16 # branch to system call
-
-ENTRY(sys32_getcwd_wrapper)
- llgtr %r2,%r2 # char *
- llgfr %r3,%r3 # unsigned long
- jg sys_getcwd # branch to system call
-
-ENTRY(sys32_capget_wrapper)
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # cap_user_data_t
- jg sys_capget # branch to system call
-
-ENTRY(sys32_capset_wrapper)
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # const cap_user_data_t
- jg sys_capset # branch to system call
-
-ENTRY(sys32_sigaltstack_wrapper)
- llgtr %r2,%r2 # const stack_emu31_t *
- llgtr %r3,%r3 # stack_emu31_t *
- jg sys32_sigaltstack
-
-ENTRY(sys32_sendfile_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # __kernel_off_emu31_t *
- llgfr %r5,%r5 # size_t
- jg sys32_sendfile # branch to system call
-
-#sys32_vfork_wrapper # done in vfork_glue
-
-ENTRY(sys32_truncate64_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_truncate64 # branch to system call
-
-ENTRY(sys32_ftruncate64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_ftruncate64 # branch to system call
-
-ENTRY(sys32_lchown_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_lchown # branch to system call
-
-#sys32_getuid_wrapper # void
-#sys32_getgid_wrapper # void
-#sys32_geteuid_wrapper # void
-#sys32_getegid_wrapper # void
-
-ENTRY(sys32_setreuid_wrapper)
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- jg sys_setreuid # branch to system call
-
-ENTRY(sys32_setregid_wrapper)
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- jg sys_setregid # branch to system call
-
-ENTRY(sys32_getgroups_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_getgroups # branch to system call
-
-ENTRY(sys32_setgroups_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_setgroups # branch to system call
-
-ENTRY(sys32_fchown_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_fchown # branch to system call
-
-ENTRY(sys32_setresuid_wrapper)
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # uid_t
- jg sys_setresuid # branch to system call
-
-ENTRY(sys32_getresuid_wrapper)
- llgtr %r2,%r2 # uid_t *
- llgtr %r3,%r3 # uid_t *
- llgtr %r4,%r4 # uid_t *
- jg sys_getresuid # branch to system call
-
-ENTRY(sys32_setresgid_wrapper)
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- llgfr %r4,%r4 # gid_t
- jg sys_setresgid # branch to system call
-
-ENTRY(sys32_getresgid_wrapper)
- llgtr %r2,%r2 # gid_t *
- llgtr %r3,%r3 # gid_t *
- llgtr %r4,%r4 # gid_t *
- jg sys_getresgid # branch to system call
-
-ENTRY(sys32_chown_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_chown # branch to system call
-
-ENTRY(sys32_setuid_wrapper)
- llgfr %r2,%r2 # uid_t
- jg sys_setuid # branch to system call
-
-ENTRY(sys32_setgid_wrapper)
- llgfr %r2,%r2 # gid_t
- jg sys_setgid # branch to system call
-
-ENTRY(sys32_setfsuid_wrapper)
- llgfr %r2,%r2 # uid_t
- jg sys_setfsuid # branch to system call
-
-ENTRY(sys32_setfsgid_wrapper)
- llgfr %r2,%r2 # gid_t
- jg sys_setfsgid # branch to system call
-
-ENTRY(sys32_pivot_root_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_pivot_root # branch to system call
-
-ENTRY(sys32_mincore_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- llgtr %r4,%r4 # unsigned char *
- jg sys_mincore # branch to system call
-
-ENTRY(sys32_madvise_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_madvise # branch to system call
-
-ENTRY(sys32_getdents64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg sys_getdents64 # branch to system call
-
-ENTRY(compat_sys_fcntl64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl64 # branch to system call
-
-ENTRY(sys32_stat64_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_stat64 # branch to system call
-
-ENTRY(sys32_lstat64_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_lstat64 # branch to system call
-
-ENTRY(sys32_stime_wrapper)
- llgtr %r2,%r2 # long *
- jg compat_sys_stime # branch to system call
-
-ENTRY(sys32_sysctl_wrapper)
- llgtr %r2,%r2 # struct compat_sysctl_args *
- jg compat_sys_sysctl
-
-ENTRY(sys32_fstat64_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_fstat64 # branch to system call
-
-ENTRY(compat_sys_futex_wrapper)
- llgtr %r2,%r2 # u32 *
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct compat_timespec *
- llgtr %r6,%r6 # u32 *
- lgf %r0,164(%r15) # int
- stg %r0,160(%r15)
- jg compat_sys_futex # branch to system call
-
-ENTRY(sys32_setxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_setxattr
-
-ENTRY(sys32_lsetxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_lsetxattr
-
-ENTRY(sys32_fsetxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_fsetxattr
-
-ENTRY(sys32_getxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_getxattr
-
-ENTRY(sys32_lgetxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_lgetxattr
-
-ENTRY(sys32_fgetxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_fgetxattr
-
-ENTRY(sys32_listxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_listxattr
-
-ENTRY(sys32_llistxattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_llistxattr
-
-ENTRY(sys32_flistxattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_flistxattr
-
-ENTRY(sys32_removexattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_removexattr
-
-ENTRY(sys32_lremovexattr_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_lremovexattr
-
-ENTRY(sys32_fremovexattr_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- jg sys_fremovexattr
-
-ENTRY(sys32_sched_setaffinity_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_setaffinity
-
-ENTRY(sys32_sched_getaffinity_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_getaffinity
-
-ENTRY(sys32_exit_group_wrapper)
- lgfr %r2,%r2 # int
- jg sys_exit_group # branch to system call
-
-ENTRY(sys32_set_tid_address_wrapper)
- llgtr %r2,%r2 # int *
- jg sys_set_tid_address # branch to system call
-
-ENTRY(sys_epoll_create_wrapper)
- lgfr %r2,%r2 # int
- jg sys_epoll_create # branch to system call
-
-ENTRY(sys_epoll_ctl_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct epoll_event *
- jg sys_epoll_ctl # branch to system call
-
-ENTRY(sys_epoll_wait_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct epoll_event *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- jg sys_epoll_wait # branch to system call
-
-ENTRY(sys32_lookup_dcookie_wrapper)
- sllg %r2,%r2,32 # get high word of 64bit dcookie
- or %r2,%r3 # get low word of 64bit dcookie
- llgtr %r3,%r4 # char *
- llgfr %r4,%r5 # size_t
- jg sys_lookup_dcookie
-
-ENTRY(sys32_fadvise64_wrapper)
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- llgfr %r4,%r5 # size_t (unsigned long)
- lgfr %r5,%r6 # int
- jg sys32_fadvise64
-
-ENTRY(sys32_fadvise64_64_wrapper)
- llgtr %r2,%r2 # struct fadvise64_64_args *
- jg sys32_fadvise64_64
-
-ENTRY(sys32_clock_settime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_settime
-
-ENTRY(sys32_clock_gettime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_gettime
-
-ENTRY(sys32_clock_getres_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_getres
-
-ENTRY(sys32_clock_nanosleep_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_timespec *
- llgtr %r5,%r5 # struct compat_timespec *
- jg compat_sys_clock_nanosleep
-
-ENTRY(sys32_timer_create_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_sigevent *
- llgtr %r4,%r4 # timer_t *
- jg compat_sys_timer_create
-
-ENTRY(sys32_timer_settime_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_itimerspec *
- llgtr %r5,%r5 # struct compat_itimerspec *
- jg compat_sys_timer_settime
-
-ENTRY(sys32_timer_gettime_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_itimerspec *
- jg compat_sys_timer_gettime
-
-ENTRY(sys32_timer_getoverrun_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_getoverrun
-
-ENTRY(sys32_timer_delete_wrapper)
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_delete
-
-ENTRY(sys32_io_setup_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_io_setup
-
-ENTRY(sys32_io_destroy_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- jg sys_io_destroy
-
-ENTRY(sys32_io_getevents_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- lgfr %r4,%r4 # long
- llgtr %r5,%r5 # struct io_event *
- llgtr %r6,%r6 # struct compat_timespec *
- jg compat_sys_io_getevents
-
-ENTRY(sys32_io_submit_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # struct iocb **
- jg compat_sys_io_submit
-
-ENTRY(sys32_io_cancel_wrapper)
- llgfr %r2,%r2 # (aio_context_t) u32
- llgtr %r3,%r3 # struct iocb *
- llgtr %r4,%r4 # struct io_event *
- jg sys_io_cancel
-
-ENTRY(compat_sys_statfs64_wrapper)
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_statfs64
-
-ENTRY(compat_sys_fstatfs64_wrapper)
- llgfr %r2,%r2 # unsigned int fd
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_fstatfs64
-
-ENTRY(compat_sys_mq_open_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # mode_t
- llgtr %r5,%r5 # struct compat_mq_attr *
- jg compat_sys_mq_open
-
-ENTRY(sys32_mq_unlink_wrapper)
- llgtr %r2,%r2 # const char *
- jg sys_mq_unlink
-
-ENTRY(compat_sys_mq_timedsend_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedsend
-
-ENTRY(compat_sys_mq_timedreceive_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgtr %r5,%r5 # unsigned int *
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedreceive
-
-ENTRY(compat_sys_mq_notify_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_sigevent *
- jg compat_sys_mq_notify
-
-ENTRY(compat_sys_mq_getsetattr_wrapper)
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_mq_attr *
- llgtr %r4,%r4 # struct compat_mq_attr *
- jg compat_sys_mq_getsetattr
-
-ENTRY(compat_sys_add_key_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # size_t
- llgfr %r6,%r6 # (key_serial_t) u32
- jg sys_add_key
-
-ENTRY(compat_sys_request_key_wrapper)
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # (key_serial_t) u32
- jg sys_request_key
-
-ENTRY(sys32_remap_file_pages_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_remap_file_pages
-
-ENTRY(compat_sys_waitid_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # pid_t
- llgtr %r4,%r4 # siginfo_emu31_t *
- lgfr %r5,%r5 # int
- llgtr %r6,%r6 # struct rusage_emu31 *
- jg compat_sys_waitid
-
-ENTRY(compat_sys_kexec_load_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # struct kexec_segment *
- llgfr %r5,%r5 # unsigned long
- jg compat_sys_kexec_load
-
-ENTRY(sys_ioprio_set_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_ioprio_set
-
-ENTRY(sys_ioprio_get_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_ioprio_get
-
-ENTRY(sys_inotify_add_watch_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # u32
- jg sys_inotify_add_watch
-
-ENTRY(sys_inotify_rm_watch_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # u32
- jg sys_inotify_rm_watch
-
-ENTRY(compat_sys_openat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- jg compat_sys_openat
-
-ENTRY(sys_mkdirat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_mkdirat
-
-ENTRY(sys_mknodat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgfr %r5,%r5 # unsigned int
- jg sys_mknodat
-
-ENTRY(sys_fchownat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # uid_t
- llgfr %r5,%r5 # gid_t
- lgfr %r6,%r6 # int
- jg sys_fchownat
-
-ENTRY(compat_sys_futimesat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct timeval *
- jg compat_sys_futimesat
-
-ENTRY(sys32_fstatat64_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct stat64 *
- lgfr %r5,%r5 # int
- jg sys32_fstatat64
-
-ENTRY(sys_unlinkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_unlinkat
-
-ENTRY(sys_renameat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- jg sys_renameat
-
-ENTRY(sys_linkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- lgfr %r6,%r6 # int
- jg sys_linkat
-
-ENTRY(sys_symlinkat_wrapper)
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # const char *
- jg sys_symlinkat
-
-ENTRY(sys_readlinkat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # char *
- lgfr %r5,%r5 # int
- jg sys_readlinkat
-
-ENTRY(sys_fchmodat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # mode_t
- jg sys_fchmodat
-
-ENTRY(sys_faccessat_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_faccessat
-
-ENTRY(compat_sys_pselect6_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # fd_set *
- llgtr %r4,%r4 # fd_set *
- llgtr %r5,%r5 # fd_set *
- llgtr %r6,%r6 # struct timespec *
- llgt %r0,164(%r15) # void *
- stg %r0,160(%r15)
- jg compat_sys_pselect6
-
-ENTRY(compat_sys_ppoll_wrapper)
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # struct timespec *
- llgtr %r5,%r5 # const sigset_t *
- llgfr %r6,%r6 # size_t
- jg compat_sys_ppoll
-
-ENTRY(sys_unshare_wrapper)
- llgfr %r2,%r2 # unsigned long
- jg sys_unshare
-
-ENTRY(compat_sys_set_robust_list_wrapper)
- llgtr %r2,%r2 # struct compat_robust_list_head *
- llgfr %r3,%r3 # size_t
- jg compat_sys_set_robust_list
-
-ENTRY(compat_sys_get_robust_list_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_uptr_t_t *
- llgtr %r4,%r4 # compat_size_t *
- jg compat_sys_get_robust_list
-
-ENTRY(sys_splice_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # loff_t *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # size_t
- llgf %r0,164(%r15) # unsigned int
- stg %r0,160(%r15)
- jg sys_splice
-
-ENTRY(sys_sync_file_range_wrapper)
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- sllg %r4,%r5,32 # get high word of 64bit loff_t
- or %r4,%r6 # get low word of 64bit loff_t
- llgf %r5,164(%r15) # unsigned int
- jg sys_sync_file_range
-
-ENTRY(sys_tee_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- jg sys_tee
-
-ENTRY(compat_sys_vmsplice_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_iovec *
- llgfr %r4,%r4 # unsigned int
- llgfr %r5,%r5 # unsigned int
- jg compat_sys_vmsplice
-
-ENTRY(sys_getcpu_wrapper)
- llgtr %r2,%r2 # unsigned *
- llgtr %r3,%r3 # unsigned *
- llgtr %r4,%r4 # struct getcpu_cache *
- jg sys_getcpu
-
-ENTRY(compat_sys_epoll_pwait_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct compat_epoll_event *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgtr %r6,%r6 # compat_sigset_t *
- llgf %r0,164(%r15) # compat_size_t
- stg %r0,160(%r15)
- jg compat_sys_epoll_pwait
-
-ENTRY(compat_sys_utimes_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_timeval *
- jg compat_sys_utimes
-
-ENTRY(compat_sys_utimensat_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct compat_timespec *
- lgfr %r5,%r5 # int
- jg compat_sys_utimensat
-
-ENTRY(compat_sys_signalfd_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_sigset_t *
- llgfr %r4,%r4 # compat_size_t
- jg compat_sys_signalfd
-
-ENTRY(sys_eventfd_wrapper)
- llgfr %r2,%r2 # unsigned int
- jg sys_eventfd
-
-ENTRY(sys_fallocate_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- sllg %r4,%r4,32 # get high word of 64bit loff_t
- lr %r4,%r5 # get low word of 64bit loff_t
- sllg %r5,%r6,32 # get high word of 64bit loff_t
- l %r5,164(%r15) # get low word of 64bit loff_t
- jg sys_fallocate
-
-ENTRY(sys_timerfd_create_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_timerfd_create
-
-ENTRY(compat_sys_timerfd_settime_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_itimerspec *
- llgtr %r5,%r5 # struct compat_itimerspec *
- jg compat_sys_timerfd_settime
-
-ENTRY(compat_sys_timerfd_gettime_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct compat_itimerspec *
- jg compat_sys_timerfd_gettime
-
-ENTRY(compat_sys_signalfd4_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_sigset_t *
- llgfr %r4,%r4 # compat_size_t
- lgfr %r5,%r5 # int
- jg compat_sys_signalfd4
-
-ENTRY(sys_eventfd2_wrapper)
- llgfr %r2,%r2 # unsigned int
- lgfr %r3,%r3 # int
- jg sys_eventfd2
-
-ENTRY(sys_inotify_init1_wrapper)
- lgfr %r2,%r2 # int
- jg sys_inotify_init1
-
-ENTRY(sys_pipe2_wrapper)
- llgtr %r2,%r2 # u32 *
- lgfr %r3,%r3 # int
- jg sys_pipe2 # branch to system call
-
-ENTRY(sys_dup3_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- lgfr %r4,%r4 # int
- jg sys_dup3 # branch to system call
-
-ENTRY(sys_epoll_create1_wrapper)
- lgfr %r2,%r2 # int
- jg sys_epoll_create1 # branch to system call
-
-ENTRY(sys32_readahead_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # u32
- llgfr %r4,%r4 # u32
- lgfr %r5,%r5 # s32
- jg sys32_readahead # branch to system call
-
-ENTRY(sys32_sendfile64_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # compat_loff_t *
- lgfr %r5,%r5 # s32
- jg sys32_sendfile64 # branch to system call
-
-ENTRY(sys_tkill_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # int
- jg sys_tkill # branch to system call
-
-ENTRY(sys_tgkill_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- jg sys_tgkill # branch to system call
-
-ENTRY(compat_sys_keyctl_wrapper)
- llgfr %r2,%r2 # u32
- llgfr %r3,%r3 # u32
- llgfr %r4,%r4 # u32
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg compat_sys_keyctl # branch to system call
-
-ENTRY(compat_sys_preadv_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgtr %r3,%r3 # compat_iovec *
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg compat_sys_preadv # branch to system call
-
-ENTRY(compat_sys_pwritev_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgtr %r3,%r3 # compat_iovec *
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg compat_sys_pwritev # branch to system call
-
-ENTRY(compat_sys_rt_tgsigqueueinfo_wrapper)
- lgfr %r2,%r2 # compat_pid_t
- lgfr %r3,%r3 # compat_pid_t
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct compat_siginfo *
- jg compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call
-
-ENTRY(sys_perf_event_open_wrapper)
- llgtr %r2,%r2 # const struct perf_event_attr *
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgfr %r6,%r6 # unsigned long
- jg sys_perf_event_open # branch to system call
-
-ENTRY(sys_clone_wrapper)
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # int *
- llgtr %r5,%r5 # int *
- jg sys_clone # branch to system call
-
-ENTRY(sys32_execve_wrapper)
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # compat_uptr_t *
- llgtr %r4,%r4 # compat_uptr_t *
- jg compat_sys_execve # branch to system call
-
-ENTRY(sys_fanotify_init_wrapper)
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_fanotify_init # branch to system call
-
-ENTRY(sys_fanotify_mark_wrapper)
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- sllg %r4,%r4,32 # get high word of 64bit mask
- lr %r4,%r5 # get low word of 64bit mask
- llgfr %r5,%r6 # unsigned int
- llgt %r6,164(%r15) # char *
- jg sys_fanotify_mark # branch to system call
-
-ENTRY(sys_prlimit64_wrapper)
- lgfr %r2,%r2 # pid_t
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # const struct rlimit64 __user *
- llgtr %r5,%r5 # struct rlimit64 __user *
- jg sys_prlimit64 # branch to system call
-
-ENTRY(sys_name_to_handle_at_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char __user *
- llgtr %r4,%r4 # struct file_handle __user *
- llgtr %r5,%r5 # int __user *
- lgfr %r6,%r6 # int
- jg sys_name_to_handle_at
-
-ENTRY(compat_sys_open_by_handle_at_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct file_handle __user *
- lgfr %r4,%r4 # int
- jg compat_sys_open_by_handle_at
-
-ENTRY(compat_sys_clock_adjtime_wrapper)
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timex __user *
- jg compat_sys_clock_adjtime
-
-ENTRY(sys_syncfs_wrapper)
- lgfr %r2,%r2 # int
- jg sys_syncfs
-
-ENTRY(sys_setns_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_setns
-
-ENTRY(compat_sys_process_vm_readv_wrapper)
- lgfr %r2,%r2 # compat_pid_t
- llgtr %r3,%r3 # struct compat_iovec __user *
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # struct compat_iovec __user *
- llgfr %r6,%r6 # unsigned long
- llgf %r0,164(%r15) # unsigned long
- stg %r0,160(%r15)
- jg compat_sys_process_vm_readv
-
-ENTRY(compat_sys_process_vm_writev_wrapper)
- lgfr %r2,%r2 # compat_pid_t
- llgtr %r3,%r3 # struct compat_iovec __user *
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # struct compat_iovec __user *
- llgfr %r6,%r6 # unsigned long
- llgf %r0,164(%r15) # unsigned long
- stg %r0,160(%r15)
- jg compat_sys_process_vm_writev
-
-ENTRY(sys_s390_runtime_instr_wrapper)
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_s390_runtime_instr
-
-ENTRY(sys_kcmp_wrapper)
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- lgfr %r4,%r4 # int
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_kcmp
-
-ENTRY(sys_finit_module_wrapper)
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char __user *
- lgfr %r4,%r4 # int
- jg sys_finit_module
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
new file mode 100644
index 00000000000..45cdb37aa6f
--- /dev/null
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -0,0 +1,216 @@
+/*
+ * Compat system call wrappers.
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include "entry.h"
+
+#define COMPAT_SYSCALL_WRAP1(name, ...) \
+ COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP2(name, ...) \
+ COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP3(name, ...) \
+ COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP4(name, ...) \
+ COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP5(name, ...) \
+ COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP6(name, ...) \
+ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__)
+
+#define __SC_COMPAT_TYPE(t, a) \
+ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
+
+#define __SC_COMPAT_CAST(t, a) \
+({ \
+ long __ReS = a; \
+ \
+ BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \
+ !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \
+ if (__TYPE_IS_L(t)) \
+ __ReS = (s32)a; \
+ if (__TYPE_IS_UL(t)) \
+ __ReS = (u32)a; \
+ if (__TYPE_IS_PTR(t)) \
+ __ReS = a & 0x7fffffff; \
+ (t)__ReS; \
+})
+
+/*
+ * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by
+ * compat tasks. These wrappers will only be used for system calls where only
+ * the system call arguments need sign or zero extension or zeroing of the upper
+ * 33 bits of pointers.
+ * Note: since the wrapper function will afterwards call a system call which
+ * again performs zero and sign extension for all system call arguments with
+ * a size of less than eight bytes, these compat wrappers only touch those
+ * system call arguments with a size of eight bytes ((unsigned) long and
+ * pointers). Zero and sign extension for e.g. int parameters will be done by
+ * the regular system call wrappers.
+ */
+#define COMPAT_SYSCALL_WRAPx(x, name, ...) \
+ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
+ { \
+ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
+ }
+
+COMPAT_SYSCALL_WRAP1(exit, int, error_code);
+COMPAT_SYSCALL_WRAP1(close, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
+COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds);
+COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(nice, int, increment);
+COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes);
+COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
+COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
+COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
+COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
+COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid);
+COMPAT_SYSCALL_WRAP1(umask, int, mask);
+COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
+COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd);
+COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
+COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
+COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
+COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
+COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
+COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
+COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval);
+COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
+COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
+COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
+COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
+COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
+COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
+COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
+COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality);
+COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
+COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd);
+COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP1(mlockall, int, flags);
+COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy);
+COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
+COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
+COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
+COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
+COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
+COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
+COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid);
+COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid);
+COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid);
+COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
+COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid);
+COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
+COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
+COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
+COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
+COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count);
+COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
+COMPAT_SYSCALL_WRAP1(exit_group, int, error_code);
+COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
+COMPAT_SYSCALL_WRAP1(epoll_create, int, size);
+COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
+COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
+COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id);
+COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id);
+COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
+COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
+COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
+COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
+COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
+COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
+COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio);
+COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
+COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd);
+COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
+COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag);
+COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags);
+COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
+COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
+COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count);
+COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags);
+COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags);
+COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags);
+COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
+COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags);
+COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
+COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
+COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
+COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
+COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
+COMPAT_SYSCALL_WRAP1(syncfs, int, fd);
+COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype);
+COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum);
+COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
+COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
+COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index fb8d8781a01..a3b9150e680 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -13,41 +13,233 @@
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/elf.h>
+#include <linux/memblock.h>
#include <asm/os_info.h>
#include <asm/elf.h>
#include <asm/ipl.h>
+#include <asm/sclp.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+ .cnt = 1,
+ .max = 1,
+ .total_size = 0,
+ .regions = &oldmem_region,
+};
+
+#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
+ for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, p_start, \
+ p_end, p_nid); \
+ i != (u64)ULLONG_MAX; \
+ __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, \
+ p_start, p_end, p_nid))
+
+struct dump_save_areas dump_save_areas;
+
/*
- * Copy one page from "oldmem"
+ * Allocate and add a save area for a CPU
+ */
+struct save_area *dump_save_area_create(int cpu)
+{
+ struct save_area **save_areas, *save_area;
+
+ save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
+ if (!save_area)
+ return NULL;
+ if (cpu + 1 > dump_save_areas.count) {
+ dump_save_areas.count = cpu + 1;
+ save_areas = krealloc(dump_save_areas.areas,
+ dump_save_areas.count * sizeof(void *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!save_areas) {
+ kfree(save_area);
+ return NULL;
+ }
+ dump_save_areas.areas = save_areas;
+ }
+ dump_save_areas.areas[cpu] = save_area;
+ return save_area;
+}
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy real to virtual or real memory
+ */
+static int copy_from_realmem(void *dest, void *src, size_t count)
+{
+ unsigned long size;
+
+ if (!count)
+ return 0;
+ if (!is_vmalloc_or_module_addr(dest))
+ return memcpy_real(dest, src, count);
+ do {
+ size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK));
+ if (memcpy_real(load_real_addr(dest), src, size))
+ return -EFAULT;
+ count -= size;
+ dest += size;
+ src += size;
+ } while (count);
+ return 0;
+}
+
+/*
+ * Pointer to ELF header in new kernel
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * For pages below HSA size memory from the HSA is copied. Otherwise
+ * real memory copy is used.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
+{
+ int rc;
+
+ if (src < sclp_get_hsa_size()) {
+ rc = memcpy_hsa(buf, src, csize, userbuf);
+ } else {
+ if (userbuf)
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = memcpy_real(buf, (void *) src, csize);
+ }
+ return rc ? rc : csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
*
* For the kdump reserved memory this functions performs a swap operation:
* - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
* - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
*/
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
-{
- unsigned long src;
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
- if (!csize)
- return 0;
+{
+ int rc;
- src = (pfn << PAGE_SHIFT) + offset;
if (src < OLDMEM_SIZE)
src += OLDMEM_BASE;
else if (src > OLDMEM_BASE &&
src < OLDMEM_BASE + OLDMEM_SIZE)
src -= OLDMEM_BASE;
if (userbuf)
- copy_to_user_real((void __force __user *) buf, (void *) src,
- csize);
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = copy_from_realmem(buf, (void *) src, csize);
+ return (rc == 0) ? rc : csize;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ unsigned long src;
+
+ if (!csize)
+ return 0;
+ src = (pfn << PAGE_SHIFT) + offset;
+ if (OLDMEM_BASE)
+ return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+ else
+ return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long size_old;
+ int rc;
+
+ if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
+ size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+ rc = remap_pfn_range(vma, from,
+ pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+ size_old, prot);
+ if (rc || size == size_old)
+ return rc;
+ size -= size_old;
+ from += size_old;
+ pfn += size_old >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * We only map available memory above HSA size. Memory below HSA size
+ * is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+ unsigned long from,
+ unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long size_hsa;
+
+ if (pfn < hsa_end >> PAGE_SHIFT) {
+ size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT));
+ if (size == size_hsa)
+ return 0;
+ size -= size_hsa;
+ from += size_hsa;
+ pfn += size_hsa >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ if (OLDMEM_BASE)
+ return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
else
- memcpy_real(buf, (void *) src, csize);
- return csize;
+ return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+ prot);
}
/*
@@ -58,13 +250,23 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
unsigned long copied = 0;
int rc;
- if ((unsigned long) src < OLDMEM_SIZE) {
- copied = min(count, OLDMEM_SIZE - (unsigned long) src);
- rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
- if (rc)
- return rc;
+ if (OLDMEM_BASE) {
+ if ((unsigned long) src < OLDMEM_SIZE) {
+ copied = min(count, OLDMEM_SIZE - (unsigned long) src);
+ rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied);
+ if (rc)
+ return rc;
+ }
+ } else {
+ unsigned long hsa_end = sclp_get_hsa_size();
+ if ((unsigned long) src < hsa_end) {
+ copied = min(count, hsa_end - (unsigned long) src);
+ rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
+ if (rc)
+ return rc;
+ }
}
- return memcpy_real(dest + copied, src + copied, count - copied);
+ return copy_from_realmem(dest + copied, src + copied, count - copied);
}
/*
@@ -81,19 +283,6 @@ static void *kzalloc_panic(int len)
}
/*
- * Get memory layout and create hole for oldmem
- */
-static struct mem_chunk *get_memory_layout(void)
-{
- struct mem_chunk *chunk_array;
-
- chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk));
- detect_memory_layout(chunk_array);
- create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK);
- return chunk_array;
-}
-
-/*
* Initialize ELF note
*/
static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
@@ -294,8 +483,8 @@ static int get_cpu_cnt(void)
{
int i, cpus = 0;
- for (i = 0; zfcpdump_save_areas[i]; i++) {
- if (zfcpdump_save_areas[i]->pref_reg == 0)
+ for (i = 0; i < dump_save_areas.count; i++) {
+ if (dump_save_areas.areas[i]->pref_reg == 0)
continue;
cpus++;
}
@@ -307,60 +496,33 @@ static int get_cpu_cnt(void)
*/
static int get_mem_chunk_cnt(void)
{
- struct mem_chunk *chunk_array, *mem_chunk;
- int i, cnt = 0;
+ int cnt = 0;
+ u64 idx;
- chunk_array = get_memory_layout();
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- mem_chunk = &chunk_array[i];
- if (chunk_array[i].type != CHUNK_READ_WRITE &&
- chunk_array[i].type != CHUNK_READ_ONLY)
- continue;
- if (mem_chunk->size == 0)
- continue;
+ for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
cnt++;
- }
- kfree(chunk_array);
return cnt;
}
/*
- * Relocate pointer in order to allow vmcore code access the data
- */
-static inline unsigned long relocate(unsigned long addr)
-{
- return OLDMEM_BASE + addr;
-}
-
-/*
* Initialize ELF loads (new kernel)
*/
-static int loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
{
- struct mem_chunk *chunk_array, *mem_chunk;
- int i;
+ phys_addr_t start, end;
+ u64 idx;
- chunk_array = get_memory_layout();
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- mem_chunk = &chunk_array[i];
- if (mem_chunk->size == 0)
- break;
- if (chunk_array[i].type != CHUNK_READ_WRITE &&
- chunk_array[i].type != CHUNK_READ_ONLY)
- continue;
- else
- phdr->p_filesz = mem_chunk->size;
+ for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+ phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
- phdr->p_offset = mem_chunk->addr;
- phdr->p_vaddr = mem_chunk->addr;
- phdr->p_paddr = mem_chunk->addr;
- phdr->p_memsz = mem_chunk->size;
+ phdr->p_offset = start;
+ phdr->p_vaddr = start;
+ phdr->p_paddr = start;
+ phdr->p_memsz = end - start;
phdr->p_flags = PF_R | PF_W | PF_X;
phdr->p_align = PAGE_SIZE;
phdr++;
}
- kfree(chunk_array);
- return i;
}
/*
@@ -374,8 +536,8 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
ptr = nt_prpsinfo(ptr);
- for (i = 0; zfcpdump_save_areas[i]; i++) {
- sa = zfcpdump_save_areas[i];
+ for (i = 0; i < dump_save_areas.count; i++) {
+ sa = dump_save_areas.areas[i];
if (sa->pref_reg == 0)
continue;
ptr = fill_cpu_elf_notes(ptr, sa);
@@ -383,7 +545,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
ptr = nt_vmcoreinfo(ptr);
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = PT_NOTE;
- phdr->p_offset = relocate(notes_offset);
+ phdr->p_offset = notes_offset;
phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
phdr->p_memsz = phdr->p_filesz;
return ptr;
@@ -392,7 +554,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
/*
* Create ELF core header (new kernel)
*/
-static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
int mem_chunk_cnt;
@@ -400,6 +562,23 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
u32 alloc_size;
u64 hdr_off;
+ /* If we are not in kdump or zfcpdump mode return */
+ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return 0;
+ /* If elfcorehdr= has been passed via cmdline, we use that one */
+ if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+ return 0;
+ /* If we cannot get HSA size for zfcpdump return error */
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+ return -ENODEV;
+
+ /* For kdump, exclude previous crashkernel memory */
+ if (OLDMEM_BASE) {
+ oldmem_region.base = OLDMEM_BASE;
+ oldmem_region.size = OLDMEM_SIZE;
+ oldmem_type.total_size = OLDMEM_SIZE;
+ }
+
mem_chunk_cnt = get_mem_chunk_cnt();
alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
@@ -417,27 +596,52 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
/* Init loads */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off);
- *elfcorebuf_sz = hdr_off;
- *elfcorebuf = (void *) relocate((unsigned long) hdr);
- BUG_ON(*elfcorebuf_sz > alloc_size);
+ loads_init(phdr_loads, hdr_off);
+ *addr = (unsigned long long) hdr;
+ elfcorehdr_newmem = hdr;
+ *size = (unsigned long long) hdr_off;
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
}
/*
- * Create kdump ELF core header in new kernel, if it has not been passed via
- * the "elfcorehdr" kernel parameter
+ * Free ELF core header (new kernel)
*/
-static int setup_kdump_elfcorehdr(void)
+void elfcorehdr_free(unsigned long long addr)
{
- size_t elfcorebuf_sz;
- char *elfcorebuf;
+ if (!elfcorehdr_newmem)
+ return;
+ kfree((void *)(unsigned long)addr);
+}
- if (!OLDMEM_BASE || is_kdump_kernel())
- return -EINVAL;
- s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz);
- elfcorehdr_addr = (unsigned long long) elfcorebuf;
- elfcorehdr_size = elfcorebuf_sz;
- return 0;
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ src = elfcorehdr_newmem ? src : src - OLDMEM_BASE;
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
}
-subsys_initcall(setup_kdump_elfcorehdr);
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+ int rc;
+
+ if (elfcorehdr_newmem) {
+ memcpy(buf, src, count);
+ } else {
+ rc = copy_from_oldmem(buf, src, count);
+ if (rc)
+ return rc;
+ }
+ *ppos += count;
+ return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4e8215e0d4b..ee8390da6ea 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -611,7 +611,7 @@ debug_open(struct inode *inode, struct file *file)
debug_info_t *debug_info, *debug_info_snapshot;
mutex_lock(&debug_mutex);
- debug_info = file->f_path.dentry->d_inode->i_private;
+ debug_info = file_inode(file)->i_private;
/* find debug view */
for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
if (!debug_info->views[i])
@@ -867,7 +867,7 @@ static inline void
debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
int exception)
{
- active->id.stck = get_clock();
+ active->id.stck = get_tod_clock_fast();
active->id.fields.cpuid = smp_processor_id();
active->caller = __builtin_return_address(0);
active->id.fields.exception = exception;
@@ -889,7 +889,7 @@ static int debug_active=1;
* if debug_active is already off
*/
static int
-s390dbf_procactive(ctl_table *table, int write,
+s390dbf_procactive(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index a7f9abd98cf..993efe6a887 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -23,6 +23,7 @@
#include <linux/kdebug.h>
#include <asm/uaccess.h>
+#include <asm/dis.h>
#include <asm/io.h>
#include <linux/atomic.h>
#include <asm/mathemu.h>
@@ -37,17 +38,6 @@
#define ONELONG "%016lx: "
#endif /* CONFIG_64BIT */
-#define OPERAND_GPR 0x1 /* Operand printed as %rx */
-#define OPERAND_FPR 0x2 /* Operand printed as %fx */
-#define OPERAND_AR 0x4 /* Operand printed as %ax */
-#define OPERAND_CR 0x8 /* Operand printed as %cx */
-#define OPERAND_DISP 0x10 /* Operand printed as displacement */
-#define OPERAND_BASE 0x20 /* Operand printed as base register */
-#define OPERAND_INDEX 0x40 /* Operand printed as index register */
-#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
-#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
-#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
-
enum {
UNUSED, /* Indicates the end of the operand list */
R_8, /* GPR starting at position 8 */
@@ -155,19 +145,7 @@ enum {
INSTR_S_00, INSTR_S_RD,
};
-struct operand {
- int bits; /* The number of bits in the operand. */
- int shift; /* The number of bits to shift. */
- int flags; /* One bit syntax flags. */
-};
-
-struct insn {
- const char name[5];
- unsigned char opfrag;
- unsigned char format;
-};
-
-static const struct operand operands[] =
+static const struct s390_operand operands[] =
{
[UNUSED] = { 0, 0, 0 },
[R_8] = { 4, 8, OPERAND_GPR },
@@ -479,7 +457,7 @@ static char *long_insn_name[] = {
[LONG_INSN_PCISTB] = "pcistb",
};
-static struct insn opcode[] = {
+static struct s390_insn opcode[] = {
#ifdef CONFIG_64BIT
{ "bprp", 0xc5, INSTR_MII_UPI },
{ "bpp", 0xc7, INSTR_SMI_U0RDP },
@@ -668,7 +646,7 @@ static struct insn opcode[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_01[] = {
+static struct s390_insn opcode_01[] = {
#ifdef CONFIG_64BIT
{ "ptff", 0x04, INSTR_E },
{ "pfpo", 0x0a, INSTR_E },
@@ -684,7 +662,7 @@ static struct insn opcode_01[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_a5[] = {
+static struct s390_insn opcode_a5[] = {
#ifdef CONFIG_64BIT
{ "iihh", 0x00, INSTR_RI_RU },
{ "iihl", 0x01, INSTR_RI_RU },
@@ -706,7 +684,7 @@ static struct insn opcode_a5[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_a7[] = {
+static struct s390_insn opcode_a7[] = {
#ifdef CONFIG_64BIT
{ "tmhh", 0x02, INSTR_RI_RU },
{ "tmhl", 0x03, INSTR_RI_RU },
@@ -728,7 +706,7 @@ static struct insn opcode_a7[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_aa[] = {
+static struct s390_insn opcode_aa[] = {
#ifdef CONFIG_64BIT
{ { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
{ "rion", 0x01, INSTR_RI_RI },
@@ -739,7 +717,7 @@ static struct insn opcode_aa[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b2[] = {
+static struct s390_insn opcode_b2[] = {
#ifdef CONFIG_64BIT
{ "stckf", 0x7c, INSTR_S_RD },
{ "lpp", 0x80, INSTR_S_RD },
@@ -840,7 +818,6 @@ static struct insn opcode_b2[] = {
{ "stcke", 0x78, INSTR_S_RD },
{ "sacf", 0x79, INSTR_S_RD },
{ "stsi", 0x7d, INSTR_S_RD },
- { "spp", 0x80, INSTR_S_RD },
{ "srnm", 0x99, INSTR_S_RD },
{ "stfpc", 0x9c, INSTR_S_RD },
{ "lfpc", 0x9d, INSTR_S_RD },
@@ -852,7 +829,7 @@ static struct insn opcode_b2[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b3[] = {
+static struct s390_insn opcode_b3[] = {
#ifdef CONFIG_64BIT
{ "maylr", 0x38, INSTR_RRF_F0FF },
{ "mylr", 0x39, INSTR_RRF_F0FF },
@@ -1035,7 +1012,7 @@ static struct insn opcode_b3[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b9[] = {
+static struct s390_insn opcode_b9[] = {
#ifdef CONFIG_64BIT
{ "lpgr", 0x00, INSTR_RRE_RR },
{ "lngr", 0x01, INSTR_RRE_RR },
@@ -1168,7 +1145,7 @@ static struct insn opcode_b9[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c0[] = {
+static struct s390_insn opcode_c0[] = {
#ifdef CONFIG_64BIT
{ "lgfi", 0x01, INSTR_RIL_RI },
{ "xihf", 0x06, INSTR_RIL_RU },
@@ -1188,7 +1165,7 @@ static struct insn opcode_c0[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c2[] = {
+static struct s390_insn opcode_c2[] = {
#ifdef CONFIG_64BIT
{ "msgfi", 0x00, INSTR_RIL_RI },
{ "msfi", 0x01, INSTR_RIL_RI },
@@ -1206,7 +1183,7 @@ static struct insn opcode_c2[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c4[] = {
+static struct s390_insn opcode_c4[] = {
#ifdef CONFIG_64BIT
{ "llhrl", 0x02, INSTR_RIL_RP },
{ "lghrl", 0x04, INSTR_RIL_RP },
@@ -1223,7 +1200,7 @@ static struct insn opcode_c4[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c6[] = {
+static struct s390_insn opcode_c6[] = {
#ifdef CONFIG_64BIT
{ "exrl", 0x00, INSTR_RIL_RP },
{ "pfdrl", 0x02, INSTR_RIL_UP },
@@ -1241,7 +1218,7 @@ static struct insn opcode_c6[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c8[] = {
+static struct s390_insn opcode_c8[] = {
#ifdef CONFIG_64BIT
{ "mvcos", 0x00, INSTR_SSF_RRDRD },
{ "ectg", 0x01, INSTR_SSF_RRDRD },
@@ -1252,7 +1229,7 @@ static struct insn opcode_c8[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_cc[] = {
+static struct s390_insn opcode_cc[] = {
#ifdef CONFIG_64BIT
{ "brcth", 0x06, INSTR_RIL_RP },
{ "aih", 0x08, INSTR_RIL_RI },
@@ -1264,7 +1241,7 @@ static struct insn opcode_cc[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_e3[] = {
+static struct s390_insn opcode_e3[] = {
#ifdef CONFIG_64BIT
{ "ltg", 0x02, INSTR_RXY_RRRD },
{ "lrag", 0x03, INSTR_RXY_RRRD },
@@ -1370,7 +1347,7 @@ static struct insn opcode_e3[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_e5[] = {
+static struct s390_insn opcode_e5[] = {
#ifdef CONFIG_64BIT
{ "strag", 0x02, INSTR_SSE_RDRD },
{ "mvhhi", 0x44, INSTR_SIL_RDI },
@@ -1392,7 +1369,7 @@ static struct insn opcode_e5[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_eb[] = {
+static struct s390_insn opcode_eb[] = {
#ifdef CONFIG_64BIT
{ "lmg", 0x04, INSTR_RSY_RRRD },
{ "srag", 0x0a, INSTR_RSY_RRRD },
@@ -1466,7 +1443,7 @@ static struct insn opcode_eb[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_ec[] = {
+static struct s390_insn opcode_ec[] = {
#ifdef CONFIG_64BIT
{ "brxhg", 0x44, INSTR_RIE_RRP },
{ "brxlg", 0x45, INSTR_RIE_RRP },
@@ -1505,7 +1482,7 @@ static struct insn opcode_ec[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_ed[] = {
+static struct s390_insn opcode_ed[] = {
#ifdef CONFIG_64BIT
{ "mayl", 0x38, INSTR_RXF_FRRDF },
{ "myl", 0x39, INSTR_RXF_FRRDF },
@@ -1573,7 +1550,7 @@ static struct insn opcode_ed[] = {
/* Extracts an operand value from an instruction. */
static unsigned int extract_operand(unsigned char *code,
- const struct operand *operand)
+ const struct s390_operand *operand)
{
unsigned int val;
int bits;
@@ -1609,16 +1586,11 @@ static unsigned int extract_operand(unsigned char *code,
return val;
}
-static inline int insn_length(unsigned char code)
-{
- return ((((int) code + 64) >> 7) + 1) << 1;
-}
-
-static struct insn *find_insn(unsigned char *code)
+struct s390_insn *find_insn(unsigned char *code)
{
unsigned char opfrag = code[1];
unsigned char opmask;
- struct insn *table;
+ struct s390_insn *table;
switch (code[0]) {
case 0x01:
@@ -1697,34 +1669,35 @@ static struct insn *find_insn(unsigned char *code)
* insn_to_mnemonic - decode an s390 instruction
* @instruction: instruction to decode
* @buf: buffer to fill with mnemonic
+ * @len: length of buffer
*
* Decode the instruction at @instruction and store the corresponding
- * mnemonic into @buf.
+ * mnemonic into @buf of length @len.
* @buf is left unchanged if the instruction could not be decoded.
* Returns:
* %0 on success, %-ENOENT if the instruction was not found.
*/
-int insn_to_mnemonic(unsigned char *instruction, char buf[8])
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len)
{
- struct insn *insn;
+ struct s390_insn *insn;
insn = find_insn(instruction);
if (!insn)
return -ENOENT;
if (insn->name[0] == '\0')
- snprintf(buf, sizeof(buf), "%s",
+ snprintf(buf, len, "%s",
long_insn_name[(int) insn->name[1]]);
else
- snprintf(buf, sizeof(buf), "%.5s", insn->name);
+ snprintf(buf, len, "%.5s", insn->name);
return 0;
}
EXPORT_SYMBOL_GPL(insn_to_mnemonic);
static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
{
- struct insn *insn;
+ struct s390_insn *insn;
const unsigned char *ops;
- const struct operand *operand;
+ const struct s390_operand *operand;
unsigned int value;
char separator;
char *ptr;
@@ -1862,6 +1835,8 @@ void print_fn_code(unsigned char *code, unsigned long len)
while (len) {
ptr = buffer;
opsize = insn_length(*code);
+ if (opsize > len)
+ break;
ptr += sprintf(ptr, "%p: ", code);
for (i = 0; i < opsize; i++)
ptr += sprintf(ptr, "%02x", code[i]);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 00000000000..acb412442e5
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,217 @@
+/*
+ * Stack dumping functions
+ *
+ * Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+
+#ifndef CONFIG_64BIT
+#define LONG "%08lx "
+#define FOURLONG "%08lx %08lx %08lx %08lx\n"
+static int kstack_depth_to_print = 12;
+#else /* CONFIG_64BIT */
+#define LONG "%016lx "
+#define FOURLONG "%016lx %016lx %016lx %016lx\n"
+static int kstack_depth_to_print = 20;
+#endif /* CONFIG_64BIT */
+
+/*
+ * For show_trace we have tree different stack to consider:
+ * - the panic stack which is used if the kernel stack has overflown
+ * - the asynchronous interrupt stack (cpu related)
+ * - the synchronous kernel stack (process related)
+ * The stack trace can start at any of the three stack and can potentially
+ * touch all of them. The order is: panic stack, async stack, sync stack.
+ */
+static unsigned long
+__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ while (1) {
+ sp = sp & PSW_ADDR_INSN;
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
+ /* Follow the backchain. */
+ while (1) {
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ addr = regs->psw.addr & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+static void show_trace(struct task_struct *task, unsigned long *stack)
+{
+ const unsigned long frame_size =
+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ register unsigned long __r15 asm ("15");
+ unsigned long sp;
+
+ sp = (unsigned long) stack;
+ if (!sp)
+ sp = task ? task->thread.ksp : __r15;
+ printk("Call Trace:\n");
+#ifdef CONFIG_CHECK_STACK
+ sp = __show_trace(sp,
+ S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size);
+#endif
+ sp = __show_trace(sp,
+ S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size);
+ if (task)
+ __show_trace(sp, (unsigned long) task_stack_page(task),
+ (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ else
+ __show_trace(sp, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+ if (!task)
+ task = current;
+ debug_show_held_locks(task);
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ register unsigned long *__r15 asm ("15");
+ unsigned long *stack;
+ int i;
+
+ if (!sp)
+ stack = task ? (unsigned long *) task->thread.ksp : __r15;
+ else
+ stack = sp;
+
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ if ((i * sizeof(long) % 32) == 0)
+ printk("%s ", i == 0 ? "" : "\n");
+ printk(LONG, *stack++);
+ }
+ printk("\n");
+ show_trace(task, sp);
+}
+
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+#ifdef CONFIG_64BIT
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
+#endif
+}
+
+static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
+{
+ return (regs->psw.mask & bits) / ((~bits + 1) & bits);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ char *mode;
+
+ mode = user_mode(regs) ? "User" : "Krnl";
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
+ printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+ "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
+ mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
+ mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
+ mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
+ mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
+ mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
+#ifdef CONFIG_64BIT
+ printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+#endif
+ printk("\n%s GPRS: " FOURLONG, mode,
+ regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+ printk(" " FOURLONG,
+ regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
+ printk(" " FOURLONG,
+ regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
+ printk(" " FOURLONG,
+ regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
+ show_code(regs);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ show_regs_print_info(KERN_DEFAULT);
+ show_registers(regs);
+ /* Show stack backtrace if pt_regs is from kernel mode */
+ if (!user_mode(regs))
+ show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void die(struct pt_regs *regs, const char *str)
+{
+ static int die_counter;
+
+ oops_enter();
+ lgr_info_log();
+ debug_stop_all();
+ console_verbose();
+ spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
+ printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+ print_modules();
+ show_regs(regs);
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irq(&die_lock);
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception: panic_on_oops");
+ oops_exit();
+ do_exit(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 1f0eee9e7da..0dff972a169 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -47,10 +47,10 @@ static void __init reset_tod_clock(void)
{
u64 time;
- if (store_clock(&time) == 0)
+ if (store_tod_clock(&time) == 0)
return;
/* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0)
+ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
disabled_wait(0);
sched_clock_base_cc = TOD_UNIX_EPOCH;
@@ -173,7 +173,7 @@ static noinline __init void create_kernel_nss(void)
}
/* re-initialize cputime accounting. */
- sched_clock_base_cc = get_clock();
+ sched_clock_base_cc = get_tod_clock();
S390_lowcore.last_update_clock = sched_clock_base_cc;
S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
S390_lowcore.user_timer = 0;
@@ -206,6 +206,7 @@ static noinline __init void clear_bss_section(void)
*/
static noinline __init void init_kernel_storage_key(void)
{
+#if PAGE_DEFAULT_KEY
unsigned long end_pfn, init_pfn;
end_pfn = PFN_UP(__pa(&_end));
@@ -213,6 +214,7 @@ static noinline __init void init_kernel_storage_key(void)
for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
page_set_storage_key(init_pfn << PAGE_SHIFT,
PAGE_DEFAULT_KEY, 0);
+#endif
}
static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -256,13 +258,19 @@ static __init void setup_topology(void)
static void early_pgm_check_handler(void)
{
const struct exception_table_entry *fixup;
+ unsigned long cr0, cr0_new;
unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
fixup = search_exception_tables(addr & PSW_ADDR_INSN);
if (!fixup)
disabled_wait(0);
+ /* Disable low address protection before storing into lowcore. */
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 & ~(1UL << 28);
+ __ctl_load(cr0_new, 0, 0);
S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+ __ctl_load(cr0, 0, 0);
}
static noinline __init void setup_lowcore_early(void)
@@ -378,14 +386,14 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
if (test_facility(3))
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(27))
- S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
if (test_facility(40))
- S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
if (test_facility(50) && test_facility(73))
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
if (test_facility(66))
S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
+ if (test_facility(51))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
#endif
}
@@ -481,8 +489,7 @@ void __init startup_init(void)
detect_diag44();
detect_machine_facilities();
setup_topology();
- sclp_facilities_detect();
- detect_memory_layout(memory_chunk);
+ sclp_early_detect();
#ifdef CONFIG_DYNAMIC_FTRACE
S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
#endif
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55022852326..70203265196 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
@@ -18,6 +19,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 4
@@ -36,15 +38,15 @@ __PT_R13 = __PT_GPRS + 524
__PT_R14 = __PT_GPRS + 56
__PT_R15 = __PT_GPRS + 60
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
-_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT)
-
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-system_call(%r13)
@@ -97,10 +99,10 @@ STACK_SIZE = 1 << STACK_SHIFT
sra %r14,\shift
jnz 1f
CHECK_STACK 1<<\shift,\savearea
+ ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: l %r15,\stack # load target stack
-2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
.macro ADD64 high,low,timer
@@ -150,18 +152,14 @@ ENTRY(__switch_to)
l %r4,__THREAD_info(%r2) # get thread_info of prev
l %r5,__THREAD_info(%r3) # get thread_info of next
lr %r15,%r5
- ahi %r15,STACK_SIZE # end of kernel stack of next
+ ahi %r15,STACK_INIT # end of kernel stack of next
st %r3,__LC_CURRENT # store task struct of next
st %r5,__LC_THREAD_INFO # store thread info of next
st %r15,__LC_KERNEL_STACK # store end of kernel stack
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
l %r15,__THREAD_ksp(%r3) # load kernel stack of next
- tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending?
- jz 0f
- ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next
-0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
br %r14
__critical_start:
@@ -176,9 +174,9 @@ sysc_stm:
stm %r8,%r15,__LC_SAVE_AREA_SYNC
l %r12,__LC_THREAD_INFO
l %r13,__LC_SVC_NEW_PSW+4
+ lhi %r14,_PIF_SYSCALL
sysc_per:
l %r15,__LC_KERNEL_STACK
- ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
sysc_vtime:
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
@@ -186,8 +184,9 @@ sysc_vtime:
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ st %r14,__PT_FLAGS(%r11)
sysc_do_svc:
- oi __TI_flags+3(%r12),_TIF_SYSCALL
+ l %r10,__TI_sysc_table(%r12) # 31 bit system call table
lh %r8,__PT_INT_CODE+2(%r11)
sla %r8,2 # shift and test for svc0
jnz sysc_nr_ok
@@ -198,12 +197,11 @@ sysc_do_svc:
lr %r8,%r1
sla %r8,2
sysc_nr_ok:
- l %r10,BASED(.Lsys_call_table) # 31 bit system call table
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
st %r2,__PT_ORIG_GPR2(%r11)
st %r7,STACK_FRAME_OVERHEAD(%r15)
l %r9,0(%r8,%r10) # get system call addr.
- tm __TI_flags+2(%r12),_TIF_TRACE >> 8
+ tm __TI_flags+3(%r12),_TIF_TRACE
jnz sysc_tracesys
basr %r14,%r9 # call sys_xxxx
st %r2,__PT_R2(%r11) # store return value
@@ -213,9 +211,12 @@ sysc_return:
sysc_tif:
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno sysc_restore
- tm __TI_flags+3(%r12),_TIF_WORK_SVC
- jnz sysc_work # check for work
- ni __TI_flags+3(%r12),255-_TIF_SYSCALL
+ tm __PT_FLAGS+3(%r11),_PIF_WORK
+ jnz sysc_work
+ tm __TI_flags+3(%r12),_TIF_WORK
+ jnz sysc_work # check for thread work
+ tm __LC_CPU_FLAGS+3,_CIF_WORK
+ jnz sysc_work
sysc_restore:
mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
stpt __LC_EXIT_TIMER
@@ -227,16 +228,18 @@ sysc_done:
# One of the work bits is on. Find out which one.
#
sysc_work:
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
jo sysc_mcck_pending
tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
jo sysc_reschedule
- tm __TI_flags+3(%r12),_TIF_PER_TRAP
+ tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP
jo sysc_singlestep
tm __TI_flags+3(%r12),_TIF_SIGPENDING
jo sysc_sigpending
tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
jo sysc_notify_resume
+ tm __LC_CPU_FLAGS+3,_CIF_ASCE
+ jo sysc_uaccess
j sysc_return # beware of critical section cleanup
#
@@ -248,7 +251,7 @@ sysc_reschedule:
br %r1 # call schedule
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
sysc_mcck_pending:
l %r1,BASED(.Lhandle_mcck)
@@ -256,15 +259,24 @@ sysc_mcck_pending:
br %r1 # TIF bit will be cleared by handler
#
+# _CIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __LC_CPU_FLAGS+3,255-_CIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
lr %r2,%r11 # pass pointer to pt_regs
l %r1,BASED(.Ldo_signal)
basr %r14,%r1 # call do_signal
- tm __TI_flags+3(%r12),_TIF_SYSCALL
+ tm __PT_FLAGS+3(%r11),_PIF_SYSCALL
jno sysc_return
lm %r2,%r7,__PT_R2(%r11) # load svc arguments
+ l %r10,__TI_sysc_table(%r12) # 31 bit system call table
xr %r8,%r8 # svc 0 returns -ENOSYS
clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
jnl sysc_nr_ok # invalid svc number -> do svc 0
@@ -282,10 +294,10 @@ sysc_notify_resume:
br %r1 # call do_notify_resume
#
-# _TIF_PER_TRAP is set, call do_per_trap
+# _PIF_PER_TRAP is set, call do_per_trap
#
sysc_singlestep:
- ni __TI_flags+3(%r12),255-_TIF_PER_TRAP
+ ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP
lr %r2,%r11 # pass pointer to pt_regs
l %r1,BASED(.Ldo_per_trap)
la %r14,BASED(sysc_return)
@@ -315,7 +327,7 @@ sysc_tracego:
basr %r14,%r9 # call sys_xxx
st %r2,__PT_R2(%r11) # store return value
sysc_tracenogo:
- tm __TI_flags+2(%r12),_TIF_TRACE >> 8
+ tm __TI_flags+3(%r12),_TIF_TRACE
jz sysc_return
l %r1,BASED(.Ltrace_exit)
lr %r2,%r11 # pass pointer to pt_regs
@@ -359,25 +371,26 @@ ENTRY(pgm_check_handler)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz pgm_svcper # -> single stepped svc
0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
l %r15,__LC_KERNEL_STACK
-2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
stm %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
stm %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jz 0f
l %r1,__TI_task(%r12)
tmh %r8,0x0001 # kernel per event ?
jz pgm_kprobe
- oi __TI_flags+3(%r12),_TIF_PER_TRAP
+ oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
+ mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
l %r1,BASED(.Ljump_table)
@@ -405,9 +418,9 @@ pgm_kprobe:
# single stepped system call
#
pgm_svcper:
- oi __TI_flags+3(%r12),_TIF_PER_TRAP
mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
+ lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs
/*
@@ -429,17 +442,33 @@ io_skip:
stm %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
+ lhi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lhi %r3,THIN_INTERRUPT
+io_call:
basr %r14,%r1 # call do_IRQ
+ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON
io_tif:
- tm __TI_flags+3(%r12),_TIF_WORK_INT
+ tm __TI_flags+3(%r12),_TIF_WORK
jnz io_work # there is work to do (signals etc.)
+ tm __LC_CPU_FLAGS+3,_CIF_WORK
+ jnz io_work
io_restore:
mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
stpt __LC_EXIT_TIMER
@@ -449,7 +478,7 @@ io_done:
#
# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 1) if we return to user space we can do all _TIF_WORK work
# 2) if we return to kernel code and preemptive scheduling is enabled check
# the preemption counter and if it is zero call preempt_schedule_irq
# Before any work can be done, a switch to the kernel stack is required.
@@ -485,7 +514,6 @@ io_work:
#
io_work_user:
l %r1,__LC_KERNEL_STACK
- ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
@@ -493,11 +521,9 @@ io_work_user:
#
# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
#
io_work_tif:
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING
jo io_mcck_pending
tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
jo io_reschedule
@@ -505,10 +531,12 @@ io_work_tif:
jo io_sigpending
tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
jo io_notify_resume
+ tm __LC_CPU_FLAGS+3,_CIF_ASCE
+ jo io_uaccess
j io_return # beware of critical section cleanup
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
io_mcck_pending:
# TRACE_IRQS_ON already done at io_return
@@ -518,6 +546,14 @@ io_mcck_pending:
j io_return
#
+# _CIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __LC_CPU_FLAGS+3,255-_CIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
@@ -574,12 +610,14 @@ ext_skip:
stm %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
+ l %r1,BASED(.Ldo_IRQ)
lr %r2,%r11 # pass pointer to pt_regs
- l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
- l %r4,__LC_EXT_PARAMS # get external parameters
- l %r1,BASED(.Ldo_extint)
- basr %r14,%r1 # call do_extint
+ lhi %r3,EXT_INTERRUPT
+ basr %r14,%r1 # call do_IRQ
j io_return
/*
@@ -636,8 +674,10 @@ ENTRY(mcck_int_handler)
UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
mcck_skip:
SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
- mvc __PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
stm %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
l %r1,BASED(.Ldo_machine_check)
lr %r2,%r11 # pass pointer to pt_regs
@@ -645,13 +685,12 @@ mcck_skip:
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno mcck_return
l %r1,__LC_KERNEL_STACK # switch to kernel stack
- ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r15)
lr %r15,%r1
ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tm __TI_flags+3(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
jno mcck_return
TRACE_IRQS_OFF
l %r1,BASED(.Lhandle_mcck)
@@ -673,6 +712,7 @@ mcck_panic:
sra %r14,PAGE_SHIFT
jz 0f
l %r15,__LC_PANIC_STACK
+ j mcck_skip
0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j mcck_skip
@@ -713,12 +753,10 @@ ENTRY(restart_int_handler)
*/
stack_overflow:
l %r15,__LC_PANIC_STACK # change to panic stack
- ahi %r15,-__PT_SIZE # create pt_regs
- stm %r0,%r7,__PT_R0(%r15)
- stm %r8,%r9,__PT_PSW(%r15)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stm %r0,%r7,__PT_R0(%r11)
+ stm %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(32,%r11),0(%r14)
- lr %r15,%r11
- ahi %r15,-STACK_FRAME_OVERHEAD
l %r1,BASED(1f)
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
lr %r2,%r11 # pass pointer to pt_regs
@@ -798,15 +836,16 @@ cleanup_system_call:
mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
# set up saved register 11
l %r15,__LC_KERNEL_STACK
- ahi %r15,-__PT_SIZE
- st %r15,12(%r11) # r11 pt_regs pointer
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ st %r9,12(%r11) # r11 pt_regs pointer
# fill pt_regs
- mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC
- stm %r0,%r7,__PT_R0(%r15)
- mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC
+ mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC
+ stm %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL
# setup saved register 15
- ahi %r15,-STACK_FRAME_OVERHEAD
st %r15,28(%r11) # r15 stack pointer
# set new psw address and exit
l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000
@@ -874,13 +913,13 @@ cleanup_idle:
stm %r9,%r10,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- n %r8,BASED(cleanup_idle_wait) # clear wait state bit
+ n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits
l %r9,24(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
.long psw_idle_lpsw + 0x80000000
cleanup_idle_wait:
- .long 0xfffdffff
+ .long 0xfcfdffff
/*
* Integer constants
@@ -897,7 +936,6 @@ cleanup_idle_wait:
.Ldo_machine_check: .long s390_do_machine_check
.Lhandle_mcck: .long s390_handle_mcck
.Ldo_IRQ: .long do_IRQ
-.Ldo_extint: .long do_extint
.Ldo_signal: .long do_signal
.Ldo_notify_resume: .long do_notify_resume
.Ldo_per_trap: .long do_per_trap
@@ -909,7 +947,6 @@ cleanup_idle_wait:
.Ltrace_enter: .long do_syscall_trace_enter
.Ltrace_exit: .long do_syscall_trace_exit
.Lschedule_tail: .long schedule_tail
-.Lsys_call_table: .long sys_call_table
.Lsysc_per: .long sysc_per + 0x80000000
#ifdef CONFIG_TRACE_IRQFLAGS
.Lhardirqs_on: .long trace_hardirqs_on_caller
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 2711936fe70..6ac78192455 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -7,6 +7,7 @@
#include <asm/cputime.h>
extern void *restart_stack;
+extern unsigned long suspend_zero_pages;
void system_call(void);
void pgm_check_handler(void);
@@ -22,7 +23,6 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
-void do_asce_exception(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
@@ -52,34 +52,22 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
void do_notify_resume(struct pt_regs *regs);
-struct ext_code;
-void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long);
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq);
void do_restart(void);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
-
+int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
struct old_sigaction;
-long sys_mmap2(struct s390_mmap_arg_struct __user *arg);
-long sys_s390_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr);
long sys_s390_personality(unsigned int personality);
-long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
- size_t len, int advice);
-long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
- u32 len_low);
-long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
-long sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact);
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
-long sys_sigreturn(void);
-long sys_rt_sigreturn(void);
-long sys32_sigreturn(void);
-long sys32_rt_sigreturn(void);
+long sys_s390_runtime_instr(int command, int signum);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 6d34e0c97a3..f2e674c702e 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -19,6 +19,7 @@
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
+#include <asm/irq.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -39,14 +40,13 @@ __PT_R15 = __PT_GPRS + 120
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
-_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT)
-_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-system_call(%r13)
@@ -72,31 +72,35 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
#endif
.endm
- .macro SPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
- tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP
+ .macro LPP newpp
+#if IS_ENABLED(CONFIG_KVM)
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
jz .+8
.insn s,0xb2800000,\newpp
#endif
.endm
- .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+ .macro HANDLE_SIE_INTERCEPT scratch,reason
+#if IS_ENABLED(CONFIG_KVM)
tmhh %r8,0x0001 # interrupting from user ?
- jnz .+42
+ jnz .+62
lgr \scratch,%r9
- slg \scratch,BASED(.Lsie_loop)
- clg \scratch,BASED(.Lsie_length)
- .if \pgmcheck
+ slg \scratch,BASED(.Lsie_critical)
+ clg \scratch,BASED(.Lsie_critical_length)
+ .if \reason==1
# Some program interrupts are suppressing (e.g. protection).
# We must also check the instruction after SIE in that case.
# do_protection_exception will rewind to rewind_pad
- jh .+22
+ jh .+42
.else
- jhe .+22
+ jhe .+42
.endif
- lg %r9,BASED(.Lsie_loop)
- SPP BASED(.Lhost_id) # set host id
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ mvi __SF_EMPTY+31(%r15),\reason # set exit reason
#endif
.endm
@@ -124,10 +128,10 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
srag %r14,%r14,\shift
jnz 1f
CHECK_STACK 1<<\shift,\savearea
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: lg %r15,\stack # load target stack
-2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
.macro UPDATE_VTIME scratch,enter_timer
@@ -177,18 +181,14 @@ ENTRY(__switch_to)
lg %r4,__THREAD_info(%r2) # get thread_info of prev
lg %r5,__THREAD_info(%r3) # get thread_info of next
lgr %r15,%r5
- aghi %r15,STACK_SIZE # end of kernel stack of next
+ aghi %r15,STACK_INIT # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
lg %r15,__THREAD_ksp(%r3) # load kernel stack of next
- tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending?
- jz 0f
- ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next
-0: lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
br %r14
__critical_start:
@@ -203,10 +203,9 @@ sysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_THREAD_INFO
- larl %r13,system_call
+ lghi %r14,_PIF_SYSCALL
sysc_per:
lg %r15,__LC_KERNEL_STACK
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
sysc_vtime:
UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
@@ -215,8 +214,9 @@ sysc_vtime:
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ stg %r14,__PT_FLAGS(%r11)
sysc_do_svc:
- oi __TI_flags+7(%r12),_TIF_SYSCALL
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
llgh %r8,__PT_INT_CODE+2(%r11)
slag %r8,%r8,2 # shift and test for svc 0
jnz sysc_nr_ok
@@ -227,18 +227,11 @@ sysc_do_svc:
sth %r1,__PT_INT_CODE+2(%r11)
slag %r8,%r1,2
sysc_nr_ok:
- larl %r10,sys_call_table # 64 bit system call table
-#ifdef CONFIG_COMPAT
- tm __TI_flags+5(%r12),(_TIF_31BIT>>16)
- jno sysc_noemu
- larl %r10,sys_call_table_emu # 31 bit system call table
-sysc_noemu:
-#endif
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lgf %r9,0(%r8,%r10) # get system call add.
- tm __TI_flags+6(%r12),_TIF_TRACE >> 8
+ tm __TI_flags+7(%r12),_TIF_TRACE
jnz sysc_tracesys
basr %r14,%r9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
@@ -248,9 +241,12 @@ sysc_return:
sysc_tif:
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno sysc_restore
- tm __TI_flags+7(%r12),_TIF_WORK_SVC
+ tm __PT_FLAGS+7(%r11),_PIF_WORK
+ jnz sysc_work
+ tm __TI_flags+7(%r12),_TIF_WORK
jnz sysc_work # check for work
- ni __TI_flags+7(%r12),255-_TIF_SYSCALL
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz sysc_work
sysc_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
@@ -265,16 +261,18 @@ sysc_done:
# One of the work bits is on. Find out which one.
#
sysc_work:
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jo sysc_mcck_pending
tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
jo sysc_reschedule
- tm __TI_flags+7(%r12),_TIF_PER_TRAP
+ tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP
jo sysc_singlestep
tm __TI_flags+7(%r12),_TIF_SIGPENDING
jo sysc_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo sysc_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo sysc_uaccess
j sysc_return # beware of critical section cleanup
#
@@ -285,21 +283,30 @@ sysc_reschedule:
jg schedule
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
sysc_mcck_pending:
larl %r14,sysc_return
jg s390_handle_mcck # TIF bit will be cleared by handler
#
+# _CIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_signal
- tm __TI_flags+7(%r12),_TIF_SYSCALL
+ tm __PT_FLAGS+7(%r11),_PIF_SYSCALL
jno sysc_return
lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
lghi %r8,0 # svc 0 returns -ENOSYS
llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
cghi %r1,NR_syscalls
@@ -316,10 +323,10 @@ sysc_notify_resume:
jg do_notify_resume
#
-# _TIF_PER_TRAP is set, call do_per_trap
+# _PIF_PER_TRAP is set, call do_per_trap
#
sysc_singlestep:
- ni __TI_flags+7(%r12),255-_TIF_PER_TRAP
+ ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,sysc_return
jg do_per_trap
@@ -346,7 +353,7 @@ sysc_tracego:
basr %r14,%r9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
sysc_tracenogo:
- tm __TI_flags+6(%r12),_TIF_TRACE >> 8
+ tm __TI_flags+7(%r12),_TIF_TRACE
jz sysc_return
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,sysc_return
@@ -389,6 +396,7 @@ ENTRY(pgm_check_handler)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz pgm_svcper # -> single stepped svc
0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
LAST_BREAK %r14
@@ -398,22 +406,22 @@ ENTRY(pgm_check_handler)
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
jz 2f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jz 0f
tmhh %r8,0x0001 # kernel per event ?
jz pgm_kprobe
- oi __TI_flags+7(%r12),_TIF_PER_TRAP
+ oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
@@ -440,10 +448,10 @@ pgm_kprobe:
# single stepped system call
#
pgm_svcper:
- oi __TI_flags+7(%r12),_TIF_PER_TRAP
mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
larl %r14,sysc_per
stg %r14,__LC_RETURN_PSW+8
+ lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs
/*
@@ -457,7 +465,7 @@ ENTRY(io_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_IO_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,2
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user?
jz io_skip
@@ -467,16 +475,32 @@ io_skip:
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lghi %r3,THIN_INTERRUPT
+io_call:
brasl %r14,do_IRQ
+ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
LOCKDEP_SYS_EXIT
TRACE_IRQS_ON
io_tif:
- tm __TI_flags+7(%r12),_TIF_WORK_INT
+ tm __TI_flags+7(%r12),_TIF_WORK
jnz io_work # there is work to do (signals etc.)
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz io_work
io_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
@@ -489,7 +513,7 @@ io_done:
#
# There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 1) if we return to user space we can do all _TIF_WORK work
# 2) if we return to kernel code and kvm is enabled check if we need to
# modify the psw to leave SIE
# 3) if we return to kernel code and preemptive scheduling is enabled check
@@ -526,7 +550,6 @@ io_work:
#
io_work_user:
lg %r1,__LC_KERNEL_STACK
- aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
@@ -534,11 +557,9 @@ io_work_user:
#
# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
#
io_work_tif:
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jo io_mcck_pending
tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
jo io_reschedule
@@ -546,10 +567,12 @@ io_work_tif:
jo io_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo io_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo io_uaccess
j io_return # beware of critical section cleanup
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
io_mcck_pending:
# TRACE_IRQS_ON already done at io_return
@@ -558,6 +581,14 @@ io_mcck_pending:
j io_return
#
+# _CIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
@@ -603,7 +634,7 @@ ENTRY(ext_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_EXT_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,3
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
tmhh %r8,0x0001 # interrupting from user ?
jz ext_skip
@@ -613,14 +644,16 @@ ext_skip:
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
+ lghi %r1,__LC_EXT_PARAMS2
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r1,4096
lgr %r2,%r11 # pass pointer to pt_regs
- llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
- llgf %r4,__LC_EXT_PARAMS # get external parameter
- lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter
- brasl %r14,do_extint
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
j io_return
/*
@@ -651,7 +684,7 @@ ENTRY(mcck_int_handler)
lg %r12,__LC_THREAD_INFO
larl %r13,system_call
lmg %r8,%r9,__LC_MCK_OLD_PSW
- HANDLE_SIE_INTERCEPT %r14,0
+ HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
jo mcck_panic # yes -> rest of mcck code invalid
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
@@ -678,22 +711,23 @@ ENTRY(mcck_int_handler)
UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
LAST_BREAK %r14
mcck_skip:
- lghi %r14,__LC_GPREGS_SAVE_AREA
- mvc __PT_R0(128,%r11),0(%r14)
+ lghi %r14,__LC_GPREGS_SAVE_AREA+64
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno mcck_return
lg %r1,__LC_KERNEL_STACK # switch to kernel stack
- aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tm __TI_flags+7(%r12),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jno mcck_return
TRACE_IRQS_OFF
brasl %r14,s390_handle_mcck
@@ -754,14 +788,12 @@ ENTRY(restart_int_handler)
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
stack_overflow:
- lg %r11,__LC_PANIC_STACK # change to panic stack
- aghi %r11,-__PT_SIZE # create pt_regs
+ lg %r15,__LC_PANIC_STACK # change to panic stack
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(64,%r11),0(%r14)
stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
- lgr %r15,%r11
- aghi %r15,-STACK_FRAME_OVERHEAD
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
@@ -845,15 +877,16 @@ cleanup_system_call:
mvc __TI_last_break(8,%r12),16(%r11)
0: # set up saved register r11
lg %r15,__LC_KERNEL_STACK
- aghi %r15,-__PT_SIZE
- stg %r15,24(%r11) # r11 pt_regs pointer
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ stg %r9,24(%r11) # r11 pt_regs pointer
# fill pt_regs
- mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC
- stmg %r0,%r7,__PT_R0(%r15)
- mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC
+ mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+ stmg %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
# setup saved register r15
- aghi %r15,-STACK_FRAME_OVERHEAD
stg %r15,56(%r11) # r15 stack pointer
# set new psw address and exit
larl %r9,sysc_do_svc
@@ -922,7 +955,7 @@ cleanup_idle:
stg %r9,__LC_SYSTEM_TIMER
mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
# prepare return psw
- nihh %r8,0xfffd # clear wait state bit
+ nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
br %r14
cleanup_idle_insn:
@@ -938,7 +971,7 @@ cleanup_idle_insn:
.quad __critical_end - __critical_start
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
/*
* sie64a calling convention:
* %r2 pointer to sie control block
@@ -948,56 +981,50 @@ ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
stg %r2,__SF_EMPTY(%r15) # save control block pointer
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
- xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
lmg %r0,%r13,0(%r3) # load guest gprs 0-13
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions in the sie_loop should not cause program interrupts. So
-# lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-rewind_pad:
- nop 0
-sie_loop:
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- tm __TI_flags+7(%r14),_TIF_EXIT_SIE
- jnz sie_exit
lg %r14,__LC_GMAP # get gmap pointer
ltgr %r14,%r14
jz sie_gmap
lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
sie_gmap:
lg %r14,__SF_EMPTY(%r15) # get control block pointer
- SPP __SF_EMPTY(%r15) # set guest id
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),1 # last exit...
+ jnz sie_done
+ LPP __SF_EMPTY(%r15) # set guest id
sie 0(%r14)
sie_done:
- SPP __SF_EMPTY+16(%r15) # set host id
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
-sie_exit:
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and sie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+rewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,0
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
br %r14
sie_fault:
- lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
- lg %r14,__LC_THREAD_INFO # pointer thread_info struct
- lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lghi %r2,-EFAULT
- br %r14
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
.align 8
-.Lsie_loop:
- .quad sie_loop
-.Lsie_length:
- .quad sie_done - sie_loop
-.Lhost_id:
- .quad 0
+.Lsie_critical:
+ .quad sie_gmap
+.Lsie_critical_length:
+ .quad sie_done - sie_gmap
EX_TABLE(rewind_pad,sie_fault)
- EX_TABLE(sie_loop,sie_fault)
+ EX_TABLE(sie_exit,sie_fault)
#endif
.section .rodata, "a"
@@ -1010,6 +1037,7 @@ sys_call_table:
#ifdef CONFIG_COMPAT
#define SYSCALL(esa,esame,emu) .long emu
+ .globl sys_call_table_emu
sys_call_table_emu:
#include "syscalls.S"
#undef SYSCALL
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 78bdf0e5dff..54d6493c4a5 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -15,12 +15,7 @@
#include <linux/kprobes.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
-
-#ifdef CONFIG_64BIT
-#define MCOUNT_OFFSET_RET 12
-#else
-#define MCOUNT_OFFSET_RET 22
-#endif
+#include "entry.h"
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -135,9 +130,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return 0;
}
-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
{
- *(unsigned long *) data = 0;
return 0;
}
@@ -155,14 +149,14 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
- goto out;
- trace.func = (ip & PSW_ADDR_INSN) - MCOUNT_OFFSET_RET;
+ ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+ trace.func = ip;
+ trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to. */
- if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
+ if (!ftrace_graph_entry(&trace))
+ goto out;
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
goto out;
- }
parent = (unsigned long) return_to_handler;
out:
return parent;
@@ -182,7 +176,7 @@ int ftrace_enable_ftrace_graph_caller(void)
offset = ((void *) prepare_ftrace_return -
(void *) ftrace_graph_caller) / 2;
- return probe_kernel_write(ftrace_graph_caller + 2,
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
&offset, sizeof(offset));
}
@@ -190,7 +184,7 @@ int ftrace_disable_ftrace_graph_caller(void)
{
static unsigned short offset = 0x0002;
- return probe_kernel_write(ftrace_graph_caller + 2,
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
&offset, sizeof(offset));
}
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index fd8db63dfc9..e88d35d7495 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -437,13 +437,13 @@ ENTRY(startup_kdump)
#if defined(CONFIG_64BIT)
#if defined(CONFIG_MARCH_ZEC12)
- .long 3, 0xc100efe3, 0xf46ce000, 0x00400000
+ .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
#elif defined(CONFIG_MARCH_Z196)
- .long 2, 0xc100efe3, 0xf46c0000
+ .long 2, 0xc100eff2, 0xf46c0000
#elif defined(CONFIG_MARCH_Z10)
- .long 2, 0xc100efe3, 0xf0680000
+ .long 2, 0xc100eff2, 0xf0680000
#elif defined(CONFIG_MARCH_Z9_109)
- .long 1, 0xc100efc3
+ .long 1, 0xc100efc2
#elif defined(CONFIG_MARCH_Z990)
.long 1, 0xc0002000
#elif defined(CONFIG_MARCH_Z900)
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 9a99856df1c..6dbe80983a2 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -59,7 +59,6 @@ ENTRY(startup_continue)
.long 0 # cr13: home space segment table
.long 0xc0000000 # cr14: machine check handling off
.long 0 # cr15: linkage stack operations
-.Lmchunk:.long memory_chunk
.Lbss_bgn: .long __bss_start
.Lbss_end: .long _end
.Lparmaddr: .long PARMAREA
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index b9e25ae2579..d7c00507568 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -59,7 +59,7 @@ ENTRY(startup_continue)
.quad 0 # cr12: tracing off
.quad 0 # cr13: home space segment table
.quad 0xc0000000 # cr14: machine check handling off
- .quad 0 # cr15: linkage stack operations
+ .quad .Llinkage_stack # cr15: linkage stack operations
.Lpcmsk:.quad 0x0000000180000000
.L4malign:.quad 0xffffffffffc00000
.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
@@ -67,12 +67,15 @@ ENTRY(startup_continue)
.Lparmaddr:
.quad PARMAREA
.align 64
-.Lduct: .long 0,0,0,0,.Lduald,0,0,0
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
.long 0,0,0,0,0,0,0,0
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
.align 128
.Lduald:.rept 8
.long 0x80000000,0,0,0 # invalid access-list entries
.endr
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
ENTRY(_ehead)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6ffcd320321..633ca750453 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -754,9 +754,9 @@ static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
.write = reipl_fcp_scpdata_write,
};
-DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
reipl_block_fcp->ipl_info.fcp.bootprog);
@@ -1323,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
-DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
dump_block_fcp->ipl_info.fcp.bootprog);
@@ -1414,6 +1414,16 @@ static struct kobj_attribute dump_type_attr =
static struct kset *dump_kset;
+static void diag308_dump(void *dump_block)
+{
+ diag308(DIAG308_SET, dump_block);
+ while (1) {
+ if (diag308(DIAG308_DUMP, NULL) != 0x302)
+ break;
+ udelay_simple(USEC_PER_SEC);
+ }
+}
+
static void __dump_run(void *unused)
{
struct ccw_dev_id devid;
@@ -1432,12 +1442,10 @@ static void __dump_run(void *unused)
__cpcmd(buf, NULL, 0, NULL);
break;
case DUMP_METHOD_CCW_DIAG:
- diag308(DIAG308_SET, dump_block_ccw);
- diag308(DIAG308_DUMP, NULL);
+ diag308_dump(dump_block_ccw);
break;
case DUMP_METHOD_FCP_DIAG:
- diag308(DIAG308_SET, dump_block_fcp);
- diag308(DIAG308_DUMP, NULL);
+ diag308_dump(dump_block_fcp);
break;
default:
break;
@@ -2043,12 +2051,12 @@ void s390_reset_system(void (*func)(void *), void *data)
__ctl_clear_bit(0,28);
/* Set new machine check handler */
- S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
+ S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
/* Set new program check handler */
- S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
+ S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 9df824ea166..99b0b09646c 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -18,10 +18,12 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/irq.h>
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -33,7 +35,7 @@ struct irq_class {
};
/*
- * The list of "main" irq classes on s390. This is the list of interrrupts
+ * The list of "main" irq classes on s390. This is the list of interrupts
* that appear both in /proc/stat ("intr" line) and /proc/interrupts.
* Historically only external and I/O interrupts have been part of /proc/stat.
* We can't add the split external and I/O sub classes since the first field
@@ -42,9 +44,10 @@ struct irq_class {
* Since the external and I/O interrupt fields are already sums we would end
* up with having a sum which accounts each interrupt twice.
*/
-static const struct irq_class irqclass_main_desc[NR_IRQS] = {
- [EXTERNAL_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"}
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ [EXT_INTERRUPT] = {.name = "EXT"},
+ [IO_INTERRUPT] = {.name = "I/O"},
+ [THIN_INTERRUPT] = {.name = "AIO"},
};
/*
@@ -81,10 +84,33 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
[IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
[IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
[IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
[NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
[CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
+void __init init_IRQ(void)
+{
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
@@ -99,105 +125,88 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
+ goto out;
}
if (irq < NR_IRQS) {
+ if (irq >= NR_IRQS_BASE)
+ goto out;
seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]);
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
- goto skip_arch_irqs;
+ goto out;
}
for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]);
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
if (irqclass_sub_desc[irq].desc)
seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
seq_putc(p, '\n');
}
-skip_arch_irqs:
+out:
put_online_cpus();
return 0;
}
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+ return from < THIN_INTERRUPT ? THIN_INTERRUPT : from;
+}
+
/*
* Switch to the asynchronous interrupt stack for softirq execution.
*/
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
{
- unsigned long flags, old, new;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
- /* Check against async. stack address range. */
- new = S390_lowcore.async_stack;
- if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
- /* Need to switch to the async. stack. */
- new -= STACK_FRAME_OVERHEAD;
- ((struct stack_frame *) new)->back_chain = old;
-
- asm volatile(" la 15,0(%0)\n"
- " basr 14,%2\n"
- " la 15,0(%1)\n"
- : : "a" (new), "a" (old),
- "a" (__do_softirq)
- : "0", "1", "2", "3", "4", "5", "14",
- "cc", "memory" );
- } else {
- /* We are already on the async stack. */
- __do_softirq();
- }
+ unsigned long old, new;
+
+ /* Get current stack pointer. */
+ asm volatile("la %0,0(15)" : "=a" (old));
+ /* Check against async. stack address range. */
+ new = S390_lowcore.async_stack;
+ if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+ /* Need to switch to the async. stack. */
+ new -= STACK_FRAME_OVERHEAD;
+ ((struct stack_frame *) new)->back_chain = old;
+ asm volatile(" la 15,0(%0)\n"
+ " basr 14,%2\n"
+ " la 15,0(%1)\n"
+ : : "a" (new), "a" (old),
+ "a" (__do_softirq)
+ : "0", "1", "2", "3", "4", "5", "14",
+ "cc", "memory" );
+ } else {
+ /* We are already on the async stack. */
+ __do_softirq();
}
-
- local_irq_restore(flags);
}
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
- struct proc_dir_entry *root_irq_dir;
-
- root_irq_dir = proc_mkdir("irq", NULL);
- create_prof_cpu_mask(root_irq_dir);
-}
-#endif
-
/*
* ext_int_hash[index] is the list head for all external interrupts that hash
* to this index.
*/
-static struct list_head ext_int_hash[256];
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
struct ext_int_info {
ext_int_handler_t handler;
- u16 code;
- struct list_head entry;
+ struct hlist_node entry;
struct rcu_head rcu;
+ u16 code;
};
/* ext_int_hash_lock protects the handler lists for external interrupts */
-DEFINE_SPINLOCK(ext_int_hash_lock);
-
-static void __init init_external_interrupts(void)
-{
- int idx;
-
- for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
- INIT_LIST_HEAD(&ext_int_hash[idx]);
-}
+static DEFINE_SPINLOCK(ext_int_hash_lock);
static inline int ext_hash(u16 code)
{
- return (code + (code >> 9)) & 0xff;
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
}
-int register_external_interrupt(u16 code, ext_int_handler_t handler)
+int register_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
@@ -211,104 +220,88 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler)
index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_add_rcu(&p->entry, &ext_int_hash[index]);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(register_external_interrupt);
+EXPORT_SYMBOL(register_external_irq);
-int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
int index = ext_hash(code);
spin_lock_irqsave(&ext_int_hash_lock, flags);
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
if (p->code == code && p->handler == handler) {
- list_del_rcu(&p->entry);
+ hlist_del_rcu(&p->entry);
kfree_rcu(p, rcu);
}
}
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(unregister_external_interrupt);
+EXPORT_SYMBOL(unregister_external_irq);
-void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
- unsigned int param32, unsigned long param64)
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
{
- struct pt_regs *old_regs;
+ struct pt_regs *regs = get_irq_regs();
+ struct ext_code ext_code;
struct ext_int_info *p;
int index;
- old_regs = set_irq_regs(regs);
- irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
- /* Serve timer interrupts first. */
- clock_comparator_work();
- }
- kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
- if (ext_code.code != 0x1004)
+ ext_code = *(struct ext_code *) &regs->int_code;
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
__get_cpu_var(s390_idle).nohz_delay = 1;
index = ext_hash(ext_code.code);
rcu_read_lock();
- list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
- if (likely(p->code == ext_code.code))
- p->handler(ext_code, param32, param64);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
rcu_read_unlock();
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-void __init init_IRQ(void)
-{
- init_external_interrupts();
+ return IRQ_HANDLED;
}
-static DEFINE_SPINLOCK(sc_irq_lock);
-static int sc_irq_refcount;
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
-void service_subclass_irq_register(void)
+void __init init_ext_interrupts(void)
{
- spin_lock(&sc_irq_lock);
- if (!sc_irq_refcount)
- ctl_set_bit(0, 9);
- sc_irq_refcount++;
- spin_unlock(&sc_irq_lock);
-}
-EXPORT_SYMBOL(service_subclass_irq_register);
+ int idx;
-void service_subclass_irq_unregister(void)
-{
- spin_lock(&sc_irq_lock);
- sc_irq_refcount--;
- if (!sc_irq_refcount)
- ctl_clear_bit(0, 9);
- spin_unlock(&sc_irq_lock);
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
+
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
}
-EXPORT_SYMBOL(service_subclass_irq_unregister);
-static DEFINE_SPINLOCK(ma_subclass_lock);
-static int ma_subclass_refcount;
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
-void measurement_alert_subclass_register(void)
+void irq_subclass_register(enum irq_subclass subclass)
{
- spin_lock(&ma_subclass_lock);
- if (!ma_subclass_refcount)
- ctl_set_bit(0, 5);
- ma_subclass_refcount++;
- spin_unlock(&ma_subclass_lock);
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL(measurement_alert_subclass_register);
+EXPORT_SYMBOL(irq_subclass_register);
-void measurement_alert_subclass_unregister(void)
+void irq_subclass_unregister(enum irq_subclass subclass)
{
- spin_lock(&ma_subclass_lock);
- ma_subclass_refcount--;
- if (!ma_subclass_refcount)
- ctl_clear_bit(0, 5);
- spin_unlock(&ma_subclass_lock);
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
}
-EXPORT_SYMBOL(measurement_alert_subclass_unregister);
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index d1c7214e157..bc71a7b95af 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -26,19 +26,42 @@
#include <linux/stop_machine.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/sections.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe);
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
+static void free_dmainsn_page(void *page)
+{
+ free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
{
+ if (!is_known_insn((unsigned char *)insn))
+ return -EINVAL;
switch (insn[0] >> 8) {
case 0x0c: /* bassm */
case 0x0b: /* bsm */
@@ -47,6 +70,11 @@ static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
case 0xac: /* stnsm */
case 0xad: /* stosm */
return -EINVAL;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ return -EINVAL;
+ }
}
switch (insn[0]) {
case 0x0101: /* pr */
@@ -100,35 +128,160 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xc0:
- if ((insn[0] & 0x0f) == 0x00 || /* larl */
- (insn[0] & 0x0f) == 0x05) /* brasl */
- fixup |= FIXUP_RETURN_REGISTER;
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xeb:
- if ((insn[2] & 0xff) == 0x44 || /* bxhg */
- (insn[2] & 0xff) == 0x45) /* bxleg */
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
break;
case 0xe3: /* bctg */
if ((insn[2] & 0xff) == 0x46)
fixup = FIXUP_BRANCH_NOT_TAKEN;
break;
+ case 0xec:
+ switch (insn[2] & 0xff) {
+ case 0xe5: /* clgrb */
+ case 0xe6: /* cgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
+ case 0xfd: /* cglib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
}
return fixup;
}
+static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
+{
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+static void __kprobes copy_instruction(struct kprobe *p)
+{
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+ if (!is_insn_relative_long(p->ainsn.insn))
+ return;
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
+
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+ BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+ if (addr < (void *)MODULES_VADDR)
+ return 0;
+ if (addr > (void *)MODULES_END)
+ return 0;
+#endif
+ return 1;
+}
+
+static int __kprobes s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area like the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ else if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+
+static void __kprobes s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
if ((unsigned long) p->addr & 0x01)
return -EINVAL;
-
/* Make sure the probe isn't going on a difficult instruction */
if (is_prohibited_opcode(p->addr))
return -EINVAL;
-
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
p->opcode = *p->addr;
- memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
-
+ copy_instruction(p);
return 0;
}
@@ -169,6 +322,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
+ s390_free_insn_slot(p);
}
static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
@@ -354,7 +508,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
{
struct kretprobe_instance *ri;
struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
unsigned long flags, orig_ret_address;
unsigned long trampoline_address;
kprobe_opcode_t *correct_ret_addr;
@@ -379,7 +533,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
orig_ret_address = 0;
correct_ret_addr = NULL;
trampoline_address = (unsigned long) &kretprobe_trampoline;
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
@@ -398,7 +552,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
kretprobe_assert(ri, orig_ret_address, trampoline_address);
correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
@@ -427,7 +581,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
@@ -457,7 +611,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
- int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2;
+ int ilen = insn_length(p->ainsn.insn[0] >> 8);
if (ip - (unsigned long) p->ainsn.insn == ilen)
ip = (unsigned long) p->addr + ilen;
}
@@ -526,7 +680,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_SSDONE:
/*
* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
+ * we can also use npre/npostfault count for accounting
* these specific fault cases.
*/
kprobes_inc_nmissed_count(p);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index b3de2770001..719e27b2cf2 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,6 +13,7 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
+#include <linux/suspend.h>
#include <asm/cio.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
@@ -49,7 +50,7 @@ static void add_elf_notes(int cpu)
/*
* Initialize CPU ELF notes
*/
-void setup_regs(void)
+static void setup_regs(void)
{
unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
int cpu, this_cpu;
@@ -67,6 +68,35 @@ void setup_regs(void)
memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
}
+/*
+ * PM notifier callback for kdump
+ */
+static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ if (crashk_res.start)
+ crash_map_reserved_pages();
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ if (crashk_res.start)
+ crash_unmap_reserved_pages();
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init machine_kdump_pm_init(void)
+{
+ pm_notifier(machine_kdump_pm_cb, 0);
+ return 0;
+}
+arch_initcall(machine_kdump_pm_init);
#endif
/*
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4567ce20d90..08dcf21cb8d 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -7,6 +7,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
.section .kprobes.text, "ax"
@@ -33,6 +34,7 @@ ENTRY(ftrace_caller)
la %r2,0(%r14)
st %r0,__SF_BACKCHAIN(%r15)
la %r3,0(%r3)
+ ahi %r2,-MCOUNT_INSN_SIZE
l %r14,0b-0b(%r1)
l %r14,0(%r14)
basr %r14,%r14
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
index 11332193db3..1c52eae3396 100644
--- a/arch/s390/kernel/mcount64.S
+++ b/arch/s390/kernel/mcount64.S
@@ -7,6 +7,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
.section .kprobes.text, "ax"
@@ -29,6 +30,7 @@ ENTRY(ftrace_caller)
stg %r1,__SF_BACKCHAIN(%r15)
lgr %r2,%r14
lg %r3,168(%r15)
+ aghi %r2,-MCOUNT_INSN_SIZE
larl %r14,ftrace_trace_function
lg %r14,0(%r14)
basr %r14,%r14
diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c
deleted file mode 100644
index 22d502e885e..00000000000
--- a/arch/s390/kernel/mem_detect.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/ipl.h>
-#include <asm/sclp.h>
-#include <asm/setup.h>
-
-#define ADDR2G (1ULL << 31)
-
-static void find_memory_chunks(struct mem_chunk chunk[])
-{
- unsigned long long memsize, rnmax, rzm;
- unsigned long addr = 0, size;
- int i = 0, type;
-
- rzm = sclp_get_rzm();
- rnmax = sclp_get_rnmax();
- memsize = rzm * rnmax;
- if (!rzm)
- rzm = 1ULL << 17;
- if (sizeof(long) == 4) {
- rzm = min(ADDR2G, rzm);
- memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
- }
- do {
- size = 0;
- type = tprot(addr);
- do {
- size += rzm;
- if (memsize && addr + size >= memsize)
- break;
- } while (type == tprot(addr + size));
- if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
- chunk[i].addr = addr;
- chunk[i].size = size;
- chunk[i].type = type;
- i++;
- }
- addr += size;
- } while (addr < memsize && i < MEMORY_CHUNKS);
-}
-
-void detect_memory_layout(struct mem_chunk chunk[])
-{
- unsigned long flags, cr0;
-
- memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));
- /* Disable IRQs, DAT and low address protection so tprot does the
- * right thing and we don't get scheduled away with low address
- * protection disabled.
- */
- flags = __arch_local_irq_stnsm(0xf8);
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28);
- find_memory_chunks(chunk);
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
-}
-EXPORT_SYMBOL(detect_memory_layout);
-
-/*
- * Move memory chunks array from index "from" to index "to"
- */
-static void mem_chunk_move(struct mem_chunk chunk[], int to, int from)
-{
- int cnt = MEMORY_CHUNKS - to;
-
- memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk));
-}
-
-/*
- * Initialize memory chunk
- */
-static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr,
- unsigned long size, int type)
-{
- chunk->type = type;
- chunk->addr = addr;
- chunk->size = size;
-}
-
-/*
- * Create memory hole with given address, size, and type
- */
-void create_mem_hole(struct mem_chunk chunk[], unsigned long addr,
- unsigned long size, int type)
-{
- unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size;
- int i, ch_type;
-
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (chunk[i].size == 0)
- continue;
-
- /* Define chunk properties */
- ch_start = chunk[i].addr;
- ch_size = chunk[i].size;
- ch_end = ch_start + ch_size - 1;
- ch_type = chunk[i].type;
-
- /* Is memory chunk hit by memory hole? */
- if (addr + size <= ch_start)
- continue; /* No: memory hole in front of chunk */
- if (addr > ch_end)
- continue; /* No: memory hole after chunk */
-
- /* Yes: Define local hole properties */
- lh_start = max(addr, chunk[i].addr);
- lh_end = min(addr + size - 1, ch_end);
- lh_size = lh_end - lh_start + 1;
-
- if (lh_start == ch_start && lh_end == ch_end) {
- /* Hole covers complete memory chunk */
- mem_chunk_init(&chunk[i], lh_start, lh_size, type);
- } else if (lh_end == ch_end) {
- /* Hole starts in memory chunk and convers chunk end */
- mem_chunk_move(chunk, i + 1, i);
- mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size,
- ch_type);
- mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type);
- i += 1;
- } else if (lh_start == ch_start) {
- /* Hole ends in memory chunk */
- mem_chunk_move(chunk, i + 1, i);
- mem_chunk_init(&chunk[i], lh_start, lh_size, type);
- mem_chunk_init(&chunk[i + 1], lh_end + 1,
- ch_size - lh_size, ch_type);
- break;
- } else {
- /* Hole splits memory chunk */
- mem_chunk_move(chunk, i + 2, i);
- mem_chunk_init(&chunk[i], ch_start,
- lh_start - ch_start, ch_type);
- mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type);
- mem_chunk_init(&chunk[i + 2], lh_end + 1,
- ch_end - lh_end, ch_type);
- break;
- }
- }
-}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 4610deafd95..b89b59158b9 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -50,7 +50,7 @@ void *module_alloc(unsigned long size)
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, -1,
+ GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
@@ -65,8 +65,7 @@ void module_free(struct module *mod, void *module_region)
vfree(module_region);
}
-static void
-check_rela(Elf_Rela *rela, struct module *me)
+static void check_rela(Elf_Rela *rela, struct module *me)
{
struct mod_arch_syminfo *info;
@@ -115,9 +114,8 @@ check_rela(Elf_Rela *rela, struct module *me)
* Account for GOT and PLT relocations. We can't add sections for
* got and plt but we can increase the core module size.
*/
-int
-module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
- char *secstrings, struct module *me)
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *me)
{
Elf_Shdr *symtab;
Elf_Sym *symbols;
@@ -179,13 +177,52 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
return 0;
}
-static int
-apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- struct module *me)
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
+ int sign, int bits, int shift)
+{
+ unsigned long umax;
+ long min, max;
+
+ if (val & ((1UL << shift) - 1))
+ return -ENOEXEC;
+ if (sign) {
+ val = (Elf_Addr)(((long) val) >> shift);
+ min = -(1L << (bits - 1));
+ max = (1L << (bits - 1)) - 1;
+ if ((long) val < min || (long) val > max)
+ return -ENOEXEC;
+ } else {
+ val >>= shift;
+ umax = ((1UL << (bits - 1)) << 1) - 1;
+ if ((unsigned long) val > umax)
+ return -ENOEXEC;
+ }
+
+ if (bits == 8)
+ *(unsigned char *) loc = val;
+ else if (bits == 12)
+ *(unsigned short *) loc = (val & 0xfff) |
+ (*(unsigned short *) loc & 0xf000);
+ else if (bits == 16)
+ *(unsigned short *) loc = val;
+ else if (bits == 20)
+ *(unsigned int *) loc = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 |
+ (*(unsigned int *) loc & 0xf00000ff);
+ else if (bits == 32)
+ *(unsigned int *) loc = val;
+ else if (bits == 64)
+ *(unsigned long *) loc = val;
+ return 0;
+}
+
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ const char *strtab, struct module *me)
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
int r_type, r_sym;
+ int rc = -ENOEXEC;
/* This is where to make the change */
loc = base + rela->r_offset;
@@ -197,6 +234,9 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = symtab[r_sym].st_value;
switch (r_type) {
+ case R_390_NONE: /* No relocation. */
+ rc = 0;
+ break;
case R_390_8: /* Direct 8 bit. */
case R_390_12: /* Direct 12 bit. */
case R_390_16: /* Direct 16 bit. */
@@ -205,20 +245,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- *(unsigned char *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 8, 0);
else if (r_type == R_390_12)
- *(unsigned short *) loc = (val & 0xfff) |
- (*(unsigned short *) loc & 0xf000);
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
else if (r_type == R_390_16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_20)
- *(unsigned int *) loc =
- (*(unsigned int *) loc & 0xf00000ff) |
- (val & 0xfff) << 16 | (val & 0xff000) >> 4;
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
else if (r_type == R_390_32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -227,15 +264,15 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 16, 0);
else if (r_type == R_390_PC16DBL)
- *(unsigned short *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
else if (r_type == R_390_PC32DBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
else if (r_type == R_390_PC32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
else if (r_type == R_390_PC64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 64, 0);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -260,26 +297,24 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- *(unsigned short *) loc = (val & 0xfff) |
- (*(unsigned short *) loc & 0xf000);
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- *(unsigned int *) loc =
- (*(unsigned int *) loc & 0xf00000ff) |
- (val & 0xfff) << 16 | (val & 0xff000) >> 4;
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- *(unsigned int *) loc = val;
- else if (r_type == R_390_GOTENT ||
- r_type == R_390_GOTPLTENT)
- *(unsigned int *) loc =
- (val + (Elf_Addr) me->module_core - loc) >> 1;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ else if (r_type == R_390_GOTENT ||
+ r_type == R_390_GOTPLTENT) {
+ val += (Elf_Addr) me->module_core - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ }
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
@@ -321,17 +356,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val += rela->r_addend - loc;
}
if (r_type == R_390_PLT16DBL)
- *(unsigned short *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
else if (r_type == R_390_PLTOFF16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_PLT32DBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
@@ -339,20 +374,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = val + rela->r_addend -
((Elf_Addr) me->module_core + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_GOTOFF32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_GOTOFF64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
val = (Elf_Addr) me->module_core + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
else if (r_type == R_390_GOTPCDBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -360,19 +395,25 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_RELATIVE: /* Adjust by program base. */
/* Only needed if we want to support loading of
modules linked with -shared. */
- break;
+ return -ENOEXEC;
default:
- printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+ printk(KERN_ERR "module %s: unknown relocation: %u\n",
me->name, r_type);
return -ENOEXEC;
}
+ if (rc) {
+ printk(KERN_ERR "module %s: relocation error for symbol %s "
+ "(r_type %i, value 0x%lx)\n",
+ me->name, strtab + symtab[r_sym].st_name,
+ r_type, (unsigned long) val);
+ return rc;
+ }
return 0;
}
-int
-apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
- unsigned int symindex, unsigned int relsec,
- struct module *me)
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -388,7 +429,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me);
if (rc)
return rc;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 7918fbea36b..210e1285f75 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -55,7 +55,7 @@ void s390_handle_mcck(void)
local_mcck_disable();
mcck = __get_cpu_var(cpu_mcck);
memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
- clear_thread_flag(TIF_MCCK_PENDING);
+ clear_cpu_flag(CIF_MCCK_PENDING);
local_mcck_enable();
local_irq_restore(flags);
@@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
: "0", "cc");
#endif
/* Revalidate clock comparator register */
- if (S390_lowcore.clock_comparator == -1)
- set_clock_comparator(S390_lowcore.mcck_clock);
- else
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(S390_lowcore.clock_comparator);
/* Check if old PSW is valid */
if (!mci->wp)
/*
@@ -293,7 +290,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
* retry this instruction.
*/
spin_lock(&ipd_lock);
- tmp = get_clock();
+ tmp = get_tod_clock();
if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
ipd_count++;
else
@@ -316,7 +313,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
*/
mcck->kill_task = 1;
mcck->mcck_code = *(unsigned long long *) mci;
- set_thread_flag(TIF_MCCK_PENDING);
+ set_cpu_flag(CIF_MCCK_PENDING);
} else {
/*
* Couldn't restore all register contents while in
@@ -355,12 +352,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mci->cp) {
/* Channel report word pending */
mcck->channel_report = 1;
- set_thread_flag(TIF_MCCK_PENDING);
+ set_cpu_flag(CIF_MCCK_PENDING);
}
if (mci->w) {
/* Warning pending */
mcck->warning = 1;
- set_thread_flag(TIF_MCCK_PENDING);
+ set_cpu_flag(CIF_MCCK_PENDING);
}
nmi_exit();
}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 46480d81df0..d112fc66f99 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -10,6 +10,7 @@
#include <linux/crash_dump.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <asm/checksum.h>
#include <asm/lowcore.h>
#include <asm/os_info.h>
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 86ec7447e1f..ea75d011a6f 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -274,7 +274,7 @@ static int reserve_pmc_hardware(void)
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_register();
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
return 0;
}
@@ -285,7 +285,7 @@ static void release_pmc_hardware(void)
int flags = PMC_RELEASE;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
}
/* Release the PMU if event is the last perf event */
@@ -367,13 +367,6 @@ static int __hw_perf_event_init(struct perf_event *event)
if (ev >= PERF_CPUM_CF_MAX_CTR)
return -EINVAL;
- /* The CPU measurement counter facility does not have any interrupts
- * to do sampling. Sampling must be provided by external means,
- * for example, by timers.
- */
- if (hwc->sample_period)
- return -EINVAL;
-
/* Use the hardware perf event structure to store the counter number
* in 'config' member and the counter set to which the counter belongs
* in the 'config_base'. The counter set (config_base) is then used
@@ -418,6 +411,12 @@ static int cpumf_pmu_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_RAW:
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ if (is_sampling_event(event))
+ return -ENOENT;
err = __hw_perf_event_init(event);
break;
default:
@@ -640,8 +639,8 @@ static struct pmu cpumf_pmu = {
.cancel_txn = cpumf_pmu_cancel_txn,
};
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (long) hcpu;
int flags;
@@ -674,17 +673,20 @@ static int __init cpumf_pmu_init(void)
ctl_clear_bit(0, 48);
/* register handler for measurement-alert interruptions */
- rc = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
if (rc) {
pr_err("Registering for CPU-measurement alerts "
"failed with rc=%i\n", rc);
goto out;
}
+ cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
- unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
goto out;
}
perf_cpu_notifier(cpumf_pmu_notifier);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 00000000000..4554a4bae39
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+ CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+ struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+ struct attribute **combined, **model;
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ model = cpumcf_z10_pmu_event_attr;
+ break;
+ case 0x2817:
+ case 0x2818:
+ model = cpumcf_z196_pmu_event_attr;
+ break;
+ case 0x2827:
+ case 0x2828:
+ model = cpumcf_zec12_pmu_event_attr;
+ break;
+ default:
+ model = NULL;
+ break;
+ };
+
+ if (!model)
+ goto out;
+
+ combined = merge_attr(cpumcf_pmu_event_attr, model);
+ if (combined)
+ cpumsf_pmu_events_group.attrs = combined;
+out:
+ return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 00000000000..ea0c7b2ef03
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1643 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_sf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+ return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account. Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size Buffer characteristics
+ * ---------------------------------------------------
+ * 64KB == 16 pages (4KB per page)
+ * 1 page for SDB-tables
+ * 15 pages for SDBs
+ *
+ * 32MB == 8192 pages (4KB per page)
+ * 16 pages for SDB-tables
+ * 8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+ struct hws_lsctl_request_block sreq;
+
+ memset(&sreq, 0, sizeof(sreq));
+ return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+ return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+ unsigned long *sdbt, *curr;
+
+ if (!sfb->sdbt)
+ return;
+
+ sdbt = sfb->sdbt;
+ curr = sdbt;
+
+ /* Free the SDBT after all SDBs are processed... */
+ while (1) {
+ if (!*curr || !sdbt)
+ break;
+
+ /* Process table-link entries */
+ if (is_link_entry(curr)) {
+ curr = get_next_sdbt(curr);
+ if (sdbt)
+ free_page((unsigned long) sdbt);
+
+ /* If the origin is reached, sampling buffer is freed */
+ if (curr == sfb->sdbt)
+ break;
+ else
+ sdbt = curr;
+ } else {
+ /* Process SDB pointer */
+ if (*curr) {
+ free_page(*curr);
+ curr++;
+ }
+ }
+ }
+
+ debug_sprintf_event(sfdbg, 5,
+ "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+ unsigned long sdb, *trailer;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+ trailer = trailer_entry_ptr(sdb);
+ *trailer = SDB_TE_ALERT_REQ_MASK;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = sdb;
+
+ return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+ unsigned long num_sdb, gfp_t gfp_flags)
+{
+ int i, rc;
+ unsigned long *new, *tail;
+
+ if (!sfb->sdbt || !sfb->tail)
+ return -EINVAL;
+
+ if (!is_link_entry(sfb->tail))
+ return -EINVAL;
+
+ /* Append to the existing sampling buffer, overwriting the table-link
+ * register.
+ * The tail variables always points to the "tail" (last and table-link)
+ * entry in an SDB-table.
+ */
+ tail = sfb->tail;
+
+ /* Do a sanity check whether the table-link entry points to
+ * the sampling buffer origin.
+ */
+ if (sfb->sdbt != get_next_sdbt(tail)) {
+ debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+ "sampling buffer is not linked: origin=%p"
+ "tail=%p\n",
+ (void *) sfb->sdbt, (void *) tail);
+ return -EINVAL;
+ }
+
+ /* Allocate remaining SDBs */
+ rc = 0;
+ for (i = 0; i < num_sdb; i++) {
+ /* Allocate a new SDB-table if it is full. */
+ if (require_table_link(tail)) {
+ new = (unsigned long *) get_zeroed_page(gfp_flags);
+ if (!new) {
+ rc = -ENOMEM;
+ break;
+ }
+ sfb->num_sdbt++;
+ /* Link current page to tail of chain */
+ *tail = (unsigned long)(void *) new + 1;
+ tail = new;
+ }
+
+ /* Allocate a new sample-data-block.
+ * If there is not enough memory, stop the realloc process
+ * and simply use what was allocated. If this is a temporary
+ * issue, a new realloc call (if required) might succeed.
+ */
+ rc = alloc_sample_data_block(tail, gfp_flags);
+ if (rc)
+ break;
+ sfb->num_sdb++;
+ tail++;
+ }
+
+ /* Link sampling buffer to its origin */
+ *tail = (unsigned long) sfb->sdbt + 1;
+ sfb->tail = tail;
+
+ debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+ " settings: sdbt=%lu sdb=%lu\n",
+ sfb->num_sdbt, sfb->num_sdb);
+ return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB). For each allocation,
+ * a 4K page is used. The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+ int rc;
+
+ if (sfb->sdbt)
+ return -EINVAL;
+
+ /* Allocate the sample-data-block-table origin */
+ sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ return -ENOMEM;
+ sfb->num_sdb = 0;
+ sfb->num_sdbt = 1;
+
+ /* Link the table origin to point to itself to prepare for
+ * realloc_sampling_buffer() invocation.
+ */
+ sfb->tail = sfb->sdbt;
+ *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+ /* Allocate requested number of sample-data-blocks */
+ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+ if (rc) {
+ free_sampling_buffer(sfb);
+ debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+ "realloc_sampling_buffer failed with rc=%i\n", rc);
+ } else
+ debug_sprintf_event(sfdbg, 4,
+ "alloc_sampling_buffer: tear=%p dear=%p\n",
+ sfb->sdbt, (void *) *sfb->sdbt);
+ return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+ struct hws_qsi_info_block si;
+
+ CPUM_SF_MIN_SDB = min;
+ CPUM_SF_MAX_SDB = max;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si))
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+ return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+ : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ if (!sfb->sdbt)
+ return SFB_ALLOC_REG(hwc);
+ if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+ return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+ return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ /* Limit the number of SDBs to not exceed the maximum */
+ num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+ if (num)
+ SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ SFB_ALLOC_REG(hwc) = 0;
+ sfb_account_allocs(num, hwc);
+}
+
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+ struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ size_t sample_size;
+
+ /* The sample size depends on the sampling function: The basic-sampling
+ * function must be always enabled, diagnostic-sampling function is
+ * optional.
+ */
+ sample_size = sfr->bsdes;
+ if (SAMPL_DIAG_MODE(hwc))
+ sample_size += sfr->dsdes;
+
+ return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+ if (cpuhw->sfb.sdbt)
+ free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+ unsigned long n_sdb, freq, factor;
+ size_t sfr_size, sample_size;
+ struct sf_raw_sample *sfr;
+
+ /* Allocate raw sample buffer
+ *
+ * The raw sample buffer is used to temporarily store sampling data
+ * entries for perf raw sample processing. The buffer size mainly
+ * depends on the size of diagnostic-sampling data entries which is
+ * machine-specific. The exact size calculation includes:
+ * 1. The first 4 bytes of diagnostic-sampling data entries are
+ * already reflected in the sf_raw_sample structure. Subtract
+ * these bytes.
+ * 2. The perf raw sample data must be 8-byte aligned (u64) and
+ * perf's internal data size must be considered too. So add
+ * an additional u32 for correct alignment and subtract before
+ * allocating the buffer.
+ * 3. Store the raw sample buffer pointer in the perf event
+ * hardware structure.
+ */
+ sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+ sizeof(u32), sizeof(u64));
+ sfr_size -= sizeof(u32);
+ sfr = kzalloc(sfr_size, GFP_KERNEL);
+ if (!sfr)
+ return -ENOMEM;
+ sfr->size = sfr_size;
+ sfr->bsdes = cpuhw->qsi.bsdes;
+ sfr->dsdes = cpuhw->qsi.dsdes;
+ RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+
+ /* Calculate sampling buffers using 4K pages
+ *
+ * 1. Determine the sample data size which depends on the used
+ * sampling functions, for example, basic-sampling or
+ * basic-sampling with diagnostic-sampling.
+ *
+ * 2. Use the sampling frequency as input. The sampling buffer is
+ * designed for almost one second. This can be adjusted through
+ * the "factor" variable.
+ * In any case, alloc_sampling_buffer() sets the Alert Request
+ * Control indicator to trigger a measurement-alert to harvest
+ * sample-data-blocks (sdb).
+ *
+ * 3. Compute the number of sample-data-blocks and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
+ * exceed a "calculated" maximum. The symbolic maximum is
+ * designed for basic-sampling only and needs to be increased if
+ * diagnostic-sampling is active.
+ * See also the remarks for these symbolic constants.
+ *
+ * 4. Compute the number of sample-data-block-tables (SDBT) and
+ * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+ * to 511 SDBs).
+ */
+ sample_size = event_sample_size(hwc);
+ freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+ factor = 1;
+ n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+ if (n_sdb < CPUM_SF_MIN_SDB)
+ n_sdb = CPUM_SF_MIN_SDB;
+
+ /* If there is already a sampling buffer allocated, it is very likely
+ * that the sampling facility is enabled too. If the event to be
+ * initialized requires a greater sampling buffer, the allocation must
+ * be postponed. Changing the sampling buffer requires the sampling
+ * facility to be in the disabled state. So, account the number of
+ * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+ * before the event is started.
+ */
+ sfb_init_allocs(n_sdb, hwc);
+ if (sf_buffer_available(cpuhw))
+ return 0;
+
+ debug_sprintf_event(sfdbg, 3,
+ "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+ " sample_size=%lu cpuhw=%p\n",
+ SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+ sample_size, cpuhw);
+
+ return alloc_sampling_buffer(&cpuhw->sfb,
+ sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+ unsigned long min)
+{
+ return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+ * Vary the extents between 1% to 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+ if (ratio <= 25)
+ return min_percent(1, base, 1);
+ if (ratio <= 50)
+ return min_percent(1, base, 1);
+ if (ratio <= 75)
+ return min_percent(2, base, 2);
+ if (ratio <= 100)
+ return min_percent(3, base, 3);
+ if (ratio <= 250)
+ return min_percent(4, base, 4);
+
+ return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+ /* The sample_overflow contains the average number of sample data
+ * that has been lost because sample-data-blocks were full.
+ *
+ * Calculate the total number of sample data entries that has been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+ " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num, num_old;
+ int rc;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+ num_old = sfb->num_sdb;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+ if (rc)
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+ "failed with rc=%i\n", rc);
+
+ if (sfb_has_pending_allocs(sfb, hwc))
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+ "req=%lu alloc=%lu remaining=%lu\n",
+ num, sfb->num_sdb - num_old,
+ sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+#define PMC_FAILURE 2
+static void setup_pmc_cpu(void *flags)
+{
+ int err;
+ struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+ err = 0;
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(cpusf, 0, sizeof(*cpusf));
+ err = qsi(&cpusf->qsi);
+ if (err)
+ break;
+ cpusf->flags |= PMU_F_RESERVED;
+ err = sf_disable();
+ if (err)
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ break;
+ case PMC_RELEASE:
+ cpusf->flags &= ~PMU_F_RESERVED;
+ err = sf_disable();
+ if (err) {
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ } else
+ deallocate_buffers(cpusf);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ break;
+ }
+ if (err)
+ *((int *) flags) |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ perf_release_sampling();
+}
+
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+ int err;
+
+ err = perf_reserve_sampling();
+ if (err)
+ return err;
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ if (flags & PMC_FAILURE) {
+ release_pmc_hardware();
+ return -ENODEV;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Free raw sample buffer */
+ if (RAWSAMPLE_REG(&event->hw))
+ kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+ /* Release PMC if this is the last perf event */
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+ unsigned long *sdbt_origin)
+{
+ struct sf_raw_sample *sfr;
+
+ /* (Re)set to first sample-data-block-table */
+ TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+ /* (Re)set raw sampling buffer register */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ memset(&sfr->basic, 0, sizeof(sfr->basic));
+ memset(&sfr->diag, 0, sfr->dsdes);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+ int cpu, err;
+
+ /* Reserve CPU-measurement sampling facility */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ if (err)
+ goto out;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+ * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1)
+ qsi(&si);
+ else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ /* Check and set other sampling flags */
+ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+ /* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ */
+ rate = 0;
+ if (attr->freq) {
+ rate = freq_to_sample_rate(&si, attr->sample_freq);
+ rate = hw_limit_rate(&si, rate);
+ attr->freq = 0;
+ attr->sample_period = rate;
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(&si, hwc->sample_period);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(&si, rate) >
+ sysctl_perf_event_sample_rate) {
+ err = -EINVAL;
+ debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+ goto out;
+ }
+ }
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+
+ /* Initialize sample data overflow accounting */
+ hwc->extra_reg.reg = REG_OVERFLOW;
+ OVERFLOW_REG(hwc) = 0;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+ * counter facility. However, the counter facility
+ * is more precise and, hence, restrict this PMU to
+ * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check online status of the CPU to which the event is pinned */
+ if (event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ if (unlikely(err))
+ if (event->destroy)
+ event->destroy(event);
+ return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Check whether to extent the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ if (cpuhw->event) {
+ hwc = &cpuhw->event->hw;
+ /* Account number of overflow-designated buffer extents */
+ sfb_account_overflows(cpuhw, hwc);
+ if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+
+ /* (Re)enable the PMU and sampling facility */
+ cpuhw->flags |= PMU_F_ENABLED;
+ barrier();
+
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ cpuhw->flags &= ~PMU_F_ENABLED;
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 1, err);
+ return;
+ }
+
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+ "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+ cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+ (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 2, err);
+ return;
+ }
+
+ /* Save state of TEAR and DEAR register contents */
+ if (!qsi(&si)) {
+ /* TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+ } else
+ debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+ "qsi() failed with err=%i\n", err);
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ if (event->attr.exclude_guest && sde_regs->in_guest)
+ return 1;
+ if (event->attr.exclude_host && !sde_regs->in_guest)
+ return 1;
+ return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sample: Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample. The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ raw.size = sfr->size;
+ raw.data = sfr;
+ data.raw = &raw;
+
+ /* Setup pt_regs to look like an CPU-measurement external interrupt
+ * using the Program Request Alert code. The regs.int_parm_long
+ * field which is unused contains additional sample-data-entry related
+ * indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ regs.psw.addr = sfr->basic.ia;
+ if (sfr->basic.T)
+ regs.psw.mask |= PSW_MASK_DAT;
+ if (sfr->basic.W)
+ regs.psw.mask |= PSW_MASK_WAIT;
+ if (sfr->basic.P)
+ regs.psw.mask |= PSW_MASK_PSTATE;
+ switch (sfr->basic.AS) {
+ case 0x0:
+ regs.psw.mask |= PSW_ASC_PRIMARY;
+ break;
+ case 0x1:
+ regs.psw.mask |= PSW_ASC_ACCREG;
+ break;
+ case 0x2:
+ regs.psw.mask |= PSW_ASC_SECONDARY;
+ break;
+ case 0x3:
+ regs.psw.mask |= PSW_ASC_HOME;
+ break;
+ }
+
+ /* The host-program-parameter (hpp) contains the sie control
+ * block that is set by sie64a() in entry64.S. Check if hpp
+ * refers to a valid control block and set sde_regs flags
+ * accordingly. This would allow to use hpp values for other
+ * purposes too.
+ * For now, simply use a non-zero value as guest indicator.
+ */
+ if (sfr->basic.hpp)
+ sde_regs->in_guest = 1;
+
+ overflow = 0;
+ if (perf_exclude_event(event, &regs, sde_regs))
+ goto out;
+ if (perf_event_overflow(event, &data, &regs)) {
+ overflow = 1;
+ event->pmu->stop(event, 0);
+ }
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+ unsigned int flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ /* Only basic-sampling data entries with data-entry-format
+ * version of 0x0001 can be processed.
+ */
+ if (sample->basic.def != 0x0001)
+ return 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ /* The data-entry-format number of diagnostic-sampling data
+ * entries can vary. Because diagnostic data is just passed
+ * through, do only a sanity check on the DEF.
+ */
+ if (sample->diag.def < 0x8001)
+ return 0;
+ return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ /* This check applies only to basic-sampling data entries of potentially
+ * combined-sampling data entries. Invalid entries cannot be processed
+ * by the PMU and, thus, do not deliver an associated
+ * diagnostic-sampling data entry.
+ */
+ if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+ return 0;
+ /*
+ * Samples are skipped, if they are invalid or for which the
+ * instruction address is not predictable, i.e., the wait-state bit is
+ * set.
+ */
+ if (sample->basic.I || sample->basic.W)
+ return 0;
+ return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ sample->basic.def = 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+ struct hws_combined_entry *sample)
+{
+ if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+ sfr->basic = sample->basic;
+ if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+ memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+ struct hws_trailer_entry *te,
+ unsigned long flags)
+{
+ debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+ "sampling data entry: te->f=%i basic.def=%04x (%p)"
+ " diag.def=%04x (%p)\n", te->f,
+ sample->basic.def, &sample->basic,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? sample->diag.def : 0xFFFF,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? &sample->diag : NULL);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ unsigned long flags = SAMPL_FLAGS(&event->hw);
+ struct hws_combined_entry *sample;
+ struct hws_trailer_entry *te;
+ struct sf_raw_sample *sfr;
+ size_t sample_size;
+
+ /* Prepare and initialize raw sample data */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+ sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+ sample_size = event_sample_size(&event->hw);
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sample = (struct hws_combined_entry *) *sdbt;
+ while ((unsigned long *) sample < (unsigned long *) te) {
+ /* Check for an empty sample */
+ if (!sample->basic.def)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check sampling data entry */
+ if (sample_format_is_valid(sample, flags)) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ if (sample_is_consistent(sample, flags)) {
+ /* Deliver sample data to perf */
+ sfr_store_sample(sfr, sample);
+ *overflow = perf_push_sample(event, sfr);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ debug_sample_entry(sample, te, flags);
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample */
+ reset_sample_slot(sample, flags);
+ sample += sample_size;
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flash_all flag
+ * to also walk through partially filled sample-data-blocks. It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+ unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ if (flush_all && SDB_FULL_BLOCKS(hwc))
+ flush_all = 0;
+
+ sdbt = (unsigned long *) TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+ "overflow=%llu timestamp=0x%llx\n",
+ sdbt, te->overflow,
+ (te->f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, sdbt, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
+ do {
+ te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+ te_flags |= SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ te->flags, te->overflow,
+ te_flags, 0ULL));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long) sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+
+ /* If an event overflow happened, discard samples by
+ * processing any remaining sample-data-blocks.
+ */
+ if (event_overflow)
+ flush_all = 1;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+ if (sampl_overflow || event_overflow)
+ debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+ "overflow stats: sample=%llu event=%llu\n",
+ sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ int err;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!cpuhw->sfb.sdbt)
+ return -EINVAL;
+
+ err = 0;
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ cpuhw->lsctl.es = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.ed = 1;
+
+ /* Set in_use flag and store event */
+ event->hw.idx = 0; /* only one sampling event per CPU supported */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .event_idx = cpumsf_pmu_event_idx,
+ .attr_groups = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE)
+ hw_perf_event_update(cpuhw->event, 0);
+ else
+ WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+
+static int cpumf_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!atomic_read(&num_events))
+ return NOTIFY_OK;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min=%lu max=%lu (diag=x%lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI 0x0001
+#define RS_INIT_FAILURE_BSDES 0x0002
+#define RS_INIT_FAILURE_ALRT 0x0003
+#define RS_INIT_FAILURE_PERF 0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason=%04x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+ return -ENODEV;
+ }
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad)
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
+ sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ if (!sfdbg)
+ pr_err("Registering for s390dbf failed\n");
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index f58f37f6682..61595c1f0a0 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,7 +1,7 @@
/*
* Performance event support for s390x
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -13,17 +13,22 @@
#include <linux/kernel.h>
#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
+#include <asm/sysinfo.h>
const char *perf_pmu_name(void)
{
if (cpum_cf_avail() || cpum_sf_avail())
- return "CPU-measurement facilities (CPUMF)";
+ return "CPU-Measurement Facilities (CPU-MF)";
return "pmu";
}
EXPORT_SYMBOL(perf_pmu_name);
@@ -34,35 +39,189 @@ int perf_num_counters(void)
if (cpum_cf_avail())
num += PERF_CPUM_CF_MAX_CTR;
+ if (cpum_sf_avail())
+ num += PERF_CPUM_SF_MAX_CTR;
return num;
}
EXPORT_SYMBOL(perf_num_counters);
-void perf_event_print_debug(void)
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
{
- struct cpumf_ctr_info cf_info;
+ struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+ if (!stack)
+ return NULL;
+
+ return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return false;
+#if IS_ENABLED(CONFIG_KVM)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ return is_in_guest(regs) ? instruction_pointer_guest(regs)
+ : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+ return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+ struct perf_sf_sde_regs *sde_regs;
unsigned long flags;
- int cpu;
- if (!cpum_cf_avail())
- return;
+ sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+ if (sde_regs->in_guest)
+ flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ return flags;
+}
- local_irq_save(flags);
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ /* Check if the cpum_sf PMU has created the pt_regs structure.
+ * In this case, perf misc flags can be easily extracted. Otherwise,
+ * do regular checks on the pt_regs content.
+ */
+ if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+ if (!regs->gprs[15])
+ return perf_misc_flags_sf(regs);
+
+ if (is_in_guest(regs))
+ return perf_misc_guest_flags(regs);
+
+ return user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+}
+
+static void print_debug_cf(void)
+{
+ struct cpumf_ctr_info cf_info;
+ int cpu = smp_processor_id();
- cpu = smp_processor_id();
memset(&cf_info, 0, sizeof(cf_info));
- if (!qctri(&cf_info)) {
+ if (!qctri(&cf_info))
pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
cpu, cf_info.cfvn, cf_info.csvn,
cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
- print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET,
- &cf_info, sizeof(cf_info));
- }
+}
+
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
local_irq_restore(flags);
}
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+ struct cpumf_ctr_info ci;
+
+ memset(&ci, 0, sizeof(ci));
+ if (qctri(&ci))
+ return;
+
+ seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+ "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ if (!si.as && !si.ad)
+ return;
+
+ seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+ " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+ if (si.as)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+ " sample_size=%u\n", si.bsdes);
+ if (si.ad)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+ " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
/* See also arch/s390/kernel/traps.c */
static unsigned long __store_trace(struct perf_callchain_entry *entry,
unsigned long sp,
@@ -122,3 +281,44 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
__store_trace(entry, head, S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
}
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%04llx,name=%s\n",
+ pmu_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+ int err;
+
+ err = 0;
+ spin_lock(&perf_hw_owner_lock);
+ if (perf_sampling_owner) {
+ pr_warn("The sampling facility is already reserved by %p\n",
+ perf_sampling_owner);
+ err = -EBUSY;
+ } else
+ perf_sampling_owner = __builtin_return_address(0);
+ spin_unlock(&perf_hw_owner_lock);
+ return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+ spin_lock(&perf_hw_owner_lock);
+ WARN_ON(!perf_sampling_owner);
+ perf_sampling_owner = NULL;
+ spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 14bdecb6192..813ec726087 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,7 +78,7 @@ PGM_CHECK_DEFAULT /* 34 */
PGM_CHECK_DEFAULT /* 35 */
PGM_CHECK_DEFAULT /* 36 */
PGM_CHECK_DEFAULT /* 37 */
-PGM_CHECK_64BIT(do_asce_exception) /* 38 */
+PGM_CHECK_64BIT(do_dat_exception) /* 38 */
PGM_CHECK_64BIT(do_dat_exception) /* 39 */
PGM_CHECK_64BIT(do_dat_exception) /* 3a */
PGM_CHECK_64BIT(do_dat_exception) /* 3b */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 536d64579d9..93b9ca42e5c 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -61,41 +61,28 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-/*
- * The idle loop on a S390...
- */
-static void default_idle(void)
+void arch_cpu_idle(void)
{
- if (cpu_is_offline(smp_processor_id()))
- cpu_die();
- local_irq_disable();
- if (need_resched()) {
- local_irq_enable();
- return;
- }
local_mcck_disable();
- if (test_thread_flag(TIF_MCCK_PENDING)) {
+ if (test_cpu_flag(CIF_MCCK_PENDING)) {
local_mcck_enable();
local_irq_enable();
return;
}
/* Halt the cpu and keep track of cpu time accounting. */
vtime_stop_cpu();
+ local_irq_enable();
}
-void cpu_idle(void)
+void arch_cpu_idle_exit(void)
{
- for (;;) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
- while (!need_resched() && !test_thread_flag(TIF_MCCK_PENDING))
- default_idle();
- rcu_idle_exit();
- tick_nohz_idle_exit();
- if (test_thread_flag(TIF_MCCK_PENDING))
- s390_handle_mcck();
- schedule_preempt_disabled();
- }
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+}
+
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
}
extern void __kprobes kernel_thread_starter(void);
@@ -136,7 +123,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
- clear_tsk_thread_flag(p, TIF_PER_TRAP);
/* Initialize per thread user and system timer values */
ti = task_thread_info(p);
ti->user_timer = 0;
@@ -152,7 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr = PSW_ADDR_AMODE |
(unsigned long) kernel_thread_starter;
@@ -165,6 +151,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
}
frame->childregs = *current_pt_regs();
frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
@@ -178,7 +165,8 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the child.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&p->thread.fp_regs, &current->thread.fp_regs,
sizeof(s390_fp_regs));
/* Set a new TLS ? */
@@ -186,7 +174,9 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
p->thread.acrs[0] = frame->childregs.gprs[6];
#else /* CONFIG_64BIT */
/* Save the fpu registers to new thread structure. */
- save_fp_regs(&p->thread.fp_regs);
+ save_fp_ctl(&p->thread.fp_regs.fpc);
+ save_fp_regs(p->thread.fp_regs.fprs);
+ p->thread.fp_regs.pad = 0;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
unsigned long tls = frame->childregs.gprs[6];
@@ -218,10 +208,12 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the dump.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs));
#else /* CONFIG_64BIT */
- save_fp_regs(fpregs);
+ save_fp_ctl(&fpregs->fpc);
+ save_fp_regs(fpregs->fprs);
#endif /* CONFIG_64BIT */
return 1;
}
@@ -269,20 +261,18 @@ static inline unsigned long brk_rnd(void)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ unsigned long ret;
- if (ret < mm->brk)
- return mm->brk;
- return ret;
+ ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ return (ret > mm->brk) ? ret : mm->brk;
}
unsigned long randomize_et_dyn(unsigned long base)
{
- unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+ unsigned long ret;
if (!(current->flags & PF_RANDOMIZE))
return base;
- if (ret < base)
- return base;
- return ret;
+ ret = PAGE_ALIGN(base + brk_rnd());
+ return (ret > base) ? ret : base;
}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 753c41d0ffd..24612029f45 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
/*
* cpu_init - initializes state that is per-CPU.
*/
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
struct cpuid *id = &__get_cpu_var(cpu_id);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index a314c57f4e9..5dc7ad9e2fb 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -47,7 +47,7 @@ enum s390_regset {
REGSET_GENERAL_EXTENDED,
};
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
@@ -56,17 +56,26 @@ void update_per_regs(struct task_struct *task)
#ifdef CONFIG_64BIT
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr0, cr0_new;
+ unsigned long cr, cr_new;
- __ctl_store(cr0, 0, 0);
- /* set or clear transaction execution bits 8 and 9. */
+ __ctl_store(cr, 0, 0);
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new = cr | (1UL << 55);
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr0_new = cr0 & ~(3UL << 54);
- else
- cr0_new = cr0 | (3UL << 54);
- /* Only load control register 0 if necessary. */
- if (cr0 != cr0_new)
- __ctl_load(cr0_new, 0, 0);
+ cr_new &= ~(1UL << 55);
+ if (cr_new != cr)
+ __ctl_load(cr_new, 0, 0);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
+ }
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
}
#endif
/* Copy user specified PER registers */
@@ -76,7 +85,10 @@ void update_per_regs(struct task_struct *task)
/* merge TIF_SINGLE_STEP into user specified PER registers. */
if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
- new.control |= PER_EVENT_IFETCH;
+ if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+ new.control |= PER_EVENT_BRANCH;
+ else
+ new.control |= PER_EVENT_IFETCH;
#ifdef CONFIG_64BIT
new.control |= PER_CONTROL_SUSPENSION;
new.control |= PER_EVENT_TRANSACTION_END;
@@ -98,16 +110,20 @@ void update_per_regs(struct task_struct *task)
void user_enable_single_step(struct task_struct *task)
{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
set_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_per_regs(task);
}
void user_disable_single_step(struct task_struct *task)
{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
- if (task == current)
- update_per_regs(task);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ set_tsk_thread_flag(task, TIF_BLOCK_STEP);
}
/*
@@ -120,7 +136,7 @@ void ptrace_disable(struct task_struct *task)
memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
- clear_tsk_thread_flag(task, TIF_PER_TRAP);
+ clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
task->thread.per_flags = 0;
}
@@ -190,9 +206,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
* psw and gprs are stored on the stack
*/
tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
- if (addr == (addr_t) &dummy->regs.psw.mask)
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
/* Return a clean psw mask. */
- tmp = psw_user_bits | (tmp & PSW_MASK_USER);
+ tmp &= PSW_MASK_USER | PSW_MASK_RI;
+ tmp |= PSW_USER_BITS;
+ }
} else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
/*
@@ -231,8 +249,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
offset = addr - (addr_t) &dummy->regs.fp_regs;
tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
- tmp &= (unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32);
+ tmp <<= BITS_PER_LONG - 32;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -313,11 +330,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/*
* psw and gprs are stored on the stack
*/
- if (addr == (addr_t) &dummy->regs.psw.mask &&
- ((data & ~PSW_MASK_USER) != psw_user_bits ||
- ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
- /* Invalid psw mask. */
- return -EINVAL;
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ unsigned long mask = PSW_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
+ return -EINVAL;
+ }
*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
@@ -355,10 +381,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/*
* floating point regs. are stored in the thread structure
*/
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc &&
- (data & ~((unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32))) != 0)
- return -EINVAL;
+ if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
+ if ((unsigned int) data != 0 ||
+ test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ return -EINVAL;
offset = addr - (addr_t) &dummy->regs.fp_regs;
*(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
@@ -447,6 +473,26 @@ long arch_ptrace(struct task_struct *child, long request,
if (!MACHINE_HAS_TE)
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
default:
/* Removing high order bit from addr (only for 31 bit). */
@@ -529,7 +575,8 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
if (addr == (addr_t) &dummy32->regs.psw.mask) {
/* Fake a 31 bit psw mask. */
tmp = (__u32)(regs->psw.mask >> 32);
- tmp = psw32_user_bits | (tmp & PSW32_MASK_USER);
+ tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+ tmp |= PSW32_USER_BITS;
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Fake a 31 bit psw address. */
tmp = (__u32) regs->psw.addr |
@@ -626,13 +673,19 @@ static int __poke_user_compat(struct task_struct *child,
* psw, gprs, acrs and orig_gpr2 are stored on the stack
*/
if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ __u32 mask = PSW32_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
/* Build a 64 bit psw mask from 31 bit mask. */
- if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits)
+ if ((tmp ^ PSW32_USER_BITS) & ~mask)
/* Invalid psw mask. */
return -EINVAL;
+ if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
(regs->psw.mask & PSW_MASK_BA) |
- (__u64)(tmp & PSW32_MASK_USER) << 32;
+ (__u64)(tmp & mask) << 32;
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Build a 64 bit psw address from 31 bit address. */
regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
@@ -668,8 +721,7 @@ static int __poke_user_compat(struct task_struct *child,
* floating point regs. are stored in the thread structure
*/
if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
- (tmp & ~FPC_VALID_MASK) != 0)
- /* Invalid floating point control. */
+ test_fp_ctl(tmp))
return -EINVAL;
offset = addr - (addr_t) &dummy32->regs.fp_regs;
*(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
@@ -769,7 +821,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
* debugger stored an invalid system call number. Skip
* the system call and the system call restart handling.
*/
- clear_thread_flag(TIF_SYSCALL);
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
ret = -1;
}
@@ -867,8 +919,10 @@ static int s390_fpregs_get(struct task_struct *target,
const struct user_regset *regset, unsigned int pos,
unsigned int count, void *kbuf, void __user *ubuf)
{
- if (target == current)
- save_fp_regs(&target->thread.fp_regs);
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ }
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fp_regs, 0, -1);
@@ -881,19 +935,21 @@ static int s390_fpregs_set(struct task_struct *target,
{
int rc = 0;
- if (target == current)
- save_fp_regs(&target->thread.fp_regs);
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ }
/* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 fpc[2] = { target->thread.fp_regs.fpc, 0 };
- rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc,
+ u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
- if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0)
+ if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
return -EINVAL;
- target->thread.fp_regs.fpc = fpc[0];
+ target->thread.fp_regs.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
@@ -901,8 +957,10 @@ static int s390_fpregs_set(struct task_struct *target,
target->thread.fp_regs.fprs,
offsetof(s390_fp_regs, fprs), -1);
- if (rc == 0 && target == current)
- restore_fp_regs(&target->thread.fp_regs);
+ if (rc == 0 && target == current) {
+ restore_fp_ctl(&target->thread.fp_regs.fpc);
+ restore_fp_regs(target->thread.fp_regs.fprs);
+ }
return rc;
}
@@ -1271,7 +1329,7 @@ int regs_query_register_offset(const char *name)
if (!name || *name != 'r')
return -EINVAL;
- if (strict_strtoul(name + 1, 10, &offset))
+ if (kstrtoul(name + 1, 10, &offset))
return -EINVAL;
if (offset >= NUM_GPRS)
return -EINVAL;
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 077a99389b0..26b4ae96fdd 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -40,8 +40,6 @@ static void disable_runtime_instr(void)
static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
{
cb->buf_limit = 0xfff;
- if (s390_user_mode == HOME_SPACE_MODE)
- cb->home_space = 1;
cb->int_requested = 1;
cb->pstate = 1;
cb->pstate_set_buf = 1;
@@ -139,10 +137,11 @@ static int __init runtime_instr_init(void)
if (!runtime_instr_avail())
return 0;
- measurement_alert_subclass_register();
- rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ runtime_instr_int_handler);
if (rc)
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
else
pr_info("Runtime instrumentation facility initialized\n");
return rc;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9bdbcef1da9..9f60467938d 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -5,8 +5,9 @@
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
#endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
#endif
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index b6506ee32a3..a41f2c99dcc 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/irq.h>
LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
@@ -73,9 +74,9 @@ _sclp_wait_int:
lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
.LwaitS1:
lh %r7,LC_EXT_INT_CODE
- chi %r7,0x1004 # timeout?
+ chi %r7,EXT_IRQ_CLK_COMP # timeout?
je .LtimeoutS1
- chi %r7,0x2401 # service int?
+ chi %r7,EXT_IRQ_SERVICE_SIG # service int?
jne .LloopS1
sr %r2,%r2
l %r3,LC_EXT_INT_PARAM
@@ -225,7 +226,7 @@ _sclp_print:
ahi %r2,1
ltr %r0,%r0 # end of string?
jz .LfinalizemtoS4
- chi %r0,0x15 # end of line (NL)?
+ chi %r0,0x0a # end of line (NL)?
jz .LfinalizemtoS4
stc %r0,0(%r6,%r7) # copy to mto
la %r11,0(%r6,%r7)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a5360de85ec..1e2264b46e4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -47,7 +47,6 @@
#include <linux/compat.h>
#include <asm/ipl.h>
-#include <asm/uaccess.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
@@ -64,18 +63,6 @@
#include <asm/sclp.h>
#include "entry.h"
-long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
- PSW_MASK_EA | PSW_MASK_BA;
-long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
- PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_ASC_HOME;
-
-/*
- * User copy operations.
- */
-struct uaccess_ops uaccess;
-EXPORT_SYMBOL(uaccess);
-
/*
* Machine setup..
*/
@@ -91,10 +78,9 @@ EXPORT_SYMBOL(console_irq);
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
-struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
-
int __initdata memory_end_set;
unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -225,27 +211,19 @@ static void __init conmode_default(void)
}
}
-#ifdef CONFIG_ZFCPDUMP
-static void __init setup_zfcpdump(unsigned int console_devno)
+#ifdef CONFIG_CRASH_DUMP
+static void __init setup_zfcpdump(void)
{
- static char str[41];
-
if (ipl_info.type != IPL_TYPE_FCP_DUMP)
return;
if (OLDMEM_BASE)
return;
- if (console_devno != -1)
- sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x",
- ipl_info.data.fcp.dev_id.devno, console_devno);
- else
- sprintf(str, " cio_ignore=all,!0.0.%04x",
- ipl_info.data.fcp.dev_id.devno);
- strcat(boot_command_line, str);
+ strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
console_loglevel = 2;
}
#else
-static inline void setup_zfcpdump(unsigned int console_devno) {}
-#endif /* CONFIG_ZFCPDUMP */
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
/*
* Reboot, halt and power_off stubs. They just call _machine_restart,
@@ -294,6 +272,7 @@ EXPORT_SYMBOL_GPL(pm_power_off);
static int __init early_parse_mem(char *p)
{
memory_end = memparse(p, &p);
+ memory_end &= PAGE_MASK;
memory_end_set = 1;
return 0;
}
@@ -308,43 +287,6 @@ static int __init parse_vmalloc(char *arg)
}
early_param("vmalloc", parse_vmalloc);
-unsigned int s390_user_mode = PRIMARY_SPACE_MODE;
-EXPORT_SYMBOL_GPL(s390_user_mode);
-
-static void __init set_user_mode_primary(void)
-{
- psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME;
- psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY;
-#ifdef CONFIG_COMPAT
- psw32_user_bits =
- (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY;
-#endif
- uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos_switch : uaccess_pt;
-}
-
-static int __init early_parse_user_mode(char *p)
-{
- if (p && strcmp(p, "primary") == 0)
- s390_user_mode = PRIMARY_SPACE_MODE;
- else if (!p || strcmp(p, "home") == 0)
- s390_user_mode = HOME_SPACE_MODE;
- else
- return 1;
- return 0;
-}
-early_param("user_mode", early_parse_user_mode);
-
-static void __init setup_addressing_mode(void)
-{
- if (s390_user_mode != PRIMARY_SPACE_MODE)
- return;
- set_user_mode_primary();
- if (MACHINE_HAS_MVCOS)
- pr_info("Address spaces switched, mvcos available\n");
- else
- pr_info("Address spaces switched, mvcos not available\n");
-}
-
void *restart_stack __attribute__((__section__(".data")));
static void __init setup_lowcore(void)
@@ -356,32 +298,35 @@ static void __init setup_lowcore(void)
*/
BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
- lc->restart_psw.mask = psw_kernel_bits;
+ lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = psw_kernel_bits |
+ lc->external_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->external_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = psw_kernel_bits |
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
- lc->program_new_psw.mask = psw_kernel_bits |
+ lc->program_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = psw_kernel_bits;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = psw_kernel_bits |
+ lc->io_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->clock_comparator = -1ULL;
- lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
+ lc->kernel_stack = ((unsigned long) &init_thread_union)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
- __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
+ __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
+ + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->panic_stack = (unsigned long)
- __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
+ __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
+ + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
lc->machine_flags = S390_lowcore.machine_flags;
@@ -413,7 +358,7 @@ static void __init setup_lowcore(void)
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
- * restart data to the absolute zero lowcore. This is necesary if
+ * restart data to the absolute zero lowcore. This is necessary if
* PSW restart is done on an offline CPU that has lowcore zero.
*/
lc->restart_stack = (unsigned long) restart_stack;
@@ -428,6 +373,10 @@ static void __init setup_lowcore(void)
mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
+#ifdef CONFIG_SMP
+ lc->spinlock_lockval = arch_spin_lockval(0);
+#endif
+
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
}
@@ -456,7 +405,8 @@ static struct resource __initdata *standard_resources[] = {
static void __init setup_resources(void)
{
struct resource *res, *std_res, *sub_res;
- int i, j;
+ struct memblock_region *reg;
+ int j;
code_resource.start = (unsigned long) &_text;
code_resource.end = (unsigned long) &_etext - 1;
@@ -465,28 +415,13 @@ static void __init setup_resources(void)
bss_resource.start = (unsigned long) &__bss_start;
bss_resource.end = (unsigned long) &__bss_stop - 1;
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (!memory_chunk[i].size)
- continue;
- if (memory_chunk[i].type == CHUNK_OLDMEM ||
- memory_chunk[i].type == CHUNK_CRASHK)
- continue;
+ for_each_memblock(memory, reg) {
res = alloc_bootmem_low(sizeof(*res));
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
- switch (memory_chunk[i].type) {
- case CHUNK_READ_WRITE:
- case CHUNK_CRASHK:
- res->name = "System RAM";
- break;
- case CHUNK_READ_ONLY:
- res->name = "System ROM";
- res->flags |= IORESOURCE_READONLY;
- break;
- default:
- res->name = "reserved";
- }
- res->start = memory_chunk[i].addr;
- res->end = res->start + memory_chunk[i].size - 1;
+
+ res->name = "System RAM";
+ res->start = reg->base;
+ res->end = reg->base + reg->size - 1;
request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -507,51 +442,14 @@ static void __init setup_resources(void)
}
}
-unsigned long real_memory_size;
-EXPORT_SYMBOL_GPL(real_memory_size);
-
static void __init setup_memory_end(void)
{
unsigned long vmax, vmalloc_size, tmp;
- int i;
-
-
-#ifdef CONFIG_ZFCPDUMP
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
- memory_end = ZFCPDUMP_HSA_SIZE;
- memory_end_set = 1;
- }
-#endif
- real_memory_size = 0;
- memory_end &= PAGE_MASK;
-
- /*
- * Make sure all chunks are MAX_ORDER aligned so we don't need the
- * extra checks that HOLES_IN_ZONE would require.
- */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- unsigned long start, end;
- struct mem_chunk *chunk;
- unsigned long align;
-
- chunk = &memory_chunk[i];
- align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
- start = (chunk->addr + align - 1) & ~(align - 1);
- end = (chunk->addr + chunk->size) & ~(align - 1);
- if (start >= end)
- memset(chunk, 0, sizeof(*chunk));
- else {
- chunk->addr = start;
- chunk->size = end - start;
- }
- real_memory_size = max(real_memory_size,
- chunk->addr + chunk->size);
- }
/* Choose kernel address space layout: 2, 3, or 4 levels. */
#ifdef CONFIG_64BIT
vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
- tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
if (tmp <= (1UL << 42))
vmax = 1UL << 42; /* 3-level kernel page table */
@@ -571,25 +469,19 @@ static void __init setup_memory_end(void)
/* Split remaining virtual space between 1:1 mapping & vmemmap array */
tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ tmp = SECTION_ALIGN_UP(tmp);
tmp = VMALLOC_START - tmp * sizeof(struct page);
tmp &= ~((vmax >> 11) - 1); /* align to page table level */
tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
vmemmap = (struct page *) tmp;
/* Take care that memory_end is set and <= vmemmap */
- memory_end = min(memory_end ?: real_memory_size, tmp);
-
- /* Fixup memory chunk array to fit into 0..memory_end */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- struct mem_chunk *chunk = &memory_chunk[i];
+ memory_end = min(memory_end ?: max_physmem_end, tmp);
+ max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+ memblock_remove(memory_end, ULONG_MAX);
- if (chunk->addr >= memory_end) {
- memset(chunk, 0, sizeof(*chunk));
- continue;
- }
- if (chunk->addr + chunk->size > memory_end)
- chunk->size = memory_end - chunk->addr;
- }
+ pr_notice("Max memory size: %luMB\n", memory_end >> 20);
}
static void __init setup_vmcoreinfo(void)
@@ -600,98 +492,6 @@ static void __init setup_vmcoreinfo(void)
#ifdef CONFIG_CRASH_DUMP
/*
- * Find suitable location for crashkernel memory
- */
-static unsigned long __init find_crash_base(unsigned long crash_size,
- char **msg)
-{
- unsigned long crash_base;
- struct mem_chunk *chunk;
- int i;
-
- if (memory_chunk[0].size < crash_size) {
- *msg = "first memory chunk must be at least crashkernel size";
- return 0;
- }
- if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
- return OLDMEM_BASE;
-
- for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
- chunk = &memory_chunk[i];
- if (chunk->size == 0)
- continue;
- if (chunk->type != CHUNK_READ_WRITE)
- continue;
- if (chunk->size < crash_size)
- continue;
- crash_base = (chunk->addr + chunk->size) - crash_size;
- if (crash_base < crash_size)
- continue;
- if (crash_base < ZFCPDUMP_HSA_SIZE_MAX)
- continue;
- if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE)
- continue;
- return crash_base;
- }
- *msg = "no suitable area found";
- return 0;
-}
-
-/*
- * Check if crash_base and crash_size is valid
- */
-static int __init verify_crash_base(unsigned long crash_base,
- unsigned long crash_size,
- char **msg)
-{
- struct mem_chunk *chunk;
- int i;
-
- /*
- * Because we do the swap to zero, we must have at least 'crash_size'
- * bytes free space before crash_base
- */
- if (crash_size > crash_base) {
- *msg = "crashkernel offset must be greater than size";
- return -EINVAL;
- }
-
- /* First memory chunk must be at least crash_size */
- if (memory_chunk[0].size < crash_size) {
- *msg = "first memory chunk must be at least crashkernel size";
- return -EINVAL;
- }
- /* Check if we fit into the respective memory chunk */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- chunk = &memory_chunk[i];
- if (chunk->size == 0)
- continue;
- if (crash_base < chunk->addr)
- continue;
- if (crash_base >= chunk->addr + chunk->size)
- continue;
- /* we have found the memory chunk */
- if (crash_base + crash_size > chunk->addr + chunk->size) {
- *msg = "selected memory chunk is too small for "
- "crashkernel memory";
- return -EINVAL;
- }
- return 0;
- }
- *msg = "invalid memory range specified";
- return -EINVAL;
-}
-
-/*
- * Reserve kdump memory by creating a memory hole in the mem_chunk array
- */
-static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
- int type)
-{
- create_mem_hole(memory_chunk, addr, size, type);
-}
-
-/*
* When kdump is enabled, we have to ensure that no memory from
* the area [0 - crashkernel memory size] and
* [crashk_res.start - crashk_res.end] is set offline.
@@ -717,21 +517,44 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
+ * Make sure that the area behind memory_end is protected
+ */
+static void reserve_memory_end(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+ !OLDMEM_BASE && sclp_get_hsa_size()) {
+ memory_end = sclp_get_hsa_size();
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
+ }
+#endif
+ if (!memory_end_set)
+ return;
+ memblock_reserve(memory_end, ULONG_MAX);
+}
+
+/*
* Make sure that oldmem, where the dump is stored, is protected
*/
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (!OLDMEM_BASE)
- return;
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
- reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
- reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
- CHUNK_OLDMEM);
- if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
- saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
- else
- saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
#endif
}
@@ -742,168 +565,132 @@ static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
unsigned long long crash_base, crash_size;
- char *msg = NULL;
+ phys_addr_t low, high;
int rc;
rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
&crash_base);
- if (rc || crash_size == 0)
- return;
+
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
- if (register_memory_notifier(&kdump_mem_nb))
+ if (rc || crash_size == 0)
return;
- if (!crash_base)
- crash_base = find_crash_base(crash_size, &msg);
- if (!crash_base) {
- pr_info("crashkernel reservation failed: %s\n", msg);
- unregister_memory_notifier(&kdump_mem_nb);
+
+ if (memblock.memory.regions[0].size < crash_size) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "first memory chunk must be at least crashkernel size");
return;
}
- if (verify_crash_base(crash_base, crash_size, &msg)) {
- pr_info("crashkernel reservation failed: %s\n", msg);
- unregister_memory_notifier(&kdump_mem_nb);
+
+ low = crash_base ?: OLDMEM_BASE;
+ high = low + crash_size;
+ if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ /* The crashkernel fits into OLDMEM, reuse OLDMEM */
+ crash_base = low;
+ } else {
+ /* Find suitable area in free memory */
+ low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+ high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+ if (crash_base && crash_base < low) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "crash_base too low");
+ return;
+ }
+ low = crash_base ?: low;
+ crash_base = memblock_find_in_range(low, high, crash_size,
+ KEXEC_CRASH_MEM_ALIGN);
+ }
+
+ if (!crash_base) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "no suitable area found");
return;
}
+
+ if (register_memory_notifier(&kdump_mem_nb))
+ return;
+
if (!OLDMEM_BASE && MACHINE_IS_VM)
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
- reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
+ memblock_remove(crash_base, crash_size);
pr_info("Reserving %lluMB of memory at %lluMB "
"for crashkernel (System RAM: %luMB)\n",
- crash_size >> 20, crash_base >> 20, memory_end >> 20);
+ crash_size >> 20, crash_base >> 20,
+ (unsigned long)memblock.memory.total_size >> 20);
os_info_crashkernel_add(crash_base, crash_size);
#endif
}
-static void __init setup_memory(void)
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
{
- unsigned long bootmap_size;
- unsigned long start_pfn, end_pfn;
- int i;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_start = INITRD_START;
+ initrd_end = initrd_start + INITRD_SIZE;
+ memblock_reserve(INITRD_START, INITRD_SIZE);
+#endif
+}
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
+/*
+ * Check for initrd being in usable memory
+ */
+static void __init check_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (INITRD_START && INITRD_SIZE &&
+ !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ pr_err("initrd does not fit memory.\n");
+ memblock_free(INITRD_START, INITRD_SIZE);
+ initrd_start = initrd_end = 0;
+ }
+#endif
+}
+
+/*
+ * Reserve all kernel text
+ */
+static void __init reserve_kernel(void)
+{
+ unsigned long start_pfn;
start_pfn = PFN_UP(__pa(&_end));
- end_pfn = max_pfn = PFN_DOWN(memory_end);
-#ifdef CONFIG_BLK_DEV_INITRD
/*
- * Move the initrd in case the bitmap of the bootmem allocater
- * would overwrite it.
+ * Reserve memory used for lowcore/command line/kernel image.
*/
+ memblock_reserve(0, (unsigned long)_ehead);
+ memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+ - (unsigned long)_stext);
+}
- if (INITRD_START && INITRD_SIZE) {
- unsigned long bmap_size;
- unsigned long start;
-
- bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
- bmap_size = PFN_PHYS(bmap_size);
-
- if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
- start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
-
+static void __init reserve_elfcorehdr(void)
+{
#ifdef CONFIG_CRASH_DUMP
- if (OLDMEM_BASE) {
- /* Move initrd behind kdump oldmem */
- if (start + INITRD_SIZE > OLDMEM_BASE &&
- start < OLDMEM_BASE + OLDMEM_SIZE)
- start = OLDMEM_BASE + OLDMEM_SIZE;
- }
-#endif
- if (start + INITRD_SIZE > memory_end) {
- pr_err("initrd extends beyond end of "
- "memory (0x%08lx > 0x%08lx) "
- "disabling initrd\n",
- start + INITRD_SIZE, memory_end);
- INITRD_START = INITRD_SIZE = 0;
- } else {
- pr_info("Moving initrd (0x%08lx -> "
- "0x%08lx, size: %ld)\n",
- INITRD_START, start, INITRD_SIZE);
- memmove((void *) start, (void *) INITRD_START,
- INITRD_SIZE);
- INITRD_START = start;
- }
- }
- }
+ if (is_kdump_kernel())
+ memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
+ PAGE_ALIGN(elfcorehdr_size));
#endif
+}
- /*
- * Initialize the boot-time allocator
- */
- bootmap_size = init_bootmem(start_pfn, end_pfn);
+static void __init setup_memory(void)
+{
+ struct memblock_region *reg;
/*
- * Register RAM areas with the bootmem allocator.
+ * Init storage key for present memory
*/
-
- for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- unsigned long start_chunk, end_chunk, pfn;
-
- if (memory_chunk[i].type != CHUNK_READ_WRITE &&
- memory_chunk[i].type != CHUNK_CRASHK)
- continue;
- start_chunk = PFN_DOWN(memory_chunk[i].addr);
- end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
- end_chunk = min(end_chunk, end_pfn);
- if (start_chunk >= end_chunk)
- continue;
- memblock_add_node(PFN_PHYS(start_chunk),
- PFN_PHYS(end_chunk - start_chunk), 0);
- pfn = max(start_chunk, start_pfn);
- storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
+ for_each_memblock(memory, reg) {
+ storage_key_init_range(reg->base, reg->base + reg->size);
}
-
psw_set_key(PAGE_DEFAULT_KEY);
- free_bootmem_with_active_regions(0, max_pfn);
-
- /*
- * Reserve memory used for lowcore/command line/kernel image.
- */
- reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
- reserve_bootmem((unsigned long)_stext,
- PFN_PHYS(start_pfn) - (unsigned long)_stext,
- BOOTMEM_DEFAULT);
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
- BOOTMEM_DEFAULT);
-
-#ifdef CONFIG_CRASH_DUMP
- if (crashk_res.start)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1,
- BOOTMEM_DEFAULT);
- if (is_kdump_kernel())
- reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
- PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
- if (INITRD_START && INITRD_SIZE) {
- if (INITRD_START + INITRD_SIZE <= memory_end) {
- reserve_bootmem(INITRD_START, INITRD_SIZE,
- BOOTMEM_DEFAULT);
- initrd_start = INITRD_START;
- initrd_end = initrd_start + INITRD_SIZE;
- } else {
- pr_err("initrd extends beyond end of "
- "memory (0x%08lx > 0x%08lx) "
- "disabling initrd\n",
- initrd_start + INITRD_SIZE, memory_end);
- initrd_start = initrd_end = 0;
- }
- }
-#endif
+ /* Only cosmetics */
+ memblock_enforce_memory_limit(memblock_end_of_DRAM());
}
/*
@@ -1008,6 +795,7 @@ static void __init setup_hwcaps(void)
strcpy(elf_platform, "z196");
break;
case 0x2827:
+ case 0x2828:
strcpy(elf_platform, "zEC12");
break;
}
@@ -1051,29 +839,47 @@ void __init setup_arch(char **cmdline_p)
ROOT_DEV = Root_RAM0;
+ /* Is init_mm really needed? */
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
- if (MACHINE_HAS_MVCOS)
- memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
- else
- memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
-
parse_early_param();
-
os_info_init();
setup_ipl();
- setup_memory_end();
- setup_addressing_mode();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+ reserve_memory_end();
reserve_oldmem();
- reserve_crashkernel();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_elfcorehdr();
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+ detect_memory_memblock();
+
+ remove_oldmem();
+
+ /*
+ * Make sure all chunks are MAX_ORDER aligned so we don't need the
+ * extra checks that HOLES_IN_ZONE would require.
+ *
+ * Is this still required?
+ */
+ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
+ setup_memory_end();
setup_memory();
+
+ check_initrd();
+ reserve_crashkernel();
+
setup_resources();
setup_vmcoreinfo();
setup_lowcore();
-
+ smp_fill_possible_mask();
cpu_init();
s390_init_cpu_topology();
@@ -1092,5 +898,37 @@ void __init setup_arch(char **cmdline_p)
set_preferred_console();
/* Setup zfcpdump support */
- setup_zfcpdump(console_devno);
+ setup_zfcpdump();
+}
+
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
+
+static int __init parse_no_removal_warning(char *str)
+{
+ no_removal_warning = 1;
+ return 0;
}
+__setup("no_removal_warning", parse_no_removal_warning);
+
+static int __init removal_warning(void)
+{
+ if (no_removal_warning)
+ return 0;
+ printk(KERN_ALERT "\n\n");
+ printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+ printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+ printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+ printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+ printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+ printk(KERN_CONT "please let us know. Please write to:\n");
+ printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+ printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+ printk(KERN_CONT "Thank you!\n\n");
+ printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+ printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+ schedule_timeout_uninterruptible(300 * HZ);
+ return 0;
+}
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c3ff70a7b24..42b49f9e19b 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -48,54 +48,6 @@ typedef struct
struct ucontext uc;
} rt_sigframe;
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask)
-{
- sigset_t blocked;
- siginitset(&blocked, mask);
- return sigsuspend(&blocked);
-}
-
-SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act,
- struct old_sigaction __user *, oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
-
-SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss,
- stack_t __user *, uoss)
-{
- struct pt_regs *regs = task_pt_regs(current);
- return do_sigaltstack(uss, uoss, regs->gprs[15]);
-}
-
/* Returns non-zero on fault. */
static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
@@ -105,40 +57,48 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
- user_sregs.regs.psw.mask = psw_user_bits |
- (regs->psw.mask & PSW_MASK_USER);
+ user_sregs.regs.psw.mask = PSW_USER_BITS |
+ (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
user_sregs.regs.psw.addr = regs->psw.addr;
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
- sizeof(sregs->regs.acrs));
+ sizeof(user_sregs.regs.acrs));
/*
* We have to store the fp registers to current->thread.fp_regs
* to merge them with the emulated registers.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(s390_fp_regs));
- return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs));
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+ return -EFAULT;
+ return 0;
}
-/* Returns positive number on error */
static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
- int err;
_sigregs user_sregs;
/* Alwys make any pending restarted system call return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
- err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
- if (err)
- return err;
- /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
- regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
- (user_sregs.regs.psw.mask & PSW_MASK_USER);
+ if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+ return -EINVAL;
+
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
/* Check for invalid user address space control. */
- if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
- regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
(regs->psw.mask & ~PSW_MASK_ASC);
/* Check for invalid amode */
if (regs->psw.mask & PSW_MASK_EA)
@@ -146,15 +106,14 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
regs->psw.addr = user_sregs.regs.psw.addr;
memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
- sizeof(sregs->regs.acrs));
+ sizeof(current->thread.acrs));
restore_access_regs(current->thread.acrs);
memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(s390_fp_regs));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
+ sizeof(current->thread.fp_regs));
- restore_fp_regs(&current->thread.fp_regs);
- clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */
+ restore_fp_regs(current->thread.fp_regs.fprs);
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
}
@@ -164,8 +123,6 @@ SYSCALL_DEFINE0(sigreturn)
sigframe __user *frame = (sigframe __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
@@ -183,15 +140,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (do_sigaltstack(&frame->uc.uc_stack, NULL,
- regs->gprs[15]) == -EFAULT)
+ if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return regs->gprs[2];
badframe:
@@ -244,8 +198,6 @@ static int setup_frame(int sig, struct k_sigaction *ka,
sigframe __user *frame;
frame = get_sigframe(ka, regs, sizeof(sigframe));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -279,7 +231,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
@@ -313,8 +265,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
rt_sigframe __user *frame;
frame = get_sigframe(ka, regs, sizeof(rt_sigframe));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -325,10 +275,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(NULL, &frame->uc.uc_link);
- err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->gprs[15]),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
err |= save_sigregs(regs, &frame->uc.uc_mcontext);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
@@ -355,7 +302,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
- (psw_user_bits & PSW_MASK_ASC) |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
@@ -409,7 +356,7 @@ void do_signal(struct pt_regs *regs)
* call information.
*/
current_thread_info()->system_call =
- test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0;
+ test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
@@ -437,7 +384,7 @@ void do_signal(struct pt_regs *regs)
}
}
/* No longer in a system call */
- clear_thread_flag(TIF_SYSCALL);
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
if (is_compat_task())
handle_signal32(signr, &ka, &info, oldset, regs);
@@ -447,7 +394,7 @@ void do_signal(struct pt_regs *regs)
}
/* No handlers present - check for system call restart */
- clear_thread_flag(TIF_SYSCALL);
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
if (current_thread_info()->system_call) {
regs->int_code = current_thread_info()->system_call;
switch (regs->gprs[2]) {
@@ -460,9 +407,9 @@ void do_signal(struct pt_regs *regs)
case -ERESTARTNOINTR:
/* Restart system call with magic TIF bit. */
regs->gprs[2] = regs->orig_gpr2;
- set_thread_flag(TIF_SYSCALL);
+ set_pt_regs_flag(regs, PIF_SYSCALL);
if (test_thread_flag(TIF_SINGLE_STEP))
- set_thread_flag(TIF_PER_TRAP);
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
break;
}
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 7433a2f9e5c..243c7e51260 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -49,7 +49,6 @@
enum {
ec_schedule = 0,
- ec_call_function,
ec_call_function_single,
ec_stop_cpu,
};
@@ -60,7 +59,7 @@ enum {
};
struct pcpu {
- struct cpu cpu;
+ struct cpu *cpu;
struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
unsigned long async_stack; /* async stack for the cpu */
unsigned long panic_stack; /* panic stack for the cpu */
@@ -83,21 +82,6 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
- register unsigned int reg1 asm ("1") = parm;
- int cc;
-
- asm volatile(
- " sigp %1,%2,0(%3)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
- if (status && cc == 1)
- *status = reg1;
- return cc;
-}
-
static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
int cc;
@@ -160,13 +144,13 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
int order;
- set_bit(ec_bit, &pcpu->ec_mask);
- order = pcpu_running(pcpu) ?
- SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
pcpu_sigp_retry(pcpu, order, 0);
}
-static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
struct _lowcore *lc;
@@ -181,9 +165,12 @@ static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
- lc->async_stack = pcpu->async_stack + ASYNC_SIZE;
- lc->panic_stack = pcpu->panic_stack + PAGE_SIZE;
+ lc->async_stack = pcpu->async_stack + ASYNC_SIZE
+ - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
+ - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE) {
lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
@@ -235,8 +222,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct _lowcore *lc = pcpu->lowcore;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
atomic_inc(&init_mm.context.attach_count);
lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
@@ -253,7 +244,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
struct _lowcore *lc = pcpu->lowcore;
struct thread_info *ti = task_thread_info(tsk);
- lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE;
+ lc->kernel_stack = (unsigned long) task_stack_page(tsk)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->thread_info = (unsigned long) task_thread_info(tsk);
lc->current_task = (unsigned long) tsk;
lc->user_timer = ti->user_timer;
@@ -281,7 +273,7 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
unsigned long source_cpu = stap();
- __load_psw_mask(psw_kernel_bits);
+ __load_psw_mask(PSW_KERNEL_BITS);
if (pcpu->address == source_cpu)
func(data); /* should not return */
/* Stop target cpu (if func returns this stops the current cpu). */
@@ -360,21 +352,21 @@ void smp_yield_cpu(int cpu)
* Send cpus emergency shutdown signal. This gives the cpus the
* opportunity to complete outstanding interrupts.
*/
-void smp_emergency_stop(cpumask_t *cpumask)
+static void smp_emergency_stop(cpumask_t *cpumask)
{
u64 end;
int cpu;
- end = get_clock() + (1000000UL << 12);
+ end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, cpumask) {
struct pcpu *pcpu = pcpu_devices + cpu;
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
- get_clock() < end)
+ get_tod_clock() < end)
cpu_relax();
}
- while (get_clock() < end) {
+ while (get_tod_clock() < end) {
for_each_cpu(cpu, cpumask)
if (pcpu_stopped(pcpu_devices + cpu))
cpumask_clear_cpu(cpu, cpumask);
@@ -393,7 +385,7 @@ void smp_send_stop(void)
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
trace_hardirqs_off();
debug_set_critical();
@@ -413,46 +405,28 @@ void smp_send_stop(void)
}
/*
- * Stop the current cpu.
- */
-void smp_stop_cpu(void)
-{
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
- for (;;) ;
-}
-
-/*
* This is the main routine where commands issued by other
* cpus are handled.
*/
-static void do_ext_call_interrupt(struct ext_code ext_code,
- unsigned int param32, unsigned long param64)
+static void smp_handle_ext_call(void)
{
unsigned long bits;
- int cpu;
-
- cpu = smp_processor_id();
- if (ext_code.code == 0x1202)
- inc_irq_stat(IRQEXT_EXC);
- else
- inc_irq_stat(IRQEXT_EMS);
- /*
- * handle bit signal external calls
- */
- bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
+ /* handle bit signal external calls */
+ bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
-
if (test_bit(ec_schedule, &bits))
scheduler_ipi();
-
- if (test_bit(ec_call_function, &bits))
- generic_smp_call_function_interrupt();
-
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
+}
+static void do_ext_call_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+ smp_handle_ext_call();
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -460,7 +434,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
@@ -538,10 +512,7 @@ void smp_ctl_clear_bit(int cr, int bit)
}
EXPORT_SYMBOL(smp_ctl_clear_bit);
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
-
-struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
-EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
+#ifdef CONFIG_CRASH_DUMP
static void __init smp_get_save_area(int cpu, u16 address)
{
@@ -553,23 +524,15 @@ static void __init smp_get_save_area(int cpu, u16 address)
if (!OLDMEM_BASE && (address == boot_cpu_address ||
ipl_info.type != IPL_TYPE_FCP_DUMP))
return;
- if (cpu >= NR_CPUS) {
- pr_warning("CPU %i exceeds the maximum %i and is excluded "
- "from the dump\n", cpu, NR_CPUS - 1);
- return;
- }
- save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
+ save_area = dump_save_area_create(cpu);
if (!save_area)
panic("could not allocate memory for save area\n");
- zfcpdump_save_areas[cpu] = save_area;
-#ifdef CONFIG_CRASH_DUMP
if (address == boot_cpu_address) {
/* Copy the registers of the boot cpu. */
copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
SAVE_AREA_BASE - PAGE_SIZE, 0);
return;
}
-#endif
/* Get the registers of a non-boot cpu. */
__pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
@@ -586,11 +549,11 @@ int smp_store_status(int cpu)
return 0;
}
-#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+#else /* CONFIG_CRASH_DUMP */
static inline void smp_get_save_area(int cpu, u16 address) { }
-#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+#endif /* CONFIG_CRASH_DUMP */
void smp_cpu_set_polarization(int cpu, int val)
{
@@ -623,10 +586,9 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
return info;
}
-static int __cpuinit smp_add_present_cpu(int cpu);
+static int smp_add_present_cpu(int cpu);
-static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info,
- int sysfs_add)
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
@@ -642,7 +604,7 @@ static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info,
continue;
pcpu = pcpu_devices + cpu;
pcpu->address = info->cpu[i].address;
- pcpu->state = (cpu >= info->configured) ?
+ pcpu->state = (i >= info->configured) ?
CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
set_cpu_present(cpu, true);
@@ -692,16 +654,16 @@ static void __init smp_detect_cpus(void)
/*
* Activate a secondary processor.
*/
-static void __cpuinit smp_start_secondary(void *cpuvoid)
+static void smp_start_secondary(void *cpuvoid)
{
- S390_lowcore.last_update_clock = get_clock();
+ S390_lowcore.last_update_clock = get_tod_clock();
S390_lowcore.restart_stack = (unsigned long) restart_stack;
S390_lowcore.restart_fn = (unsigned long) do_restart;
S390_lowcore.restart_data = 0;
S390_lowcore.restart_source = -1UL;
restore_access_regs(S390_lowcore.access_regs_save_area);
__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
- __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
cpu_init();
preempt_disable();
init_cpu_timer();
@@ -711,12 +673,11 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- /* cpu_idle will call schedule for us */
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
}
/* Upping and downing of CPUs */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
struct pcpu *pcpu;
int rc;
@@ -739,18 +700,14 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
return 0;
}
-static int __init setup_possible_cpus(char *s)
-{
- int max, cpu;
+static unsigned int setup_possible_cpus __initdata;
- if (kstrtoint(s, 0, &max) < 0)
- return 0;
- init_cpu_possible(cpumask_of(0));
- for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
- set_cpu_possible(cpu, true);
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
return 0;
}
-early_param("possible_cpus", setup_possible_cpus);
+early_param("possible_cpus", _setup_possible_cpus);
#ifdef CONFIG_HOTPLUG_CPU
@@ -758,6 +715,8 @@ int __cpu_disable(void)
{
unsigned long cregs[16];
+ /* Handle possible pending IPIs */
+ smp_handle_ext_call();
set_cpu_online(smp_processor_id(), false);
/* Disable pseudo page faults on this cpu. */
pfault_fini();
@@ -780,6 +739,9 @@ void __cpu_die(unsigned int cpu)
cpu_relax();
pcpu_free_lowcore(pcpu);
atomic_dec(&init_mm.context.attach_count);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}
void __noreturn cpu_die(void)
@@ -791,13 +753,24 @@ void __noreturn cpu_die(void)
#endif /* CONFIG_HOTPLUG_CPU */
+void __init smp_fill_possible_mask(void)
+{
+ unsigned int possible, sclp, cpu;
+
+ sclp = sclp_get_max_cpu() ?: nr_cpu_ids;
+ possible = setup_possible_cpus ?: nr_cpu_ids;
+ possible = min(possible, sclp);
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
/* request the 0x1201 emergency signal external interrupt */
- if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
- if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
smp_detect_cpus();
}
@@ -810,8 +783,10 @@ void __init smp_prepare_boot_cpu(void)
pcpu->state = CPU_STATE_CONFIGURED;
pcpu->address = boot_cpu_address;
pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
- pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE;
- pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;
+ pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
+ + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
+ + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
S390_lowcore.percpu_offset = __per_cpu_offset[0];
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
set_cpu_present(0, true);
@@ -825,6 +800,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_setup_processor_id(void)
{
S390_lowcore.cpu_nr = 0;
+ S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
}
/*
@@ -934,7 +910,7 @@ static ssize_t show_idle_count(struct device *dev,
idle_count = ACCESS_ONCE(idle->idle_count);
if (ACCESS_ONCE(idle->clock_idle_enter))
idle_count++;
- } while ((sequence & 1) || (idle->sequence != sequence));
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
return sprintf(buf, "%llu\n", idle_count);
}
static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -947,12 +923,12 @@ static ssize_t show_idle_time(struct device *dev,
unsigned int sequence;
do {
- now = get_clock();
+ now = get_tod_clock();
sequence = ACCESS_ONCE(idle->sequence);
idle_time = ACCESS_ONCE(idle->idle_time);
idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (idle->sequence != sequence));
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
@@ -968,11 +944,11 @@ static struct attribute_group cpu_online_attr_group = {
.attrs = cpu_online_attrs,
};
-static int __cpuinit smp_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = &pcpu_devices[cpu].cpu;
+ struct cpu *c = pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
int err = 0;
@@ -987,12 +963,17 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
return notifier_from_errno(err);
}
-static int __cpuinit smp_add_present_cpu(int cpu)
+static int smp_add_present_cpu(int cpu)
{
- struct cpu *c = &pcpu_devices[cpu].cpu;
- struct device *s = &c->dev;
+ struct device *s;
+ struct cpu *c;
int rc;
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ pcpu_devices[cpu].cpu = c;
+ s = &c->dev;
c->hotpluggable = 1;
rc = register_cpu(c, cpu);
if (rc)
@@ -1059,19 +1040,24 @@ static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
static int __init s390_smp_init(void)
{
- int cpu, rc;
+ int cpu, rc = 0;
- hotcpu_notifier(smp_cpu_notify, 0);
#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
#endif
+ cpu_notifier_register_begin();
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
- return rc;
+ goto out;
}
- return 0;
+
+ __hotcpu_notifier(smp_cpu_notify, 0);
+
+out:
+ cpu_notifier_register_done();
+ return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index aa1494d0e38..a7a7537ce1e 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -10,6 +10,10 @@
#include <linux/suspend.h>
#include <linux/mm.h>
#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
+#include "entry.h"
/*
* References to section boundaries
@@ -41,6 +45,7 @@ struct page_key_data {
static struct page_key_data *page_key_data;
static struct page_key_data *page_key_rp, *page_key_wp;
static unsigned long page_key_rx, page_key_wx;
+unsigned long suspend_zero_pages;
/*
* For each page in the hibernation image one additional byte is
@@ -149,6 +154,36 @@ int pfn_is_nosave(unsigned long pfn)
return 0;
}
+/*
+ * PM notifier callback for suspend
+ */
+static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
+ if (!suspend_zero_pages)
+ return NOTIFY_BAD;
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ free_pages(suspend_zero_pages, LC_ORDER);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init suspend_pm_init(void)
+{
+ pm_notifier(suspend_pm_cb, 0);
+ return 0;
+}
+arch_initcall(suspend_pm_init);
+
void save_processor_state(void)
{
/* swsusp_arch_suspend() actually saves all cpu register contents.
@@ -180,3 +215,11 @@ void restore_processor_state(void)
__ctl_set_bit(0,28);
local_mcck_enable();
}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index d4ca4e0617b..6b09fdffbd2 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -36,8 +36,8 @@ ENTRY(swsusp_arch_suspend)
/* Store prefix register on stack */
stpx __SF_EMPTY(%r15)
- /* Save prefix register contents for lowcore */
- llgf %r4,__SF_EMPTY(%r15)
+ /* Save prefix register contents for lowcore copy */
+ llgf %r10,__SF_EMPTY(%r15)
/* Get pointer to save area */
lghi %r1,0x1000
@@ -91,7 +91,18 @@ ENTRY(swsusp_arch_suspend)
xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
spx __SF_EMPTY(%r15)
+ /* Save absolute zero pages */
+ larl %r2,suspend_zero_pages
+ lg %r2,0(%r2)
+ lghi %r4,0
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Copy lowcore to absolute zero lowcore */
lghi %r2,0
+ lgr %r4,%r10
lghi %r3,2*PAGE_SIZE
lghi %r5,2*PAGE_SIZE
1: mvcle %r2,%r4,0
@@ -248,8 +259,20 @@ restore_registers:
/* Load old stack */
lg %r15,0x2f8(%r13)
+ /* Save prefix register */
+ mvc __SF_EMPTY(4,%r15),0x318(%r13)
+
+ /* Restore absolute zero pages */
+ lghi %r2,0
+ larl %r4,suspend_zero_pages
+ lg %r4,0(%r4)
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
/* Restore prefix register */
- spx 0x318(%r13)
+ spx __SF_EMPTY(%r15)
/* Activate DAT */
stosm __SF_EMPTY(%r15),0x04
@@ -258,11 +281,8 @@ restore_registers:
lghi %r2,0
brasl %r14,arch_set_page_states
- /* Log potential guest relocation */
- brasl %r14,lgr_info_log
-
- /* Reinitialize the channel subsystem */
- brasl %r14,channel_subsystem_reinit
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
/* Return 0 */
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index d0964d22adb..23eb222c165 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -132,19 +132,9 @@ SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
* to
* %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len
*/
-SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset,
- u32 len_high, u32 len_low)
+SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset,
+ u32, len_high, u32, len_low)
{
return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low);
}
-#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
-asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset,
- long len_high, long len_low)
-{
- return SYSC_s390_fallocate((int) fd, (int) mode, offset,
- (u32) len_high, (u32) len_low);
-}
-SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate);
-#endif
-
#endif
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 6a6c61f94dd..fe5cdf29a00 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -9,347 +9,350 @@
#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall)
NI_SYSCALL /* 0 */
-SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper)
+SYSCALL(sys_exit,sys_exit,compat_sys_exit)
SYSCALL(sys_fork,sys_fork,sys_fork)
-SYSCALL(sys_read,sys_read,sys32_read_wrapper)
-SYSCALL(sys_write,sys_write,sys32_write_wrapper)
-SYSCALL(sys_open,sys_open,sys32_open_wrapper) /* 5 */
-SYSCALL(sys_close,sys_close,sys32_close_wrapper)
+SYSCALL(sys_read,sys_read,compat_sys_s390_read)
+SYSCALL(sys_write,sys_write,compat_sys_s390_write)
+SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */
+SYSCALL(sys_close,sys_close,compat_sys_close)
SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
-SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper)
-SYSCALL(sys_link,sys_link,sys32_link_wrapper)
-SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */
-SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper)
-SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper)
-SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */
-SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper)
-SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */
-SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/
+SYSCALL(sys_creat,sys_creat,compat_sys_creat)
+SYSCALL(sys_link,sys_link,compat_sys_link)
+SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */
+SYSCALL(sys_execve,sys_execve,compat_sys_execve)
+SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir)
+SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */
+SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod)
+SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */
+SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/
NI_SYSCALL /* old break syscall holder */
NI_SYSCALL /* old stat syscall holder */
-SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper)
+SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek)
SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */
-SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper)
-SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper)
-SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/
-SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/
-SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */
-SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper)
-SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper)
+SYSCALL(sys_mount,sys_mount,compat_sys_mount)
+SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount)
+SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/
+SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/
+SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */
+SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace)
+SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm)
NI_SYSCALL /* old fstat syscall */
SYSCALL(sys_pause,sys_pause,sys_pause)
-SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */
+SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */
NI_SYSCALL /* old stty syscall */
NI_SYSCALL /* old gtty syscall */
-SYSCALL(sys_access,sys_access,sys32_access_wrapper)
-SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper)
+SYSCALL(sys_access,sys_access,compat_sys_access)
+SYSCALL(sys_nice,sys_nice,compat_sys_nice)
NI_SYSCALL /* 35 old ftime syscall */
SYSCALL(sys_sync,sys_sync,sys_sync)
-SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper)
-SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper)
-SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper)
-SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */
-SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper)
-SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper)
-SYSCALL(sys_times,sys_times,compat_sys_times_wrapper)
+SYSCALL(sys_kill,sys_kill,compat_sys_kill)
+SYSCALL(sys_rename,sys_rename,compat_sys_rename)
+SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir)
+SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */
+SYSCALL(sys_dup,sys_dup,compat_sys_dup)
+SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe)
+SYSCALL(sys_times,sys_times,compat_sys_times)
NI_SYSCALL /* old prof syscall */
-SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */
-SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/
-SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/
-SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper)
-SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */
-SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */
-SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper)
-SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper)
+SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */
+SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/
+SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/
+SYSCALL(sys_signal,sys_signal,compat_sys_signal)
+SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */
+SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */
+SYSCALL(sys_acct,sys_acct,compat_sys_acct)
+SYSCALL(sys_umount,sys_umount,compat_sys_umount)
NI_SYSCALL /* old lock syscall */
-SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper)
-SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */
+SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl)
+SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */
NI_SYSCALL /* intel mpx syscall */
-SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper)
+SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid)
NI_SYSCALL /* old ulimit syscall */
NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */
-SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper)
-SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper)
-SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper)
+SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */
+SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot)
+SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat)
+SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2)
SYSCALL(sys_getppid,sys_getppid,sys_getppid)
SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */
SYSCALL(sys_setsid,sys_setsid,sys_setsid)
-SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper)
+SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction)
NI_SYSCALL /* old sgetmask syscall*/
NI_SYSCALL /* old ssetmask syscall*/
-SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */
-SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */
-SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper)
-SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper)
-SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper)
-SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */
-SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper)
-SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper)
-SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper)
-SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper)
-SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */
-SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */
+SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */
+SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */
+SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend)
+SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending)
+SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname)
+SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */
+SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit)
+SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage)
+SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday)
+SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday)
+SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */
+SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */
NI_SYSCALL /* old select syscall */
-SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper)
+SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink)
NI_SYSCALL /* old lstat syscall */
-SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */
-SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper)
-SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper)
-SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper)
-SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */
-SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */
-SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper)
-SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper)
-SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper)
-SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper)
-SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/
-SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper)
-SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper)
+SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */
+SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib)
+SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon)
+SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot)
+SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */
+SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */
+SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap)
+SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate)
+SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/
+SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority)
+SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority)
NI_SYSCALL /* old profil syscall */
-SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper)
-SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */
+SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs)
+SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */
NI_SYSCALL /* ioperm for i386 */
-SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper)
-SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper)
-SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper)
-SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */
-SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper)
-SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper)
-SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper)
+SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall)
+SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog)
+SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer)
+SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */
+SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat)
+SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat)
+SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat)
NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */
+SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */
SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup)
NI_SYSCALL /* old "idle" system call */
NI_SYSCALL /* vm86old for i386 */
-SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper)
-SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */
-SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
-SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper)
-SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
-SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn)
-SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */
-SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper)
-SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper)
+SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4)
+SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */
+SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo)
+SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc)
+SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync)
+SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn)
+SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */
+SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname)
+SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname)
NI_SYSCALL /* modify_ldt for i386 */
-SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper)
-SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */
-SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper)
+SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex)
+SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */
+SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask)
NI_SYSCALL /* old "create module" */
-SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper)
-SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper)
+SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module)
+SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module)
NI_SYSCALL /* 130: old get_kernel_syms */
-SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper)
-SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper)
-SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper)
-SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper)
-SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */
-SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper)
+SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl)
+SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid)
+SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush)
+SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */
+SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality)
NI_SYSCALL /* for afs_syscall */
-SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */
-SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */
-SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */
-SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper)
-SYSCALL(sys_select,sys_select,compat_sys_select_wrapper)
-SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper)
-SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper)
-SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */
-SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper)
-SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper)
-SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper)
-SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper)
-SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */
-SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper)
-SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper)
+SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */
+SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */
+SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */
+SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents)
+SYSCALL(sys_select,sys_select,compat_sys_select)
+SYSCALL(sys_flock,sys_flock,compat_sys_flock)
+SYSCALL(sys_msync,sys_msync,compat_sys_msync)
+SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */
+SYSCALL(sys_writev,sys_writev,compat_sys_writev)
+SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid)
+SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl)
+SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */
+SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock)
+SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall)
SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall)
-SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper)
-SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */
-SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper)
-SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper)
+SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam)
+SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */
+SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler)
+SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler)
SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield)
-SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper)
-SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */
-SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper)
-SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper)
-SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper)
-SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */
-SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */
+SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */
+SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
+SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep)
+SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap)
+SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */
+SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */
NI_SYSCALL /* for vm86 */
NI_SYSCALL /* old sys_query_module */
-SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
+SYSCALL(sys_poll,sys_poll,compat_sys_poll)
NI_SYSCALL /* old nfsservctl */
-SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */
-SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */
-SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
-SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn)
-SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper)
-SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */
-SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper)
-SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper)
-SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper)
-SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend_wrapper)
-SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */
-SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper)
-SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */
-SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper)
-SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper)
-SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */
-SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack_wrapper)
-SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper)
+SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */
+SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */
+SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl)
+SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn)
+SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction)
+SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */
+SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending)
+SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
+SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
+SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
+SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */
+SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64)
+SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */
+SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd)
+SYSCALL(sys_capget,sys_capget,compat_sys_capget)
+SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */
+SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack)
+SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile)
NI_SYSCALL /* streams1 */
NI_SYSCALL /* streams2 */
SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */
-SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper)
-SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper)
-SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper)
-SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper)
-SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */
-SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper)
-SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper)
-SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper)
+SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit)
+SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2)
+SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64)
+SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64)
+SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */
+SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64)
+SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64)
+SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown)
SYSCALL(sys_getuid,sys_getuid,sys_getuid)
SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */
SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid)
SYSCALL(sys_getegid,sys_getegid,sys_getegid)
-SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper)
-SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper)
-SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */
-SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper)
-SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper)
-SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper)
-SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper)
-SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */
-SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper)
-SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper)
-SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper)
-SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper)
-SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */
-SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper)
-SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper)
-SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper)
-SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper)
-SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */
-SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper)
-SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper)
-SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64_wrapper)
-SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper)
-SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */
-SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper)
-SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper)
-SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper)
-SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper)
-SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */
-SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper)
-SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper)
-SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper)
-SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper)
-SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */
+SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid)
+SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid)
+SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */
+SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups)
+SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown)
+SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid)
+SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */
+SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid)
+SYSCALL(sys_chown,sys_chown,compat_sys_chown)
+SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid)
+SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid)
+SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */
+SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root)
+SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore)
+SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise)
+SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */
+SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64)
+SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead)
+SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64)
+SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr)
+SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */
+SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr)
+SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr)
+SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr)
+SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr)
+SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */
+SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr)
+SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr)
+SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr)
+SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr)
+SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */
SYSCALL(sys_gettid,sys_gettid,sys_gettid)
-SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper)
-SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper)
-SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */
-SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper)
+SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill)
+SYSCALL(sys_futex,sys_futex,compat_sys_futex)
+SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity)
+SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */
+SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill)
NI_SYSCALL /* reserved for TUX */
-SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper)
-SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper)
-SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */
-SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper)
-SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper)
-SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper)
-SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper)
-SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */
-SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper)
-SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper)
-SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper)
-SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper)
-SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */
-SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper)
-SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper)
-SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper)
-SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper)
-SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */
-SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
-SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
+SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup)
+SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy)
+SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */
+SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit)
+SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel)
+SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group)
+SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */
+SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait)
+SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address)
+SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64)
+SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create)
+SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */
+SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime)
+SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime)
+SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
+SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres)
+SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep)
NI_SYSCALL /* reserved for vserver */
-SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
-SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
-SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
-SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper)
+SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64)
+SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64)
+SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64)
+SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages)
NI_SYSCALL /* 268 sys_mbind */
NI_SYSCALL /* 269 sys_get_mempolicy */
NI_SYSCALL /* 270 sys_set_mempolicy */
-SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper)
-SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper)
-SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper)
-SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper)
-SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */
-SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper)
-SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper)
-SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper)
-SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper)
-SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */
-SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper)
-SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper)
-SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper)
+SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open)
+SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink)
+SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend)
+SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive)
+SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */
+SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr)
+SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load)
+SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key)
+SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key)
+SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */
+SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid)
+SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set)
+SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get)
SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
-SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */
-SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper)
+SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
+SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
NI_SYSCALL /* 287 sys_migrate_pages */
-SYSCALL(sys_openat,sys_openat,compat_sys_openat_wrapper)
-SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper)
-SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */
-SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper)
-SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper)
-SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper)
-SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper)
-SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */
-SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper)
-SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper)
-SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper)
-SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper)
-SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */
-SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
-SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
-SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
-SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list_wrapper)
-SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list_wrapper)
-SYSCALL(sys_splice,sys_splice,sys_splice_wrapper)
-SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper)
-SYSCALL(sys_tee,sys_tee,sys_tee_wrapper)
-SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper)
+SYSCALL(sys_openat,sys_openat,compat_sys_openat)
+SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat)
+SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */
+SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat)
+SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat)
+SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64)
+SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat)
+SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */
+SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat)
+SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat)
+SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat)
+SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat)
+SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */
+SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6)
+SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll)
+SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare)
+SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list)
+SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list)
+SYSCALL(sys_splice,sys_splice,compat_sys_splice)
+SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range)
+SYSCALL(sys_tee,sys_tee,compat_sys_tee)
+SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice)
NI_SYSCALL /* 310 sys_move_pages */
-SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper)
-SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper)
-SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper)
-SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper)
-SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */
-SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper)
+SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu)
+SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait)
+SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes)
+SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate)
+SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */
+SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd)
NI_SYSCALL /* 317 old sys_timer_fd */
-SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper)
-SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper)
-SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */
-SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper)
-SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4_wrapper)
-SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper)
-SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper)
-SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */
-SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper)
-SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
-SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper)
-SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper)
-SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */
-SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper)
-SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper)
-SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper)
-SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper)
-SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */
-SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
-SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
-SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
-SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
-SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
-SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
-SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)
-SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper)
-SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper)
+SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd)
+SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
+SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime)
+SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4)
+SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2)
+SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */
+SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3)
+SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv)
+SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev)
+SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
+SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open)
+SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark)
+SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64)
+SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
+SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at)
+SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime)
+SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs)
+SYSCALL(sys_setns,sys_setns,compat_sys_setns)
+SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
+SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev)
+SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
+SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
+SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
+SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 62f89d98e88..811f542b8ed 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -418,7 +418,7 @@ void s390_adjust_jiffies(void)
/*
* calibrate the delay loop
*/
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
{
s390_adjust_jiffies();
/* Print the good old Bogomips line .. */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index a5f4f5a1d24..0931b110c82 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
*/
unsigned long long notrace __kprobes sched_clock(void)
{
- return tod_to_ns(get_clock_monotonic());
+ return tod_to_ns(get_tod_clock_monotonic());
}
/*
@@ -92,7 +92,6 @@ void clock_comparator_work(void)
struct clock_event_device *cd;
S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
cd = &__get_cpu_var(comparators);
cd->event_handler(cd);
}
@@ -109,17 +108,10 @@ static void fixup_clock_comparator(unsigned long long delta)
set_clock_comparator(S390_lowcore.clock_comparator);
}
-static int s390_next_ktime(ktime_t expires,
+static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- struct timespec ts;
- u64 nsecs;
-
- ts.tv_sec = ts.tv_nsec = 0;
- monotonic_to_bootbased(&ts);
- nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires));
- do_div(nsecs, 125);
- S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9);
+ S390_lowcore.clock_comparator = get_tod_clock() + delta;
set_clock_comparator(S390_lowcore.clock_comparator);
return 0;
}
@@ -144,15 +136,14 @@ void init_cpu_timer(void)
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
cd->name = "comparator";
- cd->features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_KTIME;
+ cd->features = CLOCK_EVT_FEAT_ONESHOT;
cd->mult = 16777;
cd->shift = 12;
cd->min_delta_ns = 1;
cd->max_delta_ns = LONG_MAX;
cd->rating = 400;
cd->cpumask = cpumask_of(cpu);
- cd->set_next_ktime = s390_next_ktime;
+ cd->set_next_event = s390_next_event;
cd->set_mode = s390_set_mode;
clockevents_register_device(cd);
@@ -191,7 +182,7 @@ static void stp_reset(void);
void read_persistent_clock(struct timespec *ts)
{
- tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts);
+ tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
}
void read_boot_clock(struct timespec *ts)
@@ -201,7 +192,7 @@ void read_boot_clock(struct timespec *ts)
static cycle_t read_tod_clock(struct clocksource *cs)
{
- return get_clock();
+ return get_tod_clock();
}
static struct clocksource clocksource_tod = {
@@ -219,21 +210,30 @@ struct clocksource * __init clocksource_default_clock(void)
return &clocksource_tod;
}
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
{
- if (clock != &clocksource_tod)
+ u64 nsecps;
+
+ if (tk->clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
- vdso_data->xtime_tod_stamp = clock->cycle_last;
- vdso_data->xtime_clock_sec = wall_time->tv_sec;
- vdso_data->xtime_clock_nsec = wall_time->tv_nsec;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->ntp_mult = mult;
+ vdso_data->xtime_tod_stamp = tk->clock->cycle_last;
+ vdso_data->xtime_clock_sec = tk->xtime_sec;
+ vdso_data->xtime_clock_nsec = tk->xtime_nsec;
+ vdso_data->wtom_clock_sec =
+ tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->shift;
+ while (vdso_data->wtom_clock_nsec >= nsecps) {
+ vdso_data->wtom_clock_nsec -= nsecps;
+ vdso_data->wtom_clock_sec++;
+ }
+ vdso_data->tk_mult = tk->mult;
+ vdso_data->tk_shift = tk->shift;
smp_wmb();
++vdso_data->tb_update_count;
}
@@ -262,11 +262,11 @@ void __init time_init(void)
stp_reset();
/* request the clock comparator external interrupt */
- if (register_external_interrupt(0x1004, clock_comparator_interrupt))
- panic("Couldn't request external interrupt 0x1004");
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
/* request the timing alert external interrupt */
- if (register_external_interrupt(0x1406, timing_alert_interrupt))
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
if (clocksource_register(&clocksource_tod) != 0)
@@ -339,7 +339,7 @@ int get_sync_clock(unsigned long long *clock)
sw_ptr = &get_cpu_var(clock_sync_word);
sw0 = atomic_read(sw_ptr);
- *clock = get_clock();
+ *clock = get_tod_clock();
sw1 = atomic_read(sw_ptr);
put_cpu_var(clock_sync_word);
if (sw0 == sw1 && (sw0 & 0x80000000U))
@@ -483,7 +483,7 @@ static void etr_reset(void)
.p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
.es = 0, .sl = 0 };
if (etr_setr(&etr_eacr) == 0) {
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
if (etr_port0_online && etr_port1_online)
set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
@@ -765,8 +765,8 @@ static int etr_sync_clock(void *data)
__ctl_set_bit(14, 21);
__ctl_set_bit(0, 29);
clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
- old_clock = get_clock();
- if (set_clock(clock) == 0) {
+ old_clock = get_tod_clock();
+ if (set_tod_clock(clock) == 0) {
__udelay(1); /* Wait for the clock to start. */
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
@@ -842,7 +842,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
* assume that this can have caused an stepping
* port switch.
*/
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
eacr.p0 = etr_port0_online;
if (!eacr.p0)
eacr.e0 = 0;
@@ -855,7 +855,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
* assume that this can have caused an stepping
* port switch.
*/
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
eacr.p1 = etr_port1_online;
if (!eacr.p1)
eacr.e1 = 0;
@@ -971,7 +971,7 @@ static void etr_update_eacr(struct etr_eacr eacr)
etr_eacr = eacr;
etr_setr(&etr_eacr);
if (dp_changed)
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
}
/*
@@ -1009,7 +1009,7 @@ static void etr_work_fn(struct work_struct *work)
/* Store aib to get the current ETR status word. */
BUG_ON(etr_stetr(&aib) != 0);
etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */
- now = get_clock();
+ now = get_tod_clock();
/*
* Update the port information if the last stepping port change
@@ -1534,10 +1534,10 @@ static int stp_sync_clock(void *data)
if (stp_info.todoff[0] || stp_info.todoff[1] ||
stp_info.todoff[2] || stp_info.todoff[3] ||
stp_info.tmd != 2) {
- old_clock = get_clock();
+ old_clock = get_tod_clock();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
if (rc == 0) {
- delta = adjust_time(old_clock, get_clock(), 0);
+ delta = adjust_time(old_clock, get_tod_clock(), 0);
fixup_clock_comparator(delta);
rc = chsc_sstpi(stp_page, &stp_info,
sizeof(struct stp_sstpi));
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 4b2e3e31700..355a16c5570 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -333,7 +333,9 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = alloc_bootmem(sizeof(struct mask_info));
+ mask->next = alloc_bootmem_align(
+ roundup_pow_of_two(sizeof(struct mask_info)),
+ roundup_pow_of_two(sizeof(struct mask_info)));
mask = mask->next;
}
}
@@ -443,6 +445,23 @@ int topology_cpu_init(struct cpu *cpu)
return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_mask;
+}
+
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+ return &cpu_topology[cpu].book_mask;
+}
+
+static struct sched_domain_topology_level s390_topology[] = {
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_book_mask, SD_INIT_NAME(BOOK) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
static int __init topology_init(void)
{
if (!MACHINE_HAS_TOPOLOGY) {
@@ -451,7 +470,9 @@ static int __init topology_init(void)
}
set_topology_timer();
out:
- update_cpu_masks();
+
+ set_sched_topology(s390_topology);
+
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 70ecfc5fe8f..c5762324d9e 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -12,49 +12,16 @@
* 'Traps.c' handles hardware traps and faults after we have saved some
* state in 'asm.s'.
*/
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
#include <linux/ptrace.h>
-#include <linux/timer.h>
+#include <linux/sched.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/kallsyms.h>
-#include <linux/reboot.h>
-#include <linux/kprobes.h>
-#include <linux/bug.h>
-#include <linux/utsname.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <linux/atomic.h>
-#include <asm/mathemu.h>
-#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
-#include <asm/debug.h>
-#include <asm/ipl.h>
#include "entry.h"
int show_unhandled_signals = 1;
-#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
-
-#ifndef CONFIG_64BIT
-#define LONG "%08lx "
-#define FOURLONG "%08lx %08lx %08lx %08lx\n"
-static int kstack_depth_to_print = 12;
-#else /* CONFIG_64BIT */
-#define LONG "%016lx "
-#define FOURLONG "%016lx %016lx %016lx %016lx\n"
-static int kstack_depth_to_print = 20;
-#endif /* CONFIG_64BIT */
-
static inline void __user *get_trap_ip(struct pt_regs *regs)
{
#ifdef CONFIG_64BIT
@@ -72,215 +39,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
#endif
}
-/*
- * For show_trace we have tree different stack to consider:
- * - the panic stack which is used if the kernel stack has overflown
- * - the asynchronous interrupt stack (cpu related)
- * - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
- * touch all of them. The order is: panic stack, async stack, sync stack.
- */
-static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
- }
-}
-
-static void show_trace(struct task_struct *task, unsigned long *stack)
-{
- register unsigned long __r15 asm ("15");
- unsigned long sp;
-
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
-#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp, S390_lowcore.panic_stack - 4096,
- S390_lowcore.panic_stack);
-#endif
- sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
- if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
- else
- __show_trace(sp, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
- if (!task)
- task = current;
- debug_show_held_locks(task);
-}
-
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- register unsigned long * __r15 asm ("15");
- unsigned long *stack;
- int i;
-
- if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
- break;
- if ((i * sizeof(long) % 32) == 0)
- printk("%s ", i == 0 ? "" : "\n");
- printk(LONG, *stack++);
- }
- printk("\n");
- show_trace(task, sp);
-}
-
-static void show_last_breaking_event(struct pt_regs *regs)
-{
-#ifdef CONFIG_64BIT
- printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
-#endif
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- printk("CPU: %d %s %s %.*s\n",
- task_thread_info(current)->cpu, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
- current->comm, current->pid, current,
- (void *) current->thread.ksp);
- show_stack(NULL, NULL);
-}
-EXPORT_SYMBOL(dump_stack);
-
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
-void show_registers(struct pt_regs *regs)
-{
- char *mode;
-
- mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr);
- print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN);
- printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
-#ifdef CONFIG_64BIT
- printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
-#endif
- printk("\n%s GPRS: " FOURLONG, mode,
- regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
- printk(" " FOURLONG,
- regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
- printk(" " FOURLONG,
- regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
- printk(" " FOURLONG,
- regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
-
- show_code(regs);
-}
-
-void show_regs(struct pt_regs *regs)
-{
- printk("CPU: %d %s %s %.*s\n",
- task_thread_info(current)->cpu, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
- current->comm, current->pid, current,
- (void *) current->thread.ksp);
- show_registers(regs);
- /* Show stack backtrace if pt_regs is from kernel mode */
- if (!user_mode(regs))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
- show_last_breaking_event(regs);
-}
-
-static DEFINE_SPINLOCK(die_lock);
-
-void die(struct pt_regs *regs, const char *str)
-{
- static int die_counter;
-
- oops_enter();
- lgr_info_log();
- debug_stop_all();
- console_verbose();
- spin_lock_irq(&die_lock);
- bust_spinlocks(1);
- printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
- print_modules();
- show_regs(regs);
- bust_spinlocks(0);
- add_taint(TAINT_DIE);
- spin_unlock_irq(&die_lock);
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception: panic_on_oops");
- oops_exit();
- do_exit(SIGSEGV);
-}
-
static inline void report_user_fault(struct pt_regs *regs, int signr)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d7776281cb6..61364909678 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -63,7 +63,7 @@ static int __init vdso_setup(char *s)
else if (strncmp(s, "off", 4) == 0)
vdso_enabled = 0;
else {
- rc = strict_strtoul(s, 0, &val);
+ rc = kstrtoul(s, 0, &val);
vdso_enabled = rc ? 0 : !!val;
}
return !rc;
@@ -84,8 +84,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
*/
static void vdso_init_data(struct vdso_data *vd)
{
- vd->ectg_available =
- s390_user_mode != HOME_SPACE_MODE && test_facility(31);
+ vd->ectg_available = test_facility(31);
}
#ifdef CONFIG_64BIT
@@ -102,7 +101,7 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
lowcore->vdso_per_cpu_data = __LC_PASTE;
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (!vdso_enabled)
return 0;
segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
@@ -113,11 +112,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
PAGE_SIZE << SEGMENT_ORDER);
- clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) page_table, _PAGE_INVALID,
256*sizeof(unsigned long));
*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
- *(unsigned long *) page_table = _PAGE_RO + page_frame;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
psal = (u32 *) (page_table + 256*sizeof(unsigned long));
aste = psal + 32;
@@ -126,7 +125,7 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
psal[i] = 0x80000000;
lowcore->paste[4] = (u32)(addr_t) psal;
- psal[0] = 0x20000000;
+ psal[0] = 0x02000000;
psal[2] = (u32)(addr_t) aste;
*(unsigned long *) (aste + 2) = segment_table +
_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
@@ -147,7 +146,7 @@ void vdso_free_per_cpu(struct _lowcore *lowcore)
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (!vdso_enabled)
return;
psal = (u32 *)(addr_t) lowcore->paste[4];
@@ -165,7 +164,7 @@ static void vdso_init_cr5(void)
{
unsigned long cr5;
- if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled)
+ if (!vdso_enabled)
return;
cr5 = offsetof(struct _lowcore, paste);
__ctl_load(cr5, 5, 5);
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index b2224e0b974..65fc3979c2f 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -38,25 +38,21 @@ __kernel_clock_gettime:
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,2f
ahi %r0,-1
-2: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
+2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
lr %r2,%r0
- l %r0,__VDSO_NTP_MULT(%r5)
+ l %r0,__VDSO_TK_MULT(%r5)
ltr %r1,%r1
mr %r0,%r0
jnm 3f
- a %r0,__VDSO_NTP_MULT(%r5)
+ a %r0,__VDSO_TK_MULT(%r5)
3: alr %r0,%r2
- srdl %r0,12
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,4f
- ahi %r0,1
-4: l %r2,__VDSO_XTIME_SEC+4(%r5)
- al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */
+ al %r0,__VDSO_WTOM_NSEC(%r5)
al %r1,__VDSO_WTOM_NSEC+4(%r5)
brc 12,5f
ahi %r0,1
-5: al %r2,__VDSO_WTOM_SEC+4(%r5)
+5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_WTOM_SEC+4(%r5)
cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
jne 1b
basr %r5,0
@@ -86,20 +82,21 @@ __kernel_clock_gettime:
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,12f
ahi %r0,-1
-12: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
+12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
lr %r2,%r0
- l %r0,__VDSO_NTP_MULT(%r5)
+ l %r0,__VDSO_TK_MULT(%r5)
ltr %r1,%r1
mr %r0,%r0
jnm 13f
- a %r0,__VDSO_NTP_MULT(%r5)
+ a %r0,__VDSO_TK_MULT(%r5)
13: alr %r0,%r2
- srdl %r0,12
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
al %r1,__VDSO_XTIME_NSEC+4(%r5)
brc 12,14f
ahi %r0,1
-14: l %r2,__VDSO_XTIME_SEC+4(%r5)
+14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_XTIME_SEC+4(%r5)
cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
jne 11b
basr %r5,0
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index 2d3633175e3..fd621a950f7 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -35,15 +35,14 @@ __kernel_gettimeofday:
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,3f
ahi %r0,-1
-3: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */
+3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
st %r0,24(%r15)
- l %r0,__VDSO_NTP_MULT(%r5)
+ l %r0,__VDSO_TK_MULT(%r5)
ltr %r1,%r1
mr %r0,%r0
jnm 4f
- a %r0,__VDSO_NTP_MULT(%r5)
+ a %r0,__VDSO_TK_MULT(%r5)
4: al %r0,24(%r15)
- srdl %r0,12
al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
al %r1,__VDSO_XTIME_NSEC+4(%r5)
brc 12,5f
@@ -51,6 +50,8 @@ __kernel_gettimeofday:
5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5)
cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
jne 1b
+ l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r4) /* >> tk->shift */
l %r4,24(%r15) /* get tv_sec from stack */
basr %r5,0
6: ltr %r0,%r0
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 176e1f75f9a..34deba7c7ed 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -23,7 +23,9 @@ __kernel_clock_getres:
je 0f
cghi %r2,__CLOCK_MONOTONIC
je 0f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 0f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
jne 2f
larl %r5,_vdso_data
icm %r0,15,__LC_ECTG_OK(%r5)
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index d46c95ed5f1..91940ed33a4 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,7 +22,9 @@ __kernel_clock_gettime:
larl %r5,_vdso_data
cghi %r2,__CLOCK_REALTIME
je 4f
- cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 9f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
je 9f
cghi %r2,__CLOCK_MONOTONIC
jne 12f
@@ -34,14 +36,13 @@ __kernel_clock_gettime:
tmll %r4,0x0001 /* pending update ? loop */
jnz 0b
stck 48(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r0,__VDSO_WTOM_SEC(%r5)
lg %r1,48(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- lg %r0,__VDSO_XTIME_SEC(%r5)
- alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */
- alg %r0,__VDSO_WTOM_SEC(%r5)
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_WTOM_NSEC(%r5)
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 0b
larl %r5,13f
@@ -62,12 +63,13 @@ __kernel_clock_gettime:
tmll %r4,0x0001 /* pending update ? loop */
jnz 5b
stck 48(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
lg %r1,48(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- lg %r0,__VDSO_XTIME_SEC(%r5)
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 5b
larl %r5,13f
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index 36ee674722e..d0860d1d0cc 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -31,12 +31,13 @@ __kernel_gettimeofday:
stck 48(%r15) /* Store TOD clock */
lg %r1,48(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */
- srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 0b
+ lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srlg %r1,%r1,0(%r5) /* >> tk->shift */
larl %r5,5f
2: clg %r1,0(%r5)
jl 3f
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 79cb51adc74..35b13ed0af5 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -75,6 +75,10 @@ SECTIONS
EXIT_TEXT
}
+ .exit.data : {
+ EXIT_DATA
+ }
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e84b8b68444..8c34363d6f1 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
+#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"
@@ -127,7 +128,7 @@ void vtime_account_user(struct task_struct *tsk)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
@@ -145,10 +146,10 @@ void vtime_account(struct task_struct *tsk)
virt_timer_forward(system);
}
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account")));
+__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);
void __kprobes vtime_stop_cpu(void)
@@ -158,20 +159,15 @@ void __kprobes vtime_stop_cpu(void)
unsigned long psw_mask;
trace_hardirqs_on();
- /* Don't trace preempt off for idle. */
- stop_critical_timings();
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
idle->nohz_delay = 0;
/* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask);
- /* Reenable preemption tracer. */
- start_critical_timings();
-
/* Account time spent with enabled wait psw loaded as idle time. */
idle->sequence++;
smp_wmb();
@@ -191,11 +187,11 @@ cputime64_t s390_get_idle_time(int cpu)
unsigned int sequence;
do {
- now = get_clock();
+ now = get_tod_clock();
sequence = ACCESS_ONCE(idle->sequence);
idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (idle->sequence != sequence));
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
}
@@ -376,14 +372,14 @@ EXPORT_SYMBOL(del_virt_timer);
/*
* Start the virtual CPU timer on the current CPU.
*/
-void __cpuinit init_cpu_vtimer(void)
+void init_cpu_vtimer(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
}
-static int __cpuinit s390_nohz_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct s390_idle_data *idle;
long cpu = (long) hcpu;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index b58dd869cb3..10d529ac982 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -18,10 +18,15 @@ if VIRTUALIZATION
config KVM
def_tristate y
prompt "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM && EXPERIMENTAL
+ depends on HAVE_KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_EVENTFD
+ select KVM_ASYNC_PF
+ select KVM_ASYNC_PF_SYNC
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 3975722bb19..b3b55346965 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,9 +6,12 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
+
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a390687feb1..0161675878a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -13,14 +13,17 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
+#include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
- unsigned long prefix = vcpu->arch.sie_block->prefix;
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
@@ -45,6 +48,86 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
return 0;
}
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+ struct prs_parm parm;
+ int rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+
+ if (vcpu->run->s.regs.gprs[rx] & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN */
+ if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+ /*
+ * If the pagefault handshake is already activated,
+ * the token must not be changed. We have to return
+ * decimal 8 instead, as mandated in SC24-6084.
+ */
+ vcpu->run->s.regs.gprs[ry] = 8;
+ return 0;
+ }
+
+ if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+ parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->run->s.regs.gprs[ry] = 0;
+ rc = 0;
+ break;
+ case 1: /*
+ * CANCEL
+ * Specification allows to let already pending tokens survive
+ * the cancel, therefore to reduce code complexity, we assume
+ * all outstanding tokens are already pending.
+ */
+ if (parm.token_addr || parm.select_mask ||
+ parm.compare_mask || parm.zarch)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ vcpu->run->s.regs.gprs[ry] = 0;
+ /*
+ * If the pfault handling was not established or is already
+ * canceled SC24-6084 requests to return decimal 4.
+ */
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ vcpu->run->s.regs.gprs[ry] = 4;
+ else
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ return rc;
+}
+
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -93,7 +176,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
@@ -104,9 +187,42 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EREMOTE;
}
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ /* No virtio-ccw notification? Get out quickly. */
+ if (!vcpu->kvm->arch.css_support ||
+ (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+ return -EOPNOTSUPP;
+
+ /*
+ * The layout is as follows:
+ * - gpr 2 contains the subchannel id (passed as addr)
+ * - gpr 3 contains the virtqueue index (passed as datamatch)
+ * - gpr 4 contains the index on the bus (optionally)
+ */
+ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ vcpu->run->s.regs.gprs[2] & 0xffffffff,
+ 8, &vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
+ /*
+ * Return cookie in gpr 2, but don't overwrite the register if the
+ * diagnose will be handled by userspace.
+ */
+ if (ret != -EOPNOTSUPP)
+ vcpu->run->s.regs.gprs[2] = ret;
+ /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+ return ret < 0 ? ret : 0;
+}
+
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
- int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
trace_kvm_s390_handle_diag(vcpu, code);
switch (code) {
@@ -116,8 +232,12 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end(vcpu);
case 0x9c:
return __diag_time_slice_end_directed(vcpu);
+ case 0x258:
+ return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
+ case 0x500:
+ return __diag_virtio_hypercall(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 00000000000..4653ac6e182
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,726 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce {
+ unsigned long val;
+ struct {
+ unsigned long origin : 52; /* Region- or Segment-Table Origin */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group Control */
+ unsigned long p : 1; /* Private Space Control */
+ unsigned long s : 1; /* Storage-Alteration-Event Control */
+ unsigned long x : 1; /* Space-Switch-Event Control */
+ unsigned long r : 1; /* Real-Space Control */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type Control */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum {
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 {
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry {
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_entry_fc0 {
+ unsigned long pto: 53;/* Page-Table Origin */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_entry_fc1 {
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry {
+ unsigned long val;
+ struct segment_entry_fc0 fc0;
+ struct segment_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+enum {
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 8;
+ };
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+ unsigned long addr;
+ struct {
+ unsigned long rfx : 11;
+ unsigned long rsx : 11;
+ unsigned long rtx : 11;
+ unsigned long sx : 11;
+ unsigned long px : 8;
+ unsigned long bx : 12;
+ };
+ struct {
+ unsigned long rfx01 : 2;
+ unsigned long : 9;
+ unsigned long rsx01 : 2;
+ unsigned long : 9;
+ unsigned long rtx01 : 2;
+ unsigned long : 9;
+ unsigned long sx01 : 2;
+ unsigned long : 29;
+ };
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+ unsigned long addr;
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static int ipte_lock_count;
+static DEFINE_MUTEX(ipte_mutex);
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+ union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+
+ if (vcpu->arch.sie_block->eca & 1)
+ return ic->kh != 0;
+ return ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count++;
+ if (ipte_lock_count > 1)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old = ACCESS_ONCE(*ic);
+ while (old.k) {
+ cond_resched();
+ old = ACCESS_ONCE(*ic);
+ }
+ new = old;
+ new.k = 1;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count--;
+ if (ipte_lock_count)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ new = old = ACCESS_ONCE(*ic);
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ if (!ipte_lock_count)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old = ACCESS_ONCE(*ic);
+ while (old.kg) {
+ cond_resched();
+ old = ACCESS_ONCE(*ic);
+ }
+ new = old;
+ new.k = 1;
+ new.kh++;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ new = old = ACCESS_ONCE(*ic);
+ new.kh--;
+ if (!new.kh)
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ if (!new.kh)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & 1)
+ ipte_lock_siif(vcpu);
+ else
+ ipte_lock_simple(vcpu);
+}
+
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & 1)
+ ipte_unlock_siif(vcpu);
+ else
+ ipte_unlock_simple(vcpu);
+}
+
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+ switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+ case PSW_AS_PRIMARY:
+ return vcpu->arch.sie_block->gcr[1];
+ case PSW_AS_SECONDARY:
+ return vcpu->arch.sie_block->gcr[7];
+ case PSW_AS_HOME:
+ return vcpu->arch.sie_block->gcr[13];
+ }
+ return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+ return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecuture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ * - a negative value if guest access failed due to e.g. broken
+ * guest mapping
+ * - a positve value if an access exception happened. In this case
+ * the returned value is the program interruption code as defined
+ * by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ union vaddress vaddr = {.addr = gva};
+ union raddress raddr = {.addr = gva};
+ union page_table_entry pte;
+ int dat_protection = 0;
+ union ctlreg0 ctlreg0;
+ unsigned long ptr;
+ int edat1, edat2;
+ union asce asce;
+
+ ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+ edat1 = ctlreg0.edat && test_vfacility(8);
+ edat2 = edat1 && test_vfacility(78);
+ asce.val = get_vcpu_asce(vcpu);
+ if (asce.r)
+ goto real_address;
+ ptr = asce.origin * 4096;
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl)
+ return PGM_REGION_FIRST_TRANS;
+ ptr += vaddr.rfx * 8;
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ ptr += vaddr.rsx * 8;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ ptr += vaddr.rtx * 8;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ ptr += vaddr.sx * 8;
+ break;
+ }
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rfte.val))
+ return -EFAULT;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (edat1)
+ dat_protection |= rfte.p;
+ ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rste.val))
+ return -EFAULT;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (edat1)
+ dat_protection |= rste.p;
+ ptr = rste.rto * 4096 + vaddr.rtx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rtte.val))
+ return -EFAULT;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && edat2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && edat2) {
+ dat_protection |= rtte.fc1.p;
+ raddr.rfaa = rtte.fc1.rfaa;
+ goto absolute_address;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf)
+ return PGM_SEGMENT_TRANSLATION;
+ if (vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (edat1)
+ dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &ste.val))
+ return -EFAULT;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.fc && edat1) {
+ dat_protection |= ste.fc1.p;
+ raddr.sfaa = ste.fc1.sfaa;
+ goto absolute_address;
+ }
+ dat_protection |= ste.fc0.p;
+ ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+ }
+ }
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &pte.val))
+ return -EFAULT;
+ if (pte.i)
+ return PGM_PAGE_TRANSLATION;
+ if (pte.z)
+ return PGM_TRANSLATION_SPEC;
+ if (pte.co && !edat1)
+ return PGM_TRANSLATION_SPEC;
+ dat_protection |= pte.p;
+ raddr.pfra = pte.pfra;
+real_address:
+ raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+ if (write && dat_protection)
+ return PGM_PROTECTION;
+ if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ return PGM_ADDRESSING;
+ *gpa = raddr.addr;
+ return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+ /* Check for address ranges 0..511 and 4096..4607 */
+ return (ga & ~0x11fful) == 0;
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+ union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ union asce asce;
+
+ if (!ctlreg0.lap)
+ return 0;
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && asce.p)
+ return 0;
+ return 1;
+}
+
+struct trans_exc_code_bits {
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 7;
+ unsigned long b61 : 1;
+ unsigned long as : 2; /* ASCE Identifier */
+};
+
+enum {
+ FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+ FSI_STORE = 1, /* Exception was due to store operation */
+ FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+ unsigned long *pages, unsigned long nr_pages,
+ int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec_bits;
+ int lap_enabled, rc;
+
+ memset(pgm, 0, sizeof(*pgm));
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw_bits(*psw).as;
+ lap_enabled = low_address_protection_enabled(vcpu);
+ while (nr_pages) {
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ tec_bits->addr = ga >> PAGE_SHIFT;
+ if (write && lap_enabled && is_low_address(ga)) {
+ pgm->code = PGM_PROTECTION;
+ return pgm->code;
+ }
+ ga &= PAGE_MASK;
+ if (psw_bits(*psw).t) {
+ rc = guest_translate(vcpu, ga, pages, write);
+ if (rc < 0)
+ return rc;
+ if (rc == PGM_PROTECTION)
+ tec_bits->b61 = 1;
+ if (rc)
+ pgm->code = rc;
+ } else {
+ *pages = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ pgm->code = PGM_ADDRESSING;
+ }
+ if (pgm->code)
+ return pgm->code;
+ ga += PAGE_SIZE;
+ pages++;
+ nr_pages--;
+ }
+ return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len, int write)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long _len, nr_pages, gpa, idx;
+ unsigned long pages_array[2];
+ unsigned long *pages;
+ int need_ipte_lock;
+ union asce asce;
+ int rc;
+
+ if (!len)
+ return 0;
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+ nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+ pages = pages_array;
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ pages = vmalloc(nr_pages * sizeof(unsigned long));
+ if (!pages)
+ return -ENOMEM;
+ asce.val = get_vcpu_asce(vcpu);
+ need_ipte_lock = psw_bits(*psw).t && !asce.r;
+ if (need_ipte_lock)
+ ipte_lock(vcpu);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+ for (idx = 0; idx < nr_pages && !rc; idx++) {
+ gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (write)
+ rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ else
+ rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+ len -= _len;
+ ga += _len;
+ data += _len;
+ }
+ if (need_ipte_lock)
+ ipte_unlock(vcpu);
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ vfree(pages);
+ return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, int write)
+{
+ unsigned long _len, gpa;
+ int rc = 0;
+
+ while (len && !rc) {
+ gpa = kvm_s390_real_to_abs(vcpu, gra);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (write)
+ rc = write_guest_abs(vcpu, gpa, data, _len);
+ else
+ rc = read_guest_abs(vcpu, gpa, data, _len);
+ len -= _len;
+ gra += _len;
+ data += _len;
+ }
+ return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec;
+ union asce asce;
+ int rc;
+
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+
+ gva = kvm_s390_logical_to_effective(vcpu, gva);
+ memset(pgm, 0, sizeof(*pgm));
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec->as = psw_bits(*psw).as;
+ tec->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec->addr = gva >> PAGE_SHIFT;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+ if (write) {
+ rc = pgm->code = PGM_PROTECTION;
+ return rc;
+ }
+ }
+
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
+ rc = guest_translate(vcpu, gva, gpa, write);
+ if (rc > 0) {
+ if (rc == PGM_PROTECTION)
+ tec->b61 = 1;
+ pgm->code = rc;
+ }
+ } else {
+ rc = 0;
+ *gpa = kvm_s390_real_to_abs(vcpu, gva);
+ if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+ rc = pgm->code = PGM_ADDRESSING;
+ }
+
+ return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_protection - check for low-address protection
+ * @ga: Guest address
+ *
+ * Checks whether an address is subject to low-address protection and set
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec_bits;
+
+ if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+ return 0;
+
+ memset(pgm, 0, sizeof(*pgm));
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = FSI_STORE;
+ tec_bits->as = psw_bits(*psw).as;
+ tec_bits->addr = ga >> PAGE_SHIFT;
+ pgm->code = PGM_PROTECTION;
+
+ return pgm->code;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 4703f129e95..0149cf15058 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,7 +1,7 @@
/*
* access guest memory
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -15,372 +15,321 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
#include "kvm-s390.h"
-static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
- unsigned long guestaddr)
-{
- unsigned long prefix = vcpu->arch.sie_block->prefix;
-
- if (guestaddr < 2 * PAGE_SIZE)
- guestaddr += prefix;
- else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
- guestaddr -= prefix;
-
- return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap);
-}
-
-static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u64 *result)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 7);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return get_user(*result, (unsigned long __user *) uptr);
-}
-
-static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u32 *result)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 3);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return get_user(*result, (u32 __user *) uptr);
-}
-
-static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u16 *result)
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu - guest virtual cpu
+ * @gra - guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+ unsigned long gra)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 1);
-
- if (IS_ERR(uptr))
- return PTR_ERR(uptr);
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
- return get_user(*result, (u16 __user *) uptr);
+ if (gra < 2 * PAGE_SIZE)
+ gra += prefix;
+ else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+ gra -= prefix;
+ return gra;
}
-static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u8 *result)
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extendended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+ unsigned long ga)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
- return get_user(*result, (u8 __user *) uptr);
+ if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+ return ga;
+ if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+ return ga & ((1UL << 31) - 1);
+ return ga & ((1UL << 24) - 1);
}
-static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u64 value)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 7);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return put_user(value, (u64 __user *) uptr);
-}
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
-static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u32 value)
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpu); \
+ __typeof__(*(gra)) __x = (x); \
+ unsigned long __gpa; \
+ \
+ __gpa = (unsigned long)(gra); \
+ __gpa += kvm_s390_get_prefix(__vcpu); \
+ kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
+})
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 3);
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return put_user(value, (u32 __user *) uptr);
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
}
-static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u16 value)
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 1);
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return put_user(value, (u16 __user *) uptr);
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
-static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr,
- u8 value)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write);
- return put_user(value, (u8 __user *) uptr);
-}
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len, int write);
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, int write);
-static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu,
- unsigned long guestdest,
- void *from, unsigned long n)
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondory or home space (access register mode is currently not
+ * implemented).
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the to be copied data crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns: - zero on success
+ * - a negative value if e.g. the guest mapping is broken or in
+ * case of out-of-memory. In this case the contents of pgm are
+ * undefined. Also parts of @data may have been copied to guest
+ * space.
+ * - a positive value if an access exception happened. In this case
+ * the returned value is the program interruption code and the
+ * contents of pgm may be used to inject an exception into the
+ * guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ * guest space (this is also true, if the to be copied data would cross
+ * one or more page boundaries in guest space).
+ * Therefore this function may be used for nullifying and suppressing
+ * instruction emulation.
+ * It may also be used for terminating instructions, if it is undefined
+ * if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
{
- int rc;
- unsigned long i;
- u8 *data = from;
-
- for (i = 0; i < n; i++) {
- rc = put_guest_u8(vcpu, guestdest++, *(data++));
- if (rc < 0)
- return rc;
- }
- return 0;
+ return access_guest(vcpu, ga, data, len, 1);
}
-static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu,
- unsigned long guestdest,
- void *from, unsigned long n)
-{
- int r;
- void __user *uptr;
- unsigned long size;
-
- if (guestdest + n < guestdest)
- return -EFAULT;
-
- /* simple case: all within one segment table entry? */
- if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) {
- uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_to_user(uptr, from, n);
-
- if (r)
- r = -EFAULT;
-
- goto out;
- }
-
- /* copy first segment */
- uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- size = PMD_SIZE - (guestdest & ~PMD_MASK);
-
- r = copy_to_user(uptr, from, size);
-
- if (r) {
- r = -EFAULT;
- goto out;
- }
- from += size;
- n -= size;
- guestdest += size;
-
- /* copy full segments */
- while (n >= PMD_SIZE) {
- uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_to_user(uptr, from, PMD_SIZE);
-
- if (r) {
- r = -EFAULT;
- goto out;
- }
- from += PMD_SIZE;
- n -= PMD_SIZE;
- guestdest += PMD_SIZE;
- }
-
- /* copy the tail segment */
- if (n) {
- uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_to_user(uptr, from, n);
-
- if (r)
- r = -EFAULT;
- }
-out:
- return r;
-}
-
-static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
- unsigned long guestdest,
- void *from, unsigned long n)
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
{
- return __copy_to_guest_fast(vcpu, guestdest, from, n);
+ return access_guest(vcpu, ga, data, len, 0);
}
-static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
- void *from, unsigned long n)
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
{
- unsigned long prefix = vcpu->arch.sie_block->prefix;
-
- if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
- goto slowpath;
-
- if ((guestdest < prefix) && (guestdest + n > prefix))
- goto slowpath;
-
- if ((guestdest < prefix + 2 * PAGE_SIZE)
- && (guestdest + n > prefix + 2 * PAGE_SIZE))
- goto slowpath;
-
- if (guestdest < 2 * PAGE_SIZE)
- guestdest += prefix;
- else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
- guestdest -= prefix;
-
- return __copy_to_guest_fast(vcpu, guestdest, from, n);
-slowpath:
- return __copy_to_guest_slow(vcpu, guestdest, from, n);
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
}
-static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
- unsigned long guestsrc,
- unsigned long n)
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
{
- int rc;
- unsigned long i;
- u8 *data = to;
-
- for (i = 0; i < n; i++) {
- rc = get_guest_u8(vcpu, guestsrc++, data++);
- if (rc < 0)
- return rc;
- }
- return 0;
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
-static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to,
- unsigned long guestsrc,
- unsigned long n)
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- int r;
- void __user *uptr;
- unsigned long size;
-
- if (guestsrc + n < guestsrc)
- return -EFAULT;
-
- /* simple case: all within one segment table entry? */
- if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) {
- uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_from_user(to, uptr, n);
-
- if (r)
- r = -EFAULT;
-
- goto out;
- }
-
- /* copy first segment */
- uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- size = PMD_SIZE - (guestsrc & ~PMD_MASK);
-
- r = copy_from_user(to, uptr, size);
-
- if (r) {
- r = -EFAULT;
- goto out;
- }
- to += size;
- n -= size;
- guestsrc += size;
-
- /* copy full segments */
- while (n >= PMD_SIZE) {
- uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_from_user(to, uptr, PMD_SIZE);
-
- if (r) {
- r = -EFAULT;
- goto out;
- }
- to += PMD_SIZE;
- n -= PMD_SIZE;
- guestsrc += PMD_SIZE;
- }
-
- /* copy the tail segment */
- if (n) {
- uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- r = copy_from_user(to, uptr, n);
-
- if (r)
- r = -EFAULT;
- }
-out:
- return r;
+ return access_guest_real(vcpu, gra, data, len, 1);
}
-static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
- unsigned long guestsrc,
- unsigned long n)
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- return __copy_from_guest_fast(vcpu, to, guestsrc, n);
+ return access_guest_real(vcpu, gra, data, len, 0);
}
-static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
- unsigned long guestsrc, unsigned long n)
-{
- unsigned long prefix = vcpu->arch.sie_block->prefix;
-
- if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
- goto slowpath;
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
- if ((guestsrc < prefix) && (guestsrc + n > prefix))
- goto slowpath;
-
- if ((guestsrc < prefix + 2 * PAGE_SIZE)
- && (guestsrc + n > prefix + 2 * PAGE_SIZE))
- goto slowpath;
-
- if (guestsrc < 2 * PAGE_SIZE)
- guestsrc += prefix;
- else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
- guestsrc -= prefix;
-
- return __copy_from_guest_fast(vcpu, to, guestsrc, n);
-slowpath:
- return __copy_from_guest_slow(vcpu, to, guestsrc, n);
-}
-#endif
+#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 00000000000..3e8d4092ce3
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,482 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting with estart and the length len. Takes care of overflowing
+ * intervals and tries to minimize the overall intervall size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+ u64 estop;
+
+ if (len > 0)
+ len--;
+ else
+ len = 0;
+
+ estop = estart + len;
+
+ /* 0-0 range represents "not set" */
+ if ((*start == 0) && (*stop == 0)) {
+ *start = estart;
+ *stop = estop;
+ } else if (*start <= *stop) {
+ /* increase the existing range */
+ if (estart < *start)
+ *start = estart;
+ if (estop > *stop)
+ *stop = estop;
+ } else {
+ /* "overflowing" interval, whereby *stop > *start */
+ if (estart <= *stop) {
+ if (estop > *stop)
+ *stop = estop;
+ } else if (estop > *start) {
+ if (estart < *start)
+ *start = estart;
+ }
+ /* minimize the range */
+ else if ((estop - *stop) < (*start - estart))
+ *stop = estop;
+ else
+ *start = estart;
+ }
+}
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+ vcpu->arch.guestdbg.hw_bp_info == NULL)
+ return;
+
+ /*
+ * If the guest is not interrested in branching events, we can savely
+ * limit them to the PER address range.
+ */
+ if (!(*cr9 & PER_EVENT_BRANCH))
+ *cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+ *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+ /*
+ * The instruction in front of the desired bp has to
+ * report instruction-fetching events
+ */
+ if (start < MAX_INST_SIZE) {
+ len += start;
+ start = 0;
+ } else {
+ start -= MAX_INST_SIZE;
+ len += MAX_INST_SIZE;
+ }
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+ vcpu->arch.guestdbg.hw_wp_info == NULL)
+ return;
+
+ /* if host uses storage alternation for special address
+ * spaces, enable all events and give all to the guest */
+ if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr10 = 0;
+ *cr11 = PSW_ADDR_INSN;
+ } else {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr9 |= PER_EVENT_STORE;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+ }
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+ vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+ vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+ vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+ vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+ vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+ vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * TODO: if guest psw has per enabled, otherwise 0s!
+ * This reduces the amount of reported events.
+ * Need to intercept all psw changes!
+ */
+
+ if (guestdbg_sstep_enabled(vcpu)) {
+ /* disable timer (clock-comparator) interrupts */
+ vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+ vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+ vcpu->arch.sie_block->gcr[10] = 0;
+ vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+ }
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ enable_all_hw_bp(vcpu);
+ enable_all_hw_wp(vcpu);
+ }
+
+ /* TODO: Instruction-fetching-nullification not allowed for now */
+ if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+ vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+ struct kvm_hw_breakpoint *bp_data,
+ struct kvm_hw_wp_info_arch *wp_info)
+{
+ int ret = 0;
+ wp_info->len = bp_data->len;
+ wp_info->addr = bp_data->addr;
+ wp_info->phys_addr = bp_data->phys_addr;
+ wp_info->old_data = NULL;
+
+ if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+ return -EINVAL;
+
+ wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+ if (!wp_info->old_data)
+ return -ENOMEM;
+ /* try to backup the original value */
+ ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
+ wp_info->len);
+ if (ret) {
+ kfree(wp_info->old_data);
+ wp_info->old_data = NULL;
+ }
+
+ return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+ struct kvm_hw_breakpoint *bp_data = NULL;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+ if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+ return 0;
+ else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+ return -EINVAL;
+
+ size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+ bp_data = kmalloc(size, GFP_KERNEL);
+ if (!bp_data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+ ret = -EFAULT;
+ goto error;
+ }
+
+ for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+ switch (bp_data[i].type) {
+ case KVM_HW_WP_WRITE:
+ nr_wp++;
+ break;
+ case KVM_HW_BP:
+ nr_bp++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+ if (size > 0) {
+ wp_info = kmalloc(size, GFP_KERNEL);
+ if (!wp_info) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+ if (size > 0) {
+ bp_info = kmalloc(size, GFP_KERNEL);
+ if (!bp_info) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+ switch (bp_data[i].type) {
+ case KVM_HW_WP_WRITE:
+ ret = __import_wp_info(vcpu, &bp_data[i],
+ &wp_info[nr_wp]);
+ if (ret)
+ goto error;
+ nr_wp++;
+ break;
+ case KVM_HW_BP:
+ bp_info[nr_bp].len = bp_data[i].len;
+ bp_info[nr_bp].addr = bp_data[i].addr;
+ nr_bp++;
+ break;
+ }
+ }
+
+ vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+ vcpu->arch.guestdbg.hw_bp_info = bp_info;
+ vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+ vcpu->arch.guestdbg.hw_wp_info = wp_info;
+ return 0;
+error:
+ kfree(bp_data);
+ kfree(wp_info);
+ kfree(bp_info);
+ return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+ kfree(hw_wp_info->old_data);
+ hw_wp_info->old_data = NULL;
+ }
+ kfree(vcpu->arch.guestdbg.hw_wp_info);
+ vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+ kfree(vcpu->arch.guestdbg.hw_bp_info);
+ vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+ vcpu->arch.guestdbg.nr_hw_wp = 0;
+ vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+ if (a <= b)
+ return (addr >= a) && (addr <= b);
+ else
+ /* "overflowing" interval */
+ return (addr <= a) && (addr >= b);
+}
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+ unsigned long addr)
+{
+ struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+ return NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ /* addr is directly the start or in the range of a bp */
+ if (addr == bp_info->addr)
+ goto found;
+ if (bp_info->len > 0 &&
+ in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+ goto found;
+
+ bp_info++;
+ }
+
+ return NULL;
+found:
+ return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ void *temp = NULL;
+
+ if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+ return NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+ if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+ continue;
+
+ temp = kmalloc(wp_info->len, GFP_KERNEL);
+ if (!temp)
+ continue;
+
+ /* refetch the wp data and compare it to the old value */
+ if (!read_guest(vcpu, wp_info->phys_addr, temp,
+ wp_info->len)) {
+ if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+ kfree(temp);
+ return wp_info;
+ }
+ }
+ kfree(temp);
+ temp = NULL;
+ }
+
+ return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define per_bp_event(code) \
+ (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+ (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+ u32 perc = (vcpu->arch.sie_block->perc << 24);
+ struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ struct kvm_hw_bp_info_arch *bp_info = NULL;
+ unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+ unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ if (per_write_wp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_wp > 0) {
+ wp_info = any_wp_changed(vcpu);
+ if (wp_info) {
+ debug_exit->addr = wp_info->addr;
+ debug_exit->type = KVM_HW_WP_WRITE;
+ goto exit_required;
+ }
+ }
+ if (per_bp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_bp > 0) {
+ bp_info = find_hw_bp(vcpu, addr);
+ /* remove duplicate events if PC==PER address */
+ if (bp_info && (addr != peraddr)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_HW_BP;
+ vcpu->arch.guestdbg.last_bp = addr;
+ goto exit_required;
+ }
+ /* breakpoint missed */
+ bp_info = find_hw_bp(vcpu, peraddr);
+ if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+ debug_exit->addr = peraddr;
+ debug_exit->type = KVM_HW_BP;
+ goto exit_required;
+ }
+ }
+ }
+ if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_SINGLESTEP;
+ goto exit_required;
+ }
+
+ return 0;
+exit_required:
+ return 1;
+}
+
+#define guest_per_enabled(vcpu) \
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+ u32 perc = vcpu->arch.sie_block->perc << 24;
+ u64 peraddr = vcpu->arch.sie_block->peraddr;
+ u64 addr = vcpu->arch.sie_block->gpsw.addr;
+ u64 cr9 = vcpu->arch.sie_block->gcr[9];
+ u64 cr10 = vcpu->arch.sie_block->gcr[10];
+ u64 cr11 = vcpu->arch.sie_block->gcr[11];
+ /* filter all events, demanded by the guest */
+ u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+ if (!guest_per_enabled(vcpu))
+ guest_perc = 0;
+
+ /* filter "successful-branching" events */
+ if (guest_perc & PER_EVENT_BRANCH &&
+ cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+ !in_addr_range(addr, cr10, cr11))
+ guest_perc &= ~PER_EVENT_BRANCH;
+
+ /* filter "instruction-fetching" events */
+ if (guest_perc & PER_EVENT_IFETCH &&
+ !in_addr_range(peraddr, cr10, cr11))
+ guest_perc &= ~PER_EVENT_IFETCH;
+
+ /* All other PER events will be given to the guest */
+ /* TODO: Check alterated address/address space */
+
+ vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+ if (!guest_perc)
+ vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+ if (debug_exit_required(vcpu))
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+ filter_guest_per_event(vcpu);
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 22798ec33fd..a0b586c1913 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,7 +1,7 @@
/*
* in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -16,102 +16,26 @@
#include <linux/pagemap.h>
#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
-static int handle_lctlg(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
- ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
- u64 useraddr;
- int reg, rc;
-
- vcpu->stat.instruction_lctlg++;
- if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
- return -EOPNOTSUPP;
-
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->run->s.regs.gprs[base2];
-
- if (useraddr & 7)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- reg = reg1;
-
- VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
- disp2);
- trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
-
- do {
- rc = get_guest_u64(vcpu, useraddr,
- &vcpu->arch.sie_block->gcr[reg]);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
- useraddr += 8;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static int handle_lctl(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
- u64 useraddr;
- u32 val = 0;
- int reg, rc;
-
- vcpu->stat.instruction_lctl++;
-
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->run->s.regs.gprs[base2];
-
- if (useraddr & 3)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
- VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
- disp2);
- trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
-
- reg = reg1;
- do {
- rc = get_guest_u32(vcpu, useraddr, &val);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
- vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
- vcpu->arch.sie_block->gcr[reg] |= val;
- useraddr += 4;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-static intercept_handler_t instruction_handlers[256] = {
+static const intercept_handler_t instruction_handlers[256] = {
[0x01] = kvm_s390_handle_01,
+ [0x82] = kvm_s390_handle_lpsw,
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
- [0xb7] = handle_lctl,
+ [0xb6] = kvm_s390_handle_stctl,
+ [0xb7] = kvm_s390_handle_lctl,
+ [0xb9] = kvm_s390_handle_b9,
[0xe5] = kvm_s390_handle_e5,
- [0xeb] = handle_lctlg,
+ [0xeb] = kvm_s390_handle_eb,
};
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -123,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu)
case 0x10:
vcpu->stat.exit_external_request++;
break;
- case 0x14:
- vcpu->stat.exit_external_interrupt++;
- break;
default:
break; /* nothing */
}
@@ -141,15 +62,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
- if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
- vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
- rc = SIE_INTERCEPT_RERUNVCPU;
- vcpu->run->exit_reason = KVM_EXIT_INTR;
- }
-
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
- atomic_set_mask(CPUSTAT_STOPPED,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_vcpu_stop(vcpu);
vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
rc = -EOPNOTSUPP;
@@ -172,47 +86,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
static int handle_validity(struct kvm_vcpu *vcpu)
{
- unsigned long vmaddr;
int viwhy = vcpu->arch.sie_block->ipb >> 16;
- int rc;
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- if (viwhy == 0x37) {
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
- vcpu->arch.gmap);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
- rc = fault_in_pages_writeable((char __user *) vmaddr,
- PAGE_SIZE);
- if (rc) {
- /* user will receive sigsegv, exit to user */
- rc = -EOPNOTSUPP;
- goto out;
- }
- } else
- rc = -EOPNOTSUPP;
-
-out:
- if (rc)
- VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
- viwhy);
- return rc;
+ WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+ return -EOPNOTSUPP;
}
static int handle_instruction(struct kvm_vcpu *vcpu)
@@ -229,11 +108,120 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+ pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+ switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ case PGM_SPACE_SWITCH:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ break;
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info->op_access_id = vcpu->arch.sie_block->oai;
+ break;
+ case PGM_MONITOR:
+ pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+ pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+ break;
+ case PGM_DATA:
+ pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+ break;
+ case PGM_PROTECTION:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ default:
+ break;
+ }
+
+ if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+ pgm_info->per_code = vcpu->arch.sie_block->perc;
+ pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+ }
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+*/
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_itdb *itdb;
+ int rc;
+
+ if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+ return 0;
+ if (current->thread.per_flags & PER_FLAG_NO_TE)
+ return 0;
+ itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+ rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+ if (rc)
+ return rc;
+ memset(itdb, 0, sizeof(*itdb));
+
+ return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
static int handle_prog(struct kvm_vcpu *vcpu)
{
+ struct kvm_s390_pgm_info pgm_info;
+ psw_t psw;
+ int rc;
+
vcpu->stat.exit_program_interruption++;
+
+ if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+ kvm_s390_handle_per_event(vcpu);
+ /* the interrupt might have been filtered out completely */
+ if (vcpu->arch.sie_block->iprcc == 0)
+ return 0;
+ }
+
trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
- return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+ if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+ rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* Avoid endless loops of specification exceptions */
+ if (!is_valid_psw(&psw))
+ return -EOPNOTSUPP;
+ }
+ rc = handle_itdb(vcpu);
+ if (rc)
+ return rc;
+
+ __extract_prog_irq(vcpu, &pgm_info);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
@@ -251,16 +239,110 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
return rc2;
}
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+ u16 eic = vcpu->arch.sie_block->eic;
+ struct kvm_s390_interrupt irq;
+ psw_t newpsw;
+ int rc;
+
+ vcpu->stat.exit_external_interrupt++;
+
+ rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* We can not handle clock comparator or timer interrupt with bad PSW */
+ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+ (newpsw.mask & PSW_MASK_EXT))
+ return -EOPNOTSUPP;
+
+ switch (eic) {
+ case EXT_IRQ_CLK_COMP:
+ irq.type = KVM_S390_INT_CLOCK_COMP;
+ break;
+ case EXT_IRQ_CPU_TIMER:
+ irq.type = KVM_S390_INT_CPU_TIMER;
+ break;
+ case EXT_IRQ_EXTERNAL_CALL:
+ if (kvm_s390_si_ext_call_pending(vcpu))
+ return 0;
+ irq.type = KVM_S390_INT_EXTERNAL_CALL;
+ irq.parm = vcpu->arch.sie_block->extcpuaddr;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * Handle MOVE PAGE partial execution interception.
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long srcaddr, dstaddr;
+ int reg1, reg2, rc;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* Make sure that the source is paged-in */
+ srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
+ if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+ if (rc != 0)
+ return rc;
+
+ /* Make sure that the destination is paged-in */
+ dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
+ if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+ if (rc != 0)
+ return rc;
+
+ psw->addr = __rewind_psw(*psw, 4);
+
+ return 0;
+}
+
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */
+ return handle_mvpg_pei(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */
+ return kvm_s390_handle_sigp_pei(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
static const intercept_handler_t intercept_funcs[] = {
[0x00 >> 2] = handle_noop,
[0x04 >> 2] = handle_instruction,
[0x08 >> 2] = handle_prog,
[0x0C >> 2] = handle_instruction_and_prog,
[0x10 >> 2] = handle_noop,
- [0x14 >> 2] = handle_noop,
+ [0x14 >> 2] = handle_external_interrupt,
+ [0x18 >> 2] = handle_noop,
[0x1C >> 2] = kvm_s390_handle_wait,
[0x20 >> 2] = handle_validity,
[0x28 >> 2] = handle_stop,
+ [0x38 >> 2] = handle_partial_execution,
};
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 82c481ddef7..90c8de22a2a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <asm/asm-offsets.h>
@@ -21,11 +22,33 @@
#include "gaccess.h"
#include "trace-s390.h"
-static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+#define IOINT_SCHID_MASK 0x0000ffff
+#define IOINT_SSID_MASK 0x00030000
+#define IOINT_CSSID_MASK 0x03fc0000
+#define IOINT_AI_MASK 0x04000000
+
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+
+static int is_ioint(u64 type)
+{
+ return ((type & 0xfffe0000u) != 0xfffe0000u);
+}
+
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
}
+static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO);
+}
+
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK);
+}
+
static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
@@ -35,6 +58,24 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
return 1;
}
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+ if (psw_extint_disabled(vcpu) ||
+ !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+ return 0;
+ if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+ /* No timer interrupts when single stepping */
+ return 0;
+ return 1;
+}
+
+static u64 int_word_to_isc_bits(u32 int_word)
+{
+ u8 isc = (int_word & 0x38000000) >> 27;
+
+ return (0x80 >> isc) << 24;
+}
+
static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
@@ -50,12 +91,17 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
return 1;
return 0;
- case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_CLOCK_COMP:
+ return ckc_interrupts_enabled(vcpu);
+ case KVM_S390_INT_CPU_TIMER:
if (psw_extint_disabled(vcpu))
return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
return 1;
return 0;
+ case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu))
return 0;
@@ -67,7 +113,22 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
case KVM_S390_SIGP_SET_PREFIX:
case KVM_S390_RESTART:
return 1;
+ case KVM_S390_MCHK:
+ if (psw_mchk_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
+ return 1;
+ return 0;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ if (psw_ioint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[6] &
+ int_word_to_isc_bits(inti->io.io_int_word))
+ return 1;
+ return 0;
default:
+ printk(KERN_WARNING "illegal interrupt type %llx\n",
+ inti->type);
BUG();
}
return 0;
@@ -75,24 +136,28 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(CPUSTAT_ECALL_PEND |
- CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
vcpu->arch.sie_block->lctl = 0x0000;
+ vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+ if (guestdbg_enabled(vcpu)) {
+ vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+ LCTL_CR10 | LCTL_CR11);
+ vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+ }
}
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -107,7 +172,11 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_CLOCK_COMP:
+ case KVM_S390_INT_CPU_TIMER:
if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
else
@@ -116,16 +185,128 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
case KVM_S390_SIGP_STOP:
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
break;
+ case KVM_S390_MCHK:
+ if (psw_mchk_disabled(vcpu))
+ vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR14;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ if (psw_ioint_disabled(vcpu))
+ __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR6;
+ break;
default:
BUG();
}
}
+static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ const unsigned short table[] = { 2, 4, 4, 6 };
+ int rc = 0;
+
+ switch (pgm_info->code & ~PGM_PER) {
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ case PGM_SPACE_SWITCH:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ break;
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ break;
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+ (u8 *)__LC_OP_ACCESS_ID);
+ break;
+ case PGM_MONITOR:
+ rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
+ (u64 *)__LC_MON_CLASS_NR);
+ rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+ (u64 *)__LC_MON_CODE);
+ break;
+ case PGM_DATA:
+ rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+ (u32 *)__LC_DATA_EXC_CODE);
+ break;
+ case PGM_PROTECTION:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ break;
+ }
+
+ if (pgm_info->code & PGM_PER) {
+ rc |= put_guest_lc(vcpu, pgm_info->per_code,
+ (u8 *) __LC_PER_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+ (u8 *)__LC_PER_ATMID);
+ rc |= put_guest_lc(vcpu, pgm_info->per_address,
+ (u64 *) __LC_PER_ADDRESS);
+ rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+ (u8 *) __LC_PER_ACCESS_ID);
+ }
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* last instruction only stored for these icptcodes */
+ rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+ (u16 *) __LC_PGM_ILC);
+ break;
+ case ICPT_PROGI:
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
+ (u16 *) __LC_PGM_ILC);
+ break;
+ default:
+ rc |= put_guest_lc(vcpu, 0,
+ (u16 *) __LC_PGM_ILC);
+ }
+
+ rc |= put_guest_lc(vcpu, pgm_info->code,
+ (u16 *)__LC_PGM_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+
+ return rc;
+}
+
static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
const unsigned short table[] = { 2, 4, 4, 6 };
- int rc, exception = 0;
+ int rc = 0;
switch (inti->type) {
case KVM_S390_INT_EMERGENCY:
@@ -133,74 +314,87 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->emerg.code, 0);
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, inti->emerg.code,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
break;
-
case KVM_S390_INT_EXTERNAL_CALL:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
vcpu->stat.deliver_external_call++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->extcall.code, 0);
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, inti->extcall.code,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ break;
+ case KVM_S390_INT_CLOCK_COMP:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
+ deliver_ckc_interrupt(vcpu);
+ break;
+ case KVM_S390_INT_CPU_TIMER:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
break;
-
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
inti->ext.ext_params);
vcpu->stat.deliver_service_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params, 0);
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
- if (rc == -EFAULT)
- exception = 1;
+ rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *) __LC_EXT_PARAMS2);
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
break;
-
case KVM_S390_INT_VIRTIO:
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2);
@@ -208,34 +402,18 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params,
inti->ext.ext_params2);
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
- inti->ext.ext_params2);
- if (rc == -EFAULT)
- exception = 1;
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
break;
-
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
vcpu->stat.deliver_stop_signal++;
@@ -258,18 +436,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
0, 0);
- rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
- restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
- atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ rc = write_guest_lc(vcpu,
+ offsetof(struct _lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
break;
-
case KVM_S390_PROGRAM_INT:
VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
inti->pgm.code,
@@ -277,64 +450,93 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->pgm.code, 0);
- rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_PGM_ILC,
- table[vcpu->arch.sie_block->ipa >> 14]);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ rc = __deliver_prog_irq(vcpu, &inti->pgm);
+ break;
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_PGM_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ case KVM_S390_MCHK:
+ VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+ inti->mchk.mcic);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->mchk.cr14,
+ inti->mchk.mcic);
+ rc = kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_PREFIXED);
+ rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
+ rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ {
+ __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
+ inti->io.subchannel_nr;
+ __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
+ inti->io.io_int_word;
+ VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ param0, param1);
+ rc = put_guest_lc(vcpu, inti->io.subchannel_id,
+ (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+ (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+ (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+ (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ break;
+ }
default:
BUG();
}
- if (exception) {
+ if (rc) {
printk("kvm: The guest lowcore is not mapped during interrupt "
- "delivery, killing userspace\n");
+ "delivery, killing userspace\n");
do_exit(SIGKILL);
}
}
-static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
{
- int rc, exception = 0;
-
- if (psw_extint_disabled(vcpu))
- return 0;
- if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
- return 0;
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
- if (rc == -EFAULT)
- exception = 1;
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
- if (exception) {
+ int rc;
+
+ rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ if (rc) {
printk("kvm: The guest lowcore is not mapped during interrupt "
"delivery, killing userspace\n");
do_exit(SIGKILL);
}
- return 1;
}
-static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+/* Check whether SIGP interpretation facility has an external call pending */
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+ atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
+
+ if (!psw_extint_disabled(vcpu) &&
+ (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
+ (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
+ (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ return 1;
+
+ return 0;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -361,19 +563,23 @@ static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
spin_unlock(&fi->lock);
}
- if ((!rc) && (vcpu->arch.sie_block->ckc <
- get_clock() + vcpu->arch.sie_block->epoch)) {
- if ((!psw_extint_disabled(vcpu)) &&
- (vcpu->arch.sie_block->gcr[0] & 0x800ul))
- rc = 1;
- }
+ if (!rc && kvm_cpu_has_pending_timer(vcpu))
+ rc = 1;
+
+ if (!rc && kvm_s390_si_ext_call_pending(vcpu))
+ rc = 1;
return rc;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return 0;
+ if (!(vcpu->arch.sie_block->ckc <
+ get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+ return 0;
+ if (!ckc_interrupts_enabled(vcpu))
+ return 0;
+ return 1;
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -396,13 +602,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
- if (psw_extint_disabled(vcpu) ||
- (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+ if (!ckc_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
goto no_timer;
}
- now = get_clock() + vcpu->arch.sie_block->epoch;
+ now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
if (vcpu->arch.sie_block->ckc < now) {
__unset_cpu_idle(vcpu);
return 0;
@@ -413,13 +618,15 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ add_wait_queue(&vcpu->wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
list_empty(&vcpu->arch.local_int.float_int->list) &&
(!vcpu->arch.local_int.timer_due) &&
- !signal_pending(current)) {
+ !signal_pending(current) &&
+ !kvm_s390_si_ext_call_pending(vcpu)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
@@ -429,9 +636,11 @@ no_timer:
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
@@ -442,8 +651,8 @@ void kvm_s390_tasklet(unsigned long parm)
spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->arch.local_int.wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
spin_unlock(&vcpu->arch.local_int.lock);
}
@@ -456,11 +665,31 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+ vcpu->preempted = true;
tasklet_schedule(&vcpu->arch.tasklet);
return HRTIMER_NORESTART;
}
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ list_del(&inti->list);
+ kfree(inti);
+ }
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+
+ /* clear pending external calls set by sigp interpretation facility */
+ atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ atomic_clear_mask(SIGP_CTRL_C,
+ &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
+}
+
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -491,9 +720,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
} while (deliver);
}
- if ((vcpu->arch.sie_block->ckc <
- get_clock() + vcpu->arch.sie_block->epoch))
- __try_deliver_ckc_interrupt(vcpu);
+ if (kvm_cpu_has_pending_timer(vcpu))
+ deliver_ckc_interrupt(vcpu);
if (atomic_read(&fi->active)) {
do {
@@ -502,6 +730,63 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
+ fi->irq_count--;
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+}
+
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+ int deliver;
+
+ __reset_intercept_indicators(vcpu);
+ if (atomic_read(&li->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ if ((inti->type == KVM_S390_MCHK) &&
+ __interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&li->list))
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+
+ if (atomic_read(&fi->active)) {
+ do {
+ deliver = 0;
+ spin_lock(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ if ((inti->type == KVM_S390_MCHK) &&
+ __interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ fi->irq_count--;
deliver = 1;
break;
}
@@ -535,51 +820,109 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
- BUG_ON(waitqueue_active(&li->wq));
+ BUG_ON(waitqueue_active(li->wq));
spin_unlock_bh(&li->lock);
return 0;
}
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int)
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
{
- struct kvm_s390_local_interrupt *li;
- struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_interrupt_info *inti;
- int sigcpu;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
- switch (s390int->type) {
- case KVM_S390_INT_VIRTIO:
- VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
- s390int->parm, s390int->parm64);
- inti->type = s390int->type;
- inti->ext.ext_params = s390int->parm;
- inti->ext.ext_params2 = s390int->parm64;
- break;
- case KVM_S390_INT_SERVICE:
- VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
- inti->type = s390int->type;
- inti->ext.ext_params = s390int->parm;
+ VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+ pgm_info->code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ pgm_info->code, 0, 1);
+
+ inti->type = KVM_S390_PROGRAM_INT;
+ memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
+ spin_lock_bh(&li->lock);
+ list_add(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ BUG_ON(waitqueue_active(li->wq));
+ spin_unlock_bh(&li->lock);
+ return 0;
+}
+
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 cr6, u64 schid)
+{
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *inti, *iter;
+
+ if ((!schid && !cr6) || (schid && cr6))
+ return NULL;
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ inti = NULL;
+ list_for_each_entry(iter, &fi->list, list) {
+ if (!is_ioint(iter->type))
+ continue;
+ if (cr6 &&
+ ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
+ continue;
+ if (schid) {
+ if (((schid & 0x00000000ffff0000) >> 16) !=
+ iter->io.subchannel_id)
+ continue;
+ if ((schid & 0x000000000000ffff) !=
+ iter->io.subchannel_nr)
+ continue;
+ }
+ inti = iter;
break;
- case KVM_S390_PROGRAM_INT:
- case KVM_S390_SIGP_STOP:
- case KVM_S390_INT_EXTERNAL_CALL:
- case KVM_S390_INT_EMERGENCY:
- default:
- kfree(inti);
- return -EINVAL;
}
- trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
- 2);
+ if (inti) {
+ list_del_init(&inti->list);
+ fi->irq_count--;
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+ return inti;
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *iter;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int sigcpu;
+ int rc = 0;
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
- list_add_tail(&inti->list, &fi->list);
+ if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+ rc = -EINVAL;
+ goto unlock_fi;
+ }
+ fi->irq_count++;
+ if (!is_ioint(inti->type)) {
+ list_add_tail(&inti->list, &fi->list);
+ } else {
+ u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
+ /* Keep I/O interrupts sorted in isc order. */
+ list_for_each_entry(iter, &fi->list, list) {
+ if (!is_ioint(iter->type))
+ continue;
+ if (int_word_to_isc_bits(iter->io.io_int_word)
+ <= isc_bits)
+ continue;
+ break;
+ }
+ list_add_tail(&inti->list, &iter->list);
+ }
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
@@ -587,17 +930,80 @@ int kvm_s390_inject_vm(struct kvm *kvm,
sigcpu = fi->next_rr_cpu++;
if (sigcpu == KVM_MAX_VCPUS)
sigcpu = fi->next_rr_cpu = 0;
- } while (fi->local_int[sigcpu] == NULL);
+ } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
}
- li = fi->local_int[sigcpu];
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
+ kvm_get_vcpu(kvm, sigcpu)->preempted = true;
spin_unlock_bh(&li->lock);
+unlock_fi:
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
- return 0;
+ return rc;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct kvm_s390_interrupt_info *inti;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = s390int->type;
+ switch (inti->type) {
+ case KVM_S390_INT_VIRTIO:
+ VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
+ s390int->parm, s390int->parm64);
+ inti->ext.ext_params = s390int->parm;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_INT_SERVICE:
+ VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+ inti->ext.ext_params = s390int->parm;
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ inti->type = s390int->type;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_MCHK:
+ VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+ s390int->parm64);
+ inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+ inti->mchk.mcic = s390int->parm64;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ if (inti->type & IOINT_AI_MASK)
+ VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
+ else
+ VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
+ s390int->type & IOINT_CSSID_MASK,
+ s390int->type & IOINT_SSID_MASK,
+ s390int->type & IOINT_SCHID_MASK);
+ inti->io.subchannel_id = s390int->parm >> 16;
+ inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
+ inti->io.io_int_parm = s390int->parm64 >> 32;
+ inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
+ break;
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+ trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+ 2);
+
+ return __inject_vm(kvm, inti);
+}
+
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ __inject_vm(kvm, inti);
}
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -629,6 +1035,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
break;
case KVM_S390_SIGP_STOP:
case KVM_S390_RESTART:
+ case KVM_S390_INT_CLOCK_COMP:
+ case KVM_S390_INT_CPU_TIMER:
VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
inti->type = s390int->type;
break;
@@ -651,8 +1059,19 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
inti->type = s390int->type;
inti->emerg.code = s390int->parm;
break;
+ case KVM_S390_MCHK:
+ VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+ s390int->parm64);
+ inti->type = s390int->type;
+ inti->mchk.mcic = s390int->parm64;
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ inti->type = s390int->type;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
default:
kfree(inti);
return -EINVAL;
@@ -671,9 +1090,530 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ vcpu->preempted = true;
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
}
+
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ list_del(&inti->list);
+ kfree(inti);
+ }
+ fi->irq_count = 0;
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+}
+
+static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+ u8 *addr)
+{
+ struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+ struct kvm_s390_irq irq = {0};
+
+ irq.type = inti->type;
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ irq.u.ext = inti->ext;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ irq.u.io = inti->io;
+ break;
+ case KVM_S390_MCHK:
+ irq.u.mchk = inti->mchk;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uptr, &irq, sizeof(irq)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+{
+ struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_float_interrupt *fi;
+ int ret = 0;
+ int n = 0;
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+
+ list_for_each_entry(inti, &fi->list, list) {
+ if (len < sizeof(struct kvm_s390_irq)) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ break;
+ }
+ ret = copy_irq_to_user(inti, buf);
+ if (ret)
+ break;
+ buf += sizeof(struct kvm_s390_irq);
+ len -= sizeof(struct kvm_s390_irq);
+ n++;
+ }
+
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+
+ return ret < 0 ? ret : n;
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+ r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+ attr->attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+ u64 addr)
+{
+ struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+ void *target = NULL;
+ void __user *source;
+ u64 size;
+
+ if (get_user(inti->type, (u64 __user *)addr))
+ return -EFAULT;
+
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ target = (void *) &inti->ext;
+ source = &uptr->u.ext;
+ size = sizeof(inti->ext);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ target = (void *) &inti->io;
+ source = &uptr->u.io;
+ size = sizeof(inti->io);
+ break;
+ case KVM_S390_MCHK:
+ target = (void *) &inti->mchk;
+ source = &uptr->u.mchk;
+ size = sizeof(inti->mchk);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_from_user(target, source, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int r = 0;
+ int len = attr->attr;
+
+ if (len % sizeof(struct kvm_s390_irq) != 0)
+ return -EINVAL;
+ else if (len > KVM_S390_FLIC_MAX_BUFFER)
+ return -EINVAL;
+
+ while (len >= sizeof(struct kvm_s390_irq)) {
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ r = copy_irq_from_user(inti, attr->addr);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ r = __inject_vm(dev->kvm, inti);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ len -= sizeof(struct kvm_s390_irq);
+ attr->addr += sizeof(struct kvm_s390_irq);
+ }
+
+ return r;
+}
+
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+ if (id >= MAX_S390_IO_ADAPTERS)
+ return NULL;
+ return kvm->arch.adapters[id];
+}
+
+static int register_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct s390_io_adapter *adapter;
+ struct kvm_s390_io_adapter adapter_info;
+
+ if (copy_from_user(&adapter_info,
+ (void __user *)attr->addr, sizeof(adapter_info)))
+ return -EFAULT;
+
+ if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
+ (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ return -EINVAL;
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&adapter->maps);
+ init_rwsem(&adapter->maps_lock);
+ atomic_set(&adapter->nr_maps, 0);
+ adapter->id = adapter_info.id;
+ adapter->isc = adapter_info.isc;
+ adapter->maskable = adapter_info.maskable;
+ adapter->masked = false;
+ adapter->swap = adapter_info.swap;
+ dev->kvm->arch.adapters[adapter->id] = adapter;
+
+ return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+ int ret;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter || !adapter->maskable)
+ return -EINVAL;
+ ret = adapter->masked;
+ adapter->masked = masked;
+ return ret;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map;
+ int ret;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&map->list);
+ map->guest_addr = addr;
+ map->addr = gmap_translate(addr, kvm->arch.gmap);
+ if (map->addr == -EFAULT) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret != 1);
+ down_write(&adapter->maps_lock);
+ if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+ list_add_tail(&map->list, &adapter->maps);
+ ret = 0;
+ } else {
+ put_page(map->page);
+ ret = -EINVAL;
+ }
+ up_write(&adapter->maps_lock);
+out:
+ if (ret)
+ kfree(map);
+ return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map, *tmp;
+ int found = 0;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ down_write(&adapter->maps_lock);
+ list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+ if (map->guest_addr == addr) {
+ found = 1;
+ atomic_dec(&adapter->nr_maps);
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ break;
+ }
+ }
+ up_write(&adapter->maps_lock);
+
+ return found ? 0 : -EINVAL;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+ int i;
+ struct s390_map_info *map, *tmp;
+
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+ if (!kvm->arch.adapters[i])
+ continue;
+ list_for_each_entry_safe(map, tmp,
+ &kvm->arch.adapters[i]->maps, list) {
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ }
+ kfree(kvm->arch.adapters[i]);
+ }
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_io_adapter_req req;
+ struct s390_io_adapter *adapter;
+ int ret;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ adapter = get_io_adapter(dev->kvm, req.id);
+ if (!adapter)
+ return -EINVAL;
+ switch (req.type) {
+ case KVM_S390_IO_ADAPTER_MASK:
+ ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+ if (ret > 0)
+ ret = 0;
+ break;
+ case KVM_S390_IO_ADAPTER_MAP:
+ ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+ break;
+ case KVM_S390_IO_ADAPTER_UNMAP:
+ ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = 0;
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_ENQUEUE:
+ r = enqueue_floating_irq(dev, attr);
+ break;
+ case KVM_DEV_FLIC_CLEAR_IRQS:
+ r = 0;
+ kvm_s390_clear_float_irqs(dev->kvm);
+ break;
+ case KVM_DEV_FLIC_APF_ENABLE:
+ dev->kvm->arch.gmap->pfault_enabled = 1;
+ break;
+ case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ dev->kvm->arch.gmap->pfault_enabled = 0;
+ /*
+ * Make sure no async faults are in transition when
+ * clearing the queues. So we don't need to worry
+ * about late coming workers.
+ */
+ synchronize_srcu(&dev->kvm->srcu);
+ kvm_for_each_vcpu(i, vcpu, dev->kvm)
+ kvm_clear_async_pf_completion_queue(vcpu);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ r = register_io_adapter(dev, attr);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ r = modify_io_adapter(dev, attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+ if (!dev)
+ return -EINVAL;
+ if (dev->kvm->arch.flic)
+ return -EINVAL;
+ dev->kvm->arch.flic = dev;
+ return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{
+ dev->kvm->arch.flic = NULL;
+ kfree(dev);
+}
+
+/* s390 floating irq controller (flic) */
+struct kvm_device_ops kvm_flic_ops = {
+ .name = "kvm-flic",
+ .get_attr = flic_get_attr,
+ .set_attr = flic_set_attr,
+ .create = flic_create,
+ .destroy = flic_destroy,
+};
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+ unsigned long bit;
+
+ bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+ return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+ u64 addr)
+{
+ struct s390_map_info *map;
+
+ if (!adapter)
+ return NULL;
+
+ list_for_each_entry(map, &adapter->maps, list) {
+ if (map->guest_addr == addr)
+ return map;
+ }
+ return NULL;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int)
+{
+ unsigned long bit;
+ int summary_set, idx;
+ struct s390_map_info *info;
+ void *map;
+
+ info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!info)
+ return -1;
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ idx = srcu_read_lock(&kvm->srcu);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!info) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -1;
+ }
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ summary_set = test_and_set_bit(bit, map);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ srcu_read_unlock(&kvm->srcu, idx);
+ return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ int ret;
+ struct s390_io_adapter *adapter;
+
+ /* We're only interested in the 0->1 transition. */
+ if (!level)
+ return 0;
+ adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+ if (!adapter)
+ return -1;
+ down_read(&adapter->maps_lock);
+ ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+ up_read(&adapter->maps_lock);
+ if ((ret > 0) && !adapter->masked) {
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = (adapter->isc << 27) | 0x80000000,
+ };
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (ret == 0)
+ ret = 1;
+ }
+ return ret;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int ret;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_S390_ADAPTER:
+ e->set = set_adapter_int;
+ e->adapter.summary_addr = ue->u.adapter.summary_addr;
+ e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ e->adapter.summary_offset = ue->u.adapter.summary_offset;
+ e->adapter.ind_offset = ue->u.adapter.ind_offset;
+ e->adapter.adapter_id = ue->u.adapter.adapter_id;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 00000000000..d98e4159643
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f090e819bf7..2f3e14fe91a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -11,6 +11,7 @@
* Christian Borntraeger <borntraeger@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
+ * Jason J. Herne <jjherne@us.ibm.com>
*/
#include <linux/compiler.h>
@@ -28,6 +29,7 @@
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
+#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -50,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+ { "instruction_stctl", VCPU_STAT(instruction_stctl) },
+ { "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
@@ -59,13 +63,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+ { "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
@@ -83,7 +90,14 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-static unsigned long long *facilities;
+unsigned long *vfacilities;
+static struct gmap_notifier gmap_notifier;
+
+/* test availability of vfacility */
+int test_vfacility(unsigned long nr)
+{
+ return __test_facility(nr, (void *) vfacilities);
+}
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
@@ -96,13 +110,18 @@ void kvm_arch_hardware_disable(void *garbage)
{
}
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
int kvm_arch_hardware_setup(void)
{
+ gmap_notifier.notifier_call = kvm_gmap_notifier;
+ gmap_register_ipte_notifier(&gmap_notifier);
return 0;
}
void kvm_arch_hardware_unsetup(void)
{
+ gmap_unregister_ipte_notifier(&gmap_notifier);
}
void kvm_arch_check_processor_compat(void *rtn)
@@ -138,16 +157,27 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
+ case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_S390_CSS_SUPPORT:
+ case KVM_CAP_IRQFD:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_VM_ATTRIBUTES:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
case KVM_CAP_S390_COW:
- r = sclp_get_fac85() & 0x2;
+ r = MACHINE_HAS_ESOP;
break;
default:
r = 0;
@@ -155,6 +185,25 @@ int kvm_dev_ioctl_check_extension(long ext)
return r;
}
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ gfn_t cur_gfn, last_gfn;
+ unsigned long address;
+ struct gmap *gmap = kvm->arch.gmap;
+
+ down_read(&gmap->mm->mmap_sem);
+ /* Loop over all guest pages */
+ last_gfn = memslot->base_gfn + memslot->npages;
+ for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
+ address = gfn_to_hva_memslot(memslot, cur_gfn);
+
+ if (gmap_test_and_clear_dirty(address, gmap))
+ mark_page_dirty(kvm, cur_gfn);
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+
/* Section: vm related */
/*
* Get (and clear) the dirty memory log for a memory slot.
@@ -162,7 +211,129 @@ int kvm_dev_ioctl_check_extension(long ext)
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- return 0;
+ int r;
+ unsigned long n;
+ struct kvm_memory_slot *memslot;
+ int is_dirty = 0;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_USER_MEM_SLOTS)
+ goto out;
+
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ kvm_s390_sync_dirty_log(kvm, memslot);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* Clear the dirty log */
+ if (is_dirty) {
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_IRQCHIP:
+ kvm->arch.use_irqchip = 1;
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
+static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+ unsigned int idx;
+ switch (attr->attr) {
+ case KVM_S390_VM_MEM_ENABLE_CMMA:
+ ret = -EBUSY;
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus) == 0) {
+ kvm->arch.use_cmma = 1;
+ ret = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ break;
+ case KVM_S390_VM_MEM_CLR_CMMA:
+ mutex_lock(&kvm->lock);
+ idx = srcu_read_lock(&kvm->srcu);
+ page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+ srcu_read_unlock(&kvm->srcu, idx);
+ mutex_unlock(&kvm->lock);
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ return ret;
+}
+
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_S390_VM_MEM_CTRL:
+ ret = kvm_s390_mem_control(kvm, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_S390_VM_MEM_CTRL:
+ switch (attr->attr) {
+ case KVM_S390_VM_MEM_ENABLE_CMMA:
+ case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
}
long kvm_arch_vm_ioctl(struct file *filp,
@@ -170,6 +341,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
int r;
switch (ioctl) {
@@ -182,6 +354,47 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
+ r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ kvm_set_irq_routing(kvm, &routing, 0, 0);
+ r = 0;
+ }
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_set_attr(kvm, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_get_attr(kvm, &attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_has_attr(kvm, &attr);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -193,6 +406,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int rc;
char debug_name[16];
+ static unsigned long sca_offset;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -214,6 +428,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
+ spin_lock(&kvm_lock);
+ sca_offset = (sca_offset + 16) & 0x7f0;
+ kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+ spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
@@ -223,6 +441,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
+ init_waitqueue_head(&kvm->arch.ipte_wq);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
@@ -233,7 +452,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.gmap = gmap_alloc(current->mm);
if (!kvm->arch.gmap)
goto out_nogmap;
+ kvm->arch.gmap->private = kvm;
+ kvm->arch.gmap->pfault_enabled = 0;
}
+
+ kvm->arch.css_support = 0;
+ kvm->arch.use_irqchip = 0;
+
+ spin_lock_init(&kvm->arch.start_stop_lock);
+
return 0;
out_nogmap:
debug_unregister(kvm->arch.dbf);
@@ -247,6 +474,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+ kvm_s390_clear_local_irqs(vcpu);
+ kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -259,9 +488,12 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
+ if (kvm_s390_cmma_enabled(vcpu->kvm))
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));
+
kvm_vcpu_uninit(vcpu);
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -291,15 +523,20 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
debug_unregister(kvm->arch.dbf);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
+ kvm_s390_clear_float_irqs(kvm);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
return -ENOMEM;
+ vcpu->arch.gmap->private = vcpu->kvm;
return 0;
}
@@ -318,10 +555,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- save_fp_regs(&vcpu->arch.host_fpregs);
+ save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+ save_fp_regs(vcpu->arch.host_fpregs.fprs);
save_access_regs(vcpu->arch.host_acrs);
- vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
- restore_fp_regs(&vcpu->arch.guest_fpregs);
+ restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -331,9 +569,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
- save_fp_regs(&vcpu->arch.guest_fpregs);
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
save_access_regs(vcpu->run->s.regs.acrs);
- restore_fp_regs(&vcpu->arch.host_fpregs);
+ restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.host_fpregs.fprs);
restore_access_regs(vcpu->arch.host_acrs);
}
@@ -352,7 +592,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
- atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_s390_vcpu_stop(vcpu);
+ kvm_s390_clear_local_irqs(vcpu);
}
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -360,27 +604,62 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
return 0;
}
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+ free_page(vcpu->arch.sie_block->cbrlo);
+ vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+ if (!vcpu->arch.sie_block->cbrlo)
+ return -ENOMEM;
+
+ vcpu->arch.sie_block->ecb2 |= 0x80;
+ vcpu->arch.sie_block->ecb2 &= ~0x08;
+ return 0;
+}
+
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ int rc = 0;
+
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
- CPUSTAT_STOPPED);
+ CPUSTAT_STOPPED |
+ CPUSTAT_GED);
vcpu->arch.sie_block->ecb = 6;
- vcpu->arch.sie_block->eca = 0xC1002001U;
- vcpu->arch.sie_block->fac = (int) (long) facilities;
+ if (test_vfacility(50) && test_vfacility(73))
+ vcpu->arch.sie_block->ecb |= 0x10;
+
+ vcpu->arch.sie_block->ecb2 = 8;
+ vcpu->arch.sie_block->eca = 0xD1002000U;
+ if (sclp_has_siif())
+ vcpu->arch.sie_block->eca |= 1;
+ vcpu->arch.sie_block->fac = (int) (long) vfacilities;
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
+ ICTL_TPROT;
+
+ if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+ rc = kvm_s390_vcpu_setup_cmma(vcpu);
+ if (rc)
+ return rc;
+ }
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
- return 0;
+ return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
+ struct sie_page *sie_page;
int rc = -EINVAL;
if (id >= KVM_MAX_VCPUS)
@@ -388,16 +667,17 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
rc = -ENOMEM;
- vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
goto out;
- vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
- get_zeroed_page(GFP_KERNEL);
-
- if (!vcpu->arch.sie_block)
+ sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+ if (!sie_page)
goto out_free_cpu;
+ vcpu->arch.sie_block = &sie_page->sie_block;
+ vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
if (!kvm->arch.sca) {
@@ -416,11 +696,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- spin_lock(&kvm->arch.float_int.lock);
- kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
- init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- spin_unlock(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@@ -433,16 +710,58 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- /* kvm common code refers to this, but never calls it */
- BUG();
- return 0;
+ return kvm_cpu_has_interrupt(vcpu);
+}
+
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+ s390_vcpu_block(vcpu);
+ exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+ int i;
+ struct kvm *kvm = gmap->private;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* match against both prefix pages */
+ if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
+ VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ exit_sie_sync(vcpu);
+ }
+ }
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -474,6 +793,26 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = put_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = put_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = put_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = put_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = put_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
@@ -503,6 +842,26 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = get_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = get_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = get_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = get_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = get_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
@@ -547,9 +906,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ if (test_fp_ctl(fpu->fpc))
+ return -EINVAL;
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
- restore_fp_regs(&vcpu->arch.guest_fpregs);
+ vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+ restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
return 0;
}
@@ -579,10 +941,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+ KVM_GUESTDBG_USE_HW_BP | \
+ KVM_GUESTDBG_ENABLE)
+
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- return -EINVAL; /* not implemented yet */
+ int rc = 0;
+
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS)
+ return -EINVAL;
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ /* enforce guest PER */
+ atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+ if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+ rc = kvm_s390_import_bp_data(vcpu, dbg);
+ } else {
+ atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ vcpu->arch.guestdbg.last_bp = 0;
+ }
+
+ if (rc) {
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+ atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ }
+
+ return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -597,46 +989,285 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+ if (!MACHINE_IS_LPAR)
+ return false;
+ /* only enable for z10 and later */
+ if (!MACHINE_HAS_EDAT1)
+ return false;
+ if (!kvm->arch.use_cmma)
+ return false;
+ return true;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
+retry:
+ s390_vcpu_unblock(vcpu);
+ /*
+ * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+ * This ensures that the ipte instruction for this request has
+ * already finished. We might race against a second unmapper that
+ * wants to set the blocking bit. Lets just retry the request loop.
+ */
+ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ int rc;
+ rc = gmap_ipte_notify(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu),
+ PAGE_SIZE * 2);
+ if (rc)
+ return rc;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+ if (!ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+ atomic_set_mask(CPUSTAT_IBS,
+ &vcpu->arch.sie_block->cpuflags);
+ }
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+ if (ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+ atomic_clear_mask(CPUSTAT_IBS,
+ &vcpu->arch.sie_block->cpuflags);
+ }
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
+{
+ struct mm_struct *mm = current->mm;
+ hva_t hva;
+ long rc;
+
+ hva = gmap_fault(gpa, vcpu->arch.gmap);
+ if (IS_ERR_VALUE(hva))
+ return (long)hva;
+ down_read(&mm->mmap_sem);
+ rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+
+ return rc < 0 ? rc : 0;
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
+{
+ struct kvm_s390_interrupt inti;
+ inti.parm64 = token;
+
+ if (start_token) {
+ inti.type = KVM_S390_INT_PFAULT_INIT;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+ } else {
+ inti.type = KVM_S390_INT_PFAULT_DONE;
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ }
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ /* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+ /*
+ * s390 will always inject the page directly,
+ * but we still want check_async_completion to cleanup
+ */
+ return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+ hva_t hva;
+ struct kvm_arch_async_pf arch;
int rc;
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ return 0;
+ if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+ vcpu->arch.pfault_compare)
+ return 0;
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+ return 0;
+ if (!vcpu->arch.gmap->pfault_enabled)
+ return 0;
+
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+ hva += current->thread.gmap_addr & ~PAGE_MASK;
+ if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
+ return 0;
+
+ rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return rc;
+}
+
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ int rc, cpuflags;
+
+ /*
+ * On s390 notifications for arriving pages will be delivered directly
+ * to the guest but the house keeping for completed pfaults is
+ * handled outside the worker.
+ */
+ kvm_check_async_pf_completion(vcpu);
+
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched())
schedule();
- if (test_thread_flag(TIF_MCCK_PENDING))
+ if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
if (!kvm_is_ucontrol(vcpu->kvm))
kvm_s390_deliver_pending_interrupts(vcpu);
- vcpu->arch.sie_block->icptcode = 0;
- preempt_disable();
- kvm_guest_enter();
- preempt_enable();
- VCPU_EVENT(vcpu, 6, "entering sie flags %x",
- atomic_read(&vcpu->arch.sie_block->cpuflags));
- trace_kvm_s390_sie_enter(vcpu,
- atomic_read(&vcpu->arch.sie_block->cpuflags));
- rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
- if (rc) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- rc = SIE_INTERCEPT_UCONTROL;
- } else {
- VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
- trace_kvm_s390_sie_fault(vcpu);
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- rc = 0;
- }
+ rc = kvm_s390_handle_requests(vcpu);
+ if (rc)
+ return rc;
+
+ if (guestdbg_enabled(vcpu)) {
+ kvm_s390_backup_guest_per_regs(vcpu);
+ kvm_s390_patch_guest_per_regs(vcpu);
}
+
+ vcpu->arch.sie_block->icptcode = 0;
+ cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+ trace_kvm_s390_sie_enter(vcpu, cpuflags);
+
+ return 0;
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+ int rc = -1;
+
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
- kvm_guest_exit();
+
+ if (guestdbg_enabled(vcpu))
+ kvm_s390_restore_guest_per_regs(vcpu);
+
+ if (exit_reason >= 0) {
+ rc = 0;
+ } else if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code =
+ current->thread.gmap_addr;
+ vcpu->run->s390_ucontrol.pgm_code = 0x10;
+ rc = -EREMOTE;
+
+ } else if (current->thread.gmap_pfault) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ current->thread.gmap_pfault = 0;
+ if (kvm_arch_setup_async_pf(vcpu)) {
+ rc = 0;
+ } else {
+ gpa_t gpa = current->thread.gmap_addr;
+ rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
+ }
+ }
+
+ if (rc == -1) {
+ VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ trace_kvm_s390_sie_fault(vcpu);
+ rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+
+ if (rc == 0) {
+ if (kvm_is_ucontrol(vcpu->kvm))
+ /* Don't exit for host interrupts. */
+ rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
+ else
+ rc = kvm_handle_sie_intercept(vcpu);
+ }
+
+ return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int rc, exit_reason;
+
+ /*
+ * We try to hold kvm->srcu during most of vcpu_run (except when run-
+ * ning the guest), so that memslots (and other stuff) are protected
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ do {
+ rc = vcpu_pre_run(vcpu);
+ if (rc)
+ break;
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ /*
+ * As PF_VCPU will be used in fault handler, between
+ * guest_enter and guest_exit should be no uaccess.
+ */
+ preempt_disable();
+ kvm_guest_enter();
+ preempt_enable();
+ exit_reason = sie64a(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs);
+ kvm_guest_exit();
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ rc = vcpu_post_run(vcpu, exit_reason);
+ } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
return rc;
}
@@ -645,13 +1276,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int rc;
sigset_t sigsaved;
-rerun_vcpu:
+ if (guestdbg_exit_pending(vcpu)) {
+ kvm_s390_prepare_debug_exit(vcpu);
+ return 0;
+ }
+
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
- atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
-
- BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+ kvm_s390_vcpu_start(vcpu);
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
@@ -659,6 +1292,8 @@ rerun_vcpu:
case KVM_EXIT_INTR:
case KVM_EXIT_S390_RESET:
case KVM_EXIT_S390_UCONTROL:
+ case KVM_EXIT_S390_TSCH:
+ case KVM_EXIT_DEBUG:
break;
default:
BUG();
@@ -677,34 +1312,17 @@ rerun_vcpu:
}
might_fault();
-
- do {
- rc = __vcpu_run(vcpu);
- if (rc)
- break;
- if (kvm_is_ucontrol(vcpu->kvm))
- rc = -EOPNOTSUPP;
- else
- rc = kvm_handle_sie_intercept(vcpu);
- } while (!signal_pending(current) && !rc);
-
- if (rc == SIE_INTERCEPT_RERUNVCPU)
- goto rerun_vcpu;
+ rc = __vcpu_run(vcpu);
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
rc = -EINTR;
}
-#ifdef CONFIG_KVM_S390_UCONTROL
- if (rc == SIE_INTERCEPT_UCONTROL) {
- kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
- kvm_run->s390_ucontrol.trans_exc_code =
- current->thread.gmap_addr;
- kvm_run->s390_ucontrol.pgm_code = 0x10;
+ if (guestdbg_exit_pending(vcpu) && !rc) {
+ kvm_s390_prepare_debug_exit(vcpu);
rc = 0;
}
-#endif
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
@@ -723,7 +1341,7 @@ rerun_vcpu:
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
- kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
if (vcpu->sigset_active)
@@ -733,81 +1351,192 @@ rerun_vcpu:
return rc;
}
-static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
- unsigned long n, int prefix)
-{
- if (prefix)
- return copy_to_guest(vcpu, guestdest, from, n);
- else
- return copy_to_guest_absolute(vcpu, guestdest, from, n);
-}
-
/*
* store status at address
* we use have two special cases:
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
- int prefix;
+ unsigned int px;
+ u64 clkcomp;
+ int rc;
- if (addr == KVM_S390_STORE_STATUS_NOADDR) {
- if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+ if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+ if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
- addr = SAVE_AREA_BASE;
- prefix = 0;
- } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
- if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+ gpa = SAVE_AREA_BASE;
+ } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+ if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
- addr = SAVE_AREA_BASE;
- prefix = 1;
- } else
- prefix = 0;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
- vcpu->arch.guest_fpregs.fprs, 128, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
- vcpu->run->s.regs.gprs, 128, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
- &vcpu->arch.sie_block->gpsw, 16, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
- &vcpu->arch.sie_block->prefix, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu,
- addr + offsetof(struct save_area, fp_ctrl_reg),
- &vcpu->arch.guest_fpregs.fpc, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
- &vcpu->arch.sie_block->todpr, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
- &vcpu->arch.sie_block->cputm, 8, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
- &vcpu->arch.sie_block->ckc, 8, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
- &vcpu->run->s.regs.acrs, 64, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu,
- addr + offsetof(struct save_area, ctrl_regs),
- &vcpu->arch.sie_block->gcr, 128, prefix))
- return -EFAULT;
- return 0;
+ gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+ }
+ rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+ vcpu->arch.guest_fpregs.fprs, 128);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+ vcpu->run->s.regs.gprs, 128);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+ &vcpu->arch.sie_block->gpsw, 16);
+ px = kvm_s390_get_prefix(vcpu);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+ &px, 4);
+ rc |= write_guest_abs(vcpu,
+ gpa + offsetof(struct save_area, fp_ctrl_reg),
+ &vcpu->arch.guest_fpregs.fpc, 4);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+ &vcpu->arch.sie_block->todpr, 4);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+ &vcpu->arch.sie_block->cputm, 8);
+ clkcomp = vcpu->arch.sie_block->ckc >> 8;
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+ &clkcomp, 8);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+ &vcpu->run->s.regs.acrs, 64);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+ &vcpu->arch.sie_block->gcr, 128);
+ return rc ? -EFAULT : 0;
+}
+
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ /*
+ * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+ * copying in vcpu load/put. Lets update our copies before we save
+ * it into the save area
+ */
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_access_regs(vcpu->run->s.regs.acrs);
+
+ return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+ kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+ exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ __disable_ibs_on_vcpu(vcpu);
+ }
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+ kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+ exit_sie_sync(vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+
+ if (!is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+ started_vcpus++;
+ }
+
+ if (started_vcpus == 0) {
+ /* we're the only active VCPU -> speed it up */
+ __enable_ibs_on_vcpu(vcpu);
+ } else if (started_vcpus == 1) {
+ /*
+ * As we are starting a second VCPU, we have to disable
+ * the IBS facility on all VCPUs to remove potentially
+ * oustanding ENABLE requests.
+ */
+ __disable_ibs_on_all_vcpus(vcpu->kvm);
+ }
+
+ atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ /*
+ * Another VCPU might have used IBS while we were offline.
+ * Let's play safe and flush the VCPU at startup.
+ */
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+ struct kvm_vcpu *started_vcpu = NULL;
+
+ if (is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ __disable_ibs_on_vcpu(vcpu);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+ started_vcpus++;
+ started_vcpu = vcpu->kvm->vcpus[i];
+ }
+ }
+
+ if (started_vcpus == 1) {
+ /*
+ * As we only have one VCPU left, we want to enable the
+ * IBS facility for that VCPU to speed it up.
+ */
+ __enable_ibs_on_vcpu(started_vcpu);
+ }
+
+ spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_CSS_SUPPORT:
+ if (!vcpu->kvm->arch.css_support) {
+ vcpu->kvm->arch.css_support = 1;
+ trace_kvm_s390_enable_css(vcpu->kvm);
+ }
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
@@ -815,6 +1544,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ int idx;
long r;
switch (ioctl) {
@@ -828,7 +1558,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_S390_STORE_STATUS:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case KVM_S390_SET_INITIAL_PSW: {
psw_t psw;
@@ -896,6 +1628,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_ENABLE_CAP:
+ {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -915,35 +1656,31 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
- int user_alloc)
+ enum kvm_mr_change change)
{
- /* A few sanity checks. We can have exactly one memory slot which has
- to start at guest virtual zero and which has to be located at a
- page boundary in userland and which has to end at a page boundary.
- The memory in userland is ok to be fragmented into various different
- vmas. It is okay to mmap() and munmap() stuff in this slot after
- doing this call at any time */
-
- if (mem->slot)
- return -EINVAL;
-
- if (mem->guest_phys_addr)
- return -EINVAL;
+ /* A few sanity checks. We can have memory slots which have to be
+ located/ended at a segment boundary (1MB). The memory in userland is
+ ok to be fragmented into various different vmas. It is okay to mmap()
+ and munmap() stuff in this slot after doing this call at any time */
if (mem->userspace_addr & 0xffffful)
return -EINVAL;
@@ -951,19 +1688,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->memory_size & 0xffffful)
return -EINVAL;
- if (!user_alloc)
- return -EINVAL;
-
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
{
int rc;
+ /* If the basics of the memslot do not change, we do not want
+ * to update the gmap. Every update causes several unnecessary
+ * segment translation exceptions. This is usually handled just
+ * fine by the normal fault handler + gmap, but it will also
+ * cause faults on the prefix page of running guest CPUs.
+ */
+ if (old->userspace_addr == mem->userspace_addr &&
+ old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+ old->npages * PAGE_SIZE == mem->memory_size)
+ return;
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
@@ -993,22 +1737,31 @@ static int __init kvm_s390_init(void)
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
*/
- facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
- if (!facilities) {
+ vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!vfacilities) {
kvm_exit();
return -ENOMEM;
}
- memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
- facilities[0] &= 0xff00fff3f47c0000ULL;
- facilities[1] &= 0x001c000000000000ULL;
+ memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
+ vfacilities[0] &= 0xff82fff3f4fc2000UL;
+ vfacilities[1] &= 0x005c000000000000UL;
return 0;
}
static void __exit kvm_s390_exit(void)
{
- free_page((unsigned long) facilities);
+ free_page((unsigned long) vfacilities);
kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index d75bc5e92c5..a8655ed3161 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,16 +19,18 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-/* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_RERUNVCPU (1<<0)
-#define SIE_INTERCEPT_UCONTROL (1<<1)
+/* declare vfacilities extern */
+extern unsigned long *vfacilities;
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
+#define TDB_FORMAT1 1
+#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -59,35 +61,176 @@ static inline int kvm_is_ucontrol(struct kvm *kvm)
#endif
}
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
- vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
+ vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+}
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
+ u64 *address1, u64 *address2)
+{
+ u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+ u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+ u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+
+ *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+ *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+ if (r1)
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+ if (r2)
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+ /* The displacement is a 20bit _SIGNED_ value */
+ if (disp2 & 0x80000)
+ disp2+=0xfff00000;
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
+}
+
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+ vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+ vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 cr6, u64 schid);
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
+int is_valid_psw(psw_t *psw);
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
- unsigned long addr);
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+/* is cmma enabled */
+bool kvm_s390_cmma_enabled(struct kvm *kvm);
+int test_vfacility(unsigned long nr);
+
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info);
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ * return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * like e.g. out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ * - zero if @rc was already zero
+ * - zero or error code from injecting if @rc was positive
+ * (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+ if (rc <= 0)
+ return rc;
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
+/* implemented in interrupt.c */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d768906f15c..f89c1cd6775 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,7 +1,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -14,207 +14,453 @@
#include <linux/kvm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/facility.h>
#include <asm/current.h>
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/compat.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *cpup;
+ s64 hostclk, val;
+ int i, rc;
+ u64 op2;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ op2 = kvm_s390_get_base_disp_s(vcpu);
+ if (op2 & 7) /* Operand must be on a doubleword boundary */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, op2, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ if (store_tod_clock(&hostclk)) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+ val = (val - hostclk) & ~0x3fUL;
+
+ mutex_lock(&vcpu->kvm->lock);
+ kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+ cpup->arch.sie_block->epoch = val;
+ mutex_unlock(&vcpu->kvm->lock);
+
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+}
+
static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u64 operand2;
- u32 address = 0;
- u8 tmp;
+ u32 address;
+ int rc;
vcpu->stat.instruction_spx++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->run->s.regs.gprs[base2];
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
- if (get_guest_u32(vcpu, operand2, &address)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ rc = read_guest(vcpu, operand2, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
- address = address & 0x7fffe000u;
+ address &= 0x7fffe000u;
- /* make sure that the new value is valid memory */
- if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
- (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ if (kvm_is_error_gpa(vcpu->kvm, address))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
kvm_s390_set_prefix(vcpu, address);
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 1, address);
-out:
return 0;
}
static int handle_store_prefix(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u64 operand2;
u32 address;
+ int rc;
vcpu->stat.instruction_stpx++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->run->s.regs.gprs[base2];
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- address = vcpu->arch.sie_block->prefix;
- address = address & 0x7fffe000u;
+ address = kvm_s390_get_prefix(vcpu);
/* get the value */
- if (put_guest_u32(vcpu, operand2, address)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ rc = write_guest(vcpu, operand2, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 0, address);
-out:
return 0;
}
static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
- u64 useraddr;
+ u16 vcpu_id = vcpu->vcpu_id;
+ u64 ga;
int rc;
vcpu->stat.instruction_stap++;
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->run->s.regs.gprs[base2];
- if (useraddr & 1) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ ga = kvm_s390_get_base_disp_s(vcpu);
- VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
- trace_kvm_s390_handle_stap(vcpu, useraddr);
-out:
+ if (ga & 1)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+ trace_kvm_s390_handle_stap(vcpu, ga);
return 0;
}
+static void __skey_check_enable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+ return;
+
+ s390_enable_skey();
+ trace_kvm_s390_skey_related_inst(vcpu);
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+
static int handle_skey(struct kvm_vcpu *vcpu)
{
+ __skey_check_enable(vcpu);
+
vcpu->stat.instruction_storage_key++;
- vcpu->arch.sie_block->gpsw.addr -= 4;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
-static int handle_stsch(struct kvm_vcpu *vcpu)
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
{
- vcpu->stat.instruction_stsch++;
- VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+ vcpu->stat.instruction_ipte_interlock++;
+ if (psw_bits(*psw).p)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+ psw->addr = __rewind_psw(*psw, 4);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
-static int handle_chsc(struct kvm_vcpu *vcpu)
+static int handle_test_block(struct kvm_vcpu *vcpu)
{
- vcpu->stat.instruction_chsc++;
- VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ gpa_t addr;
+ int reg2;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ if (kvm_s390_check_low_addr_protection(vcpu, addr))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+
+ if (kvm_is_error_gpa(vcpu->kvm, addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ /*
+ * We don't expect errors on modern systems, and do not care
+ * about storage keys (yet), so let's just clear the page.
+ */
+ if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
+ return -EFAULT;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
return 0;
}
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long len;
+ u32 tpi_data[3];
+ int cc, rc;
+ u64 addr;
+
+ rc = 0;
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ cc = 0;
+ inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+ if (!inti)
+ goto no_interrupt;
+ cc = 1;
+ tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+ tpi_data[1] = inti->io.io_int_parm;
+ tpi_data[2] = inti->io.io_int_word;
+ if (addr) {
+ /*
+ * Store the two-word I/O interruption code into the
+ * provided area.
+ */
+ len = sizeof(tpi_data) - 4;
+ rc = write_guest(vcpu, addr, &tpi_data, len);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ } else {
+ /*
+ * Store the three-word I/O interruption code into
+ * the appropriate lowcore area.
+ */
+ len = sizeof(tpi_data);
+ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+ rc = -EFAULT;
+ }
+ /*
+ * If we encounter a problem storing the interruption code, the
+ * instruction is suppressed from the guest's view: reinject the
+ * interrupt.
+ */
+ if (!rc)
+ kfree(inti);
+ else
+ kvm_s390_reinject_io_int(vcpu->kvm, inti);
+no_interrupt:
+ /* Set condition code and we're done. */
+ if (!rc)
+ kvm_s390_set_psw_cc(vcpu, cc);
+ return rc ? -EFAULT : 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti;
+
+ inti = kvm_s390_get_io_int(vcpu->kvm, 0,
+ vcpu->run->s.regs.gprs[1]);
+
+ /*
+ * Prepare exit to userspace.
+ * We indicate whether we dequeued a pending I/O interrupt
+ * so that userspace can re-inject it if the instruction gets
+ * a program check. While this may re-order the pending I/O
+ * interrupts, this is no problem since the priority is kept
+ * intact.
+ */
+ vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+ vcpu->run->s390_tsch.dequeued = !!inti;
+ if (inti) {
+ vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+ vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+ vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+ vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+ }
+ vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+ kfree(inti);
+ return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->kvm->arch.css_support) {
+ /*
+ * Most I/O instructions will be handled by userspace.
+ * Exceptions are tpi and the interrupt portion of tsch.
+ */
+ if (vcpu->arch.sie_block->ipa == 0xb236)
+ return handle_tpi(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb235)
+ return handle_tsch(vcpu);
+ /* Handle in userspace. */
+ return -EOPNOTSUPP;
+ } else {
+ /*
+ * Set condition code 3 to stop the guest from issuing channel
+ * I/O instructions.
+ */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+}
+
static int handle_stfl(struct kvm_vcpu *vcpu)
{
- unsigned int facility_list;
int rc;
vcpu->stat.instruction_stfl++;
- /* only pass the facility bits, which we can handle */
- facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3;
-
- rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
- &facility_list, sizeof(facility_list));
- if (rc == -EFAULT)
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- else {
- VCPU_EVENT(vcpu, 5, "store facility list value %x",
- facility_list);
- trace_kvm_s390_handle_stfl(vcpu, facility_list);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+ vfacilities, 4);
+ if (rc)
+ return rc;
+ VCPU_EVENT(vcpu, 5, "store facility list value %x",
+ *(unsigned int *) vfacilities);
+ trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
+ return 0;
+}
+
+static void handle_new_psw(struct kvm_vcpu *vcpu)
+{
+ /* Check whether the new psw is enabled for machine checks. */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
+ kvm_s390_deliver_pending_machine_checks(vcpu);
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int is_valid_psw(psw_t *psw)
+{
+ if (psw->mask & PSW_MASK_UNASSIGNED)
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+ if (psw->addr & ~PSW_ADDR_31)
+ return 0;
}
+ if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
+ return 0;
+ if (psw->addr & 1)
+ return 0;
+ return 1;
+}
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+ psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+ psw_compat_t new_psw;
+ u64 addr;
+ int rc;
+
+ if (gpsw->mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (!(new_psw.mask & PSW32_MASK_BASE))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+ gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+ gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+ if (!is_valid_psw(gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ handle_new_psw(vcpu);
+ return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ handle_new_psw(vcpu);
return 0;
}
static int handle_stidp(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 stidp_data = vcpu->arch.stidp_data;
u64 operand2;
int rc;
vcpu->stat.instruction_stidp++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->run->s.regs.gprs[base2];
- if (operand2 & 7) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
-out:
return 0;
}
static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
int cpus = 0;
int n;
- spin_lock(&fi->lock);
- for (n = 0; n < KVM_MAX_VCPUS; n++)
- if (fi->local_int[n])
- cpus++;
- spin_unlock(&fi->lock);
+ cpus = atomic_read(&vcpu->kvm->online_vcpus);
/* deal with other level 3 hypervisors */
if (stsi(mem, 3, 2, 2))
@@ -240,75 +486,103 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ unsigned long mem = 0;
u64 operand2;
- unsigned long mem;
+ int rc = 0;
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->run->s.regs.gprs[base2];
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- if (operand2 & 0xfff && fc > 0)
+ if (fc > 3) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+
+ if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+ || vcpu->run->s.regs.gprs[1] & 0xffff0000)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- switch (fc) {
- case 0:
+ if (fc == 0) {
vcpu->run->s.regs.gprs[0] = 3 << 28;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ kvm_s390_set_psw_cc(vcpu, 0);
return 0;
+ }
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 0xfff)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
case 1: /* same handling for 1 and 2 */
case 2:
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
+ goto out_no_data;
if (stsi((void *) mem, fc, sel1, sel2))
- goto out_mem;
+ goto out_no_data;
break;
case 3:
if (sel1 != 2 || sel2 != 2)
- goto out_fail;
+ goto out_no_data;
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
+ goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
- default:
- goto out_fail;
}
- if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out_mem;
+ rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+ if (rc) {
+ rc = kvm_s390_inject_prog_cond(vcpu, rc);
+ goto out;
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ kvm_s390_set_psw_cc(vcpu, 0);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
-out_mem:
+out_no_data:
+ kvm_s390_set_psw_cc(vcpu, 3);
+out:
free_page(mem);
-out_fail:
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
- return 0;
+ return rc;
}
-static intercept_handler_t priv_handlers[256] = {
+static const intercept_handler_t b2_handlers[256] = {
[0x02] = handle_stidp,
+ [0x04] = handle_set_clock,
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
+ [0x21] = handle_ipte_interlock,
[0x29] = handle_skey,
[0x2a] = handle_skey,
[0x2b] = handle_skey,
- [0x34] = handle_stsch,
- [0x5f] = handle_chsc,
+ [0x2c] = handle_test_block,
+ [0x30] = handle_io_inst,
+ [0x31] = handle_io_inst,
+ [0x32] = handle_io_inst,
+ [0x33] = handle_io_inst,
+ [0x34] = handle_io_inst,
+ [0x35] = handle_io_inst,
+ [0x36] = handle_io_inst,
+ [0x37] = handle_io_inst,
+ [0x38] = handle_io_inst,
+ [0x39] = handle_io_inst,
+ [0x3a] = handle_io_inst,
+ [0x3b] = handle_io_inst,
+ [0x3c] = handle_io_inst,
+ [0x50] = handle_ipte_interlock,
+ [0x5f] = handle_io_inst,
+ [0x74] = handle_io_inst,
+ [0x76] = handle_io_inst,
[0x7d] = handle_stsi,
[0xb1] = handle_stfl,
+ [0xb2] = handle_lpswe,
};
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
@@ -316,67 +590,394 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
intercept_handler_t handler;
/*
- * a lot of B2 instructions are priviledged. We first check for
- * the privileged ones, that we can handle in the kernel. If the
- * kernel can handle this instruction, we check for the problem
- * state bit and (a) handle the instruction or (b) send a code 2
- * program check.
- * Anything else goes to userspace.*/
- handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
- if (handler) {
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ * A lot of B2 instructions are priviledged. Here we check for
+ * the privileged ones, that we can handle in the kernel.
+ * Anything else goes to userspace.
+ */
+ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* This basically extracts the mask half of the psw. */
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+ if (reg2) {
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg2] |=
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ }
+ return 0;
+}
+
+#define PFMF_RESERVED 0xfffc0101UL
+#define PFMF_SK 0x00020000UL
+#define PFMF_CF 0x00010000UL
+#define PFMF_UI 0x00008000UL
+#define PFMF_FSC 0x00007000UL
+#define PFMF_NQ 0x00000800UL
+#define PFMF_MR 0x00000400UL
+#define PFMF_MC 0x00000200UL
+#define PFMF_KEY 0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+ unsigned long start, end;
+
+ vcpu->stat.instruction_pfmf++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ if (!MACHINE_HAS_PFMF)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Only provide non-quiescing support if the host supports it */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* No support for conditional-SSKE */
+ if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_s390_check_low_addr_protection(vcpu, start))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ }
+
+ switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ case 0x00000000:
+ end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ break;
+ case 0x00001000:
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ break;
+ /* We dont support EDAT2
+ case 0x00002000:
+ end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ break;*/
+ default:
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ }
+ while (start < end) {
+ unsigned long useraddr, abs_addr;
+
+ /* Translate guest address to host address */
+ if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
+ abs_addr = kvm_s390_real_to_abs(vcpu, start);
else
- return handler(vcpu);
+ abs_addr = start;
+ useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+ if (kvm_is_error_hva(useraddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (clear_user((void __user *)useraddr, PAGE_SIZE))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+ __skey_check_enable(vcpu);
+ if (set_guest_storage_key(current->mm, useraddr,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ start += PAGE_SIZE;
}
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
+ vcpu->run->s.regs.gprs[reg2] = end;
+ return 0;
+}
+
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+ /* entries expected to be 1FF */
+ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+ unsigned long *cbrlo, cbrle;
+ struct gmap *gmap;
+ int i;
+
+ VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+ gmap = vcpu->arch.gmap;
+ vcpu->stat.instruction_essa++;
+ if (!kvm_s390_cmma_enabled(vcpu->kvm))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Rewind PSW to repeat the ESSA instruction */
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+ vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+ down_read(&gmap->mm->mmap_sem);
+ for (i = 0; i < entries; ++i) {
+ cbrle = cbrlo[i];
+ if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
+ /* invalid entry */
+ break;
+ /* try to free backing */
+ __gmap_zap(cbrle, gmap);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ if (i < entries)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+}
+
+static const intercept_handler_t b9_handlers[256] = {
+ [0x8a] = handle_ipte_interlock,
+ [0x8d] = handle_epsw,
+ [0x8e] = handle_ipte_interlock,
+ [0x8f] = handle_ipte_interlock,
+ [0xab] = handle_essa,
+ [0xaf] = handle_pfmf,
+};
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ /* This is handled just as for the B2 instructions. */
+ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u32 val = 0;
+ int reg, rc;
+ u64 ga;
+
+ vcpu->stat.instruction_lctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
+
+ reg = reg1;
+ do {
+ rc = read_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= val;
+ ga += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga;
+ u32 val;
+ int reg, rc;
+
+ vcpu->stat.instruction_stctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+ reg = reg1;
+ do {
+ val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
+ rc = write_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ ga += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga, val;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctlg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
+
+ do {
+ rc = read_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gcr[reg] = val;
+ ga += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga, val;
+ int reg, rc;
+
+ vcpu->stat.instruction_stctg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+ do {
+ val = vcpu->arch.sie_block->gcr[reg];
+ rc = write_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ ga += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static const intercept_handler_t eb_handlers[256] = {
+ [0x2f] = handle_lctlg,
+ [0x25] = handle_stctg,
+};
+
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
+ if (handler)
+ return handler(vcpu);
return -EOPNOTSUPP;
}
static int handle_tprot(struct kvm_vcpu *vcpu)
{
- int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
- int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
- int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
- int disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
- u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0;
- u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0;
- struct vm_area_struct *vma;
- unsigned long user_address;
+ u64 address1, address2;
+ unsigned long hva, gpa;
+ int ret = 0, cc = 0;
+ bool writable;
vcpu->stat.instruction_tprot++;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+
/* we only handle the Linux memory detection case:
* access key == 0
- * guest DAT == off
* everything else goes to userspace. */
if (address2 & 0xf0)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- return -EOPNOTSUPP;
-
-
- /* we must resolve the address without holding the mmap semaphore.
- * This is ok since the userspace hypervisor is not supposed to change
- * the mapping while the guest queries the memory. Otherwise the guest
- * might crash or get wrong info anyway. */
- user_address = (unsigned long) __guestaddr_to_user(vcpu, address1);
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, user_address);
- if (!vma) {
- up_read(&current->mm->mmap_sem);
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ ipte_lock(vcpu);
+ ret = guest_translate_address(vcpu, address1, &gpa, 1);
+ if (ret == PGM_PROTECTION) {
+ /* Write protected? Try again with read-only... */
+ cc = 1;
+ ret = guest_translate_address(vcpu, address1, &gpa, 0);
+ }
+ if (ret) {
+ if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
+ } else if (ret > 0) {
+ /* Translation not available */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ ret = 0;
+ }
+ goto out_unlock;
}
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ))
- vcpu->arch.sie_block->gpsw.mask |= (1ul << 44);
- if (!(vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_READ))
- vcpu->arch.sie_block->gpsw.mask |= (2ul << 44);
-
- up_read(&current->mm->mmap_sem);
- return 0;
+ hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ if (kvm_is_error_hva(hva)) {
+ ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ } else {
+ if (!writable)
+ cc = 1; /* Write not permitted ==> read-only */
+ kvm_s390_set_psw_cc(vcpu, cc);
+ /* Note: CC2 only occurs for storage keys (not supported yet) */
+ }
+out_unlock:
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_unlock(vcpu);
+ return ret;
}
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
@@ -392,8 +993,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
u32 value;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
return kvm_s390_inject_program_int(vcpu,
@@ -405,7 +1005,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
return 0;
}
-static intercept_handler_t x01_handlers[256] = {
+static const intercept_handler_t x01_handlers[256] = {
[0x07] = handle_sckpf,
};
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 566ddf6e8df..43079a48cc9 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
/*
* handling interprocessor communication
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -23,29 +23,30 @@
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int cpuflags;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- if (fi->local_int[cpu_addr] == NULL)
- rc = SIGP_CC_NOT_OPERATIONAL;
- else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
+ cpuflags = atomic_read(li->cpuflags);
+ if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_ECALL_PEND)
+ if (cpuflags & CPUSTAT_ECALL_PEND)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_STOPPED)
+ if (cpuflags & CPUSTAT_STOPPED)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
return rc;
@@ -53,83 +54,81 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li;
- struct kvm_s390_interrupt_info *inti;
- int rc;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EMERGENCY,
+ .parm = vcpu->vcpu_id,
+ };
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int rc = 0;
- if (cpu_addr >= KVM_MAX_VCPUS)
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
- if (!inti)
- return -ENOMEM;
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
- inti->type = KVM_S390_INT_EMERGENCY;
- inti->emerg.code = vcpu->vcpu_id;
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- kfree(inti);
- goto unlock;
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ u16 asn, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+ u16 p_asn, s_asn;
+ psw_t *psw;
+ u32 flags;
+
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+ psw = &dst_vcpu->arch.sie_block->gpsw;
+ p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
+ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
+
+ /* Deliver the emergency signal? */
+ if (!(flags & CPUSTAT_STOPPED)
+ || (psw->mask & psw_int_mask) != psw_int_mask
+ || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+ || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+ return __sigp_emergency(vcpu, cpu_addr);
+ } else {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
}
- spin_lock_bh(&li->lock);
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
- spin_unlock_bh(&li->lock);
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
-unlock:
- spin_unlock(&fi->lock);
- return rc;
}
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li;
- struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EXTERNAL_CALL,
+ .parm = vcpu->vcpu_id,
+ };
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- if (cpu_addr >= KVM_MAX_VCPUS)
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
- if (!inti)
- return -ENOMEM;
-
- inti->type = KVM_S390_INT_EXTERNAL_CALL;
- inti->extcall.code = vcpu->vcpu_id;
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- kfree(inti);
- goto unlock;
- }
- spin_lock_bh(&li->lock);
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
- spin_unlock_bh(&li->lock);
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
-unlock:
- spin_unlock(&fi->lock);
- return rc;
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
{
struct kvm_s390_interrupt_info *inti;
+ int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
if (!inti)
@@ -137,53 +136,58 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
inti->type = KVM_S390_SIGP_STOP;
spin_lock_bh(&li->lock);
- if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED))
+ if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
+ kfree(inti);
+ if ((action & ACTION_STORE_ON_STOP) != 0)
+ rc = -ESHUTDOWN;
goto out;
+ }
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
out:
spin_unlock_bh(&li->lock);
- return SIGP_CC_ORDER_CODE_ACCEPTED;
+ return rc;
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- goto unlock;
- }
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
rc = __inject_sigp_stop(li, action);
-unlock:
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
- return rc;
-}
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- return __inject_sigp_stop(li, action);
+ if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+ /* If the CPU has already been stopped, we still have
+ * to save the status when doing stop-and-store. This
+ * has to be done after unlocking all spinlocks. */
+ rc = kvm_s390_store_status_unloaded(dst_vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ }
+
+ return rc;
}
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
int rc;
+ unsigned int i;
+ struct kvm_vcpu *v;
switch (parameter & 0xff) {
case 0:
@@ -191,6 +195,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
break;
case 1:
case 2:
+ kvm_for_each_vcpu(i, v, vcpu->kvm) {
+ v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(v);
+ }
+
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
break;
default:
@@ -202,16 +211,24 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
u64 *reg)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li = NULL;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
struct kvm_s390_interrupt_info *inti;
int rc;
- u8 tmp;
- /* make sure that the new value is valid memory */
- address = address & 0x7fffe000u;
- if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
- copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) {
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ address &= 0x7fffe000u;
+ if (kvm_is_error_gpa(vcpu->kvm, address)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INVALID_PARAMETER;
return SIGP_CC_STATUS_STORED;
@@ -221,18 +238,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
if (!inti)
return SIGP_CC_BUSY;
- spin_lock(&fi->lock);
- if (cpu_addr < KVM_MAX_VCPUS)
- li = fi->local_int[cpu_addr];
-
- if (li == NULL) {
- *reg &= 0xffffffff00000000UL;
- *reg |= SIGP_STATUS_INCORRECT_STATE;
- rc = SIGP_CC_STATUS_STORED;
- kfree(inti);
- goto out_fi;
- }
-
spin_lock_bh(&li->lock);
/* cpu must be in stopped state */
if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
@@ -248,43 +253,70 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
out_li:
spin_unlock_bh(&li->lock);
-out_fi:
- spin_unlock(&fi->lock);
+ return rc;
+}
+
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
+ u32 addr, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int flags;
+ int rc;
+
+ if (cpu_id < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+ flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+ spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+ if (!(flags & CPUSTAT_STOPPED)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ addr &= 0x7ffffe00;
+ rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+ if (rc == -EFAULT) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ rc = SIGP_CC_STATUS_STORED;
+ }
return rc;
}
static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
{
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- if (fi->local_int[cpu_addr] == NULL)
- rc = SIGP_CC_NOT_OPERATIONAL;
- else {
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_RUNNING) {
- /* running */
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- } else {
- /* not running */
- *reg &= 0xffffffff00000000UL;
- *reg |= SIGP_STATUS_NOT_RUNNING;
- rc = SIGP_CC_STATUS_STORED;
- }
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+ if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ /* running */
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ } else {
+ /* not running */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_NOT_RUNNING;
+ rc = SIGP_CC_STATUS_STORED;
}
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
rc);
@@ -292,31 +324,25 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
return rc;
}
-static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
+/* Test whether the destination CPU is available and not busy */
+static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ struct kvm_vcpu *dst_vcpu = NULL;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- goto out;
- }
-
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP)
rc = SIGP_CC_BUSY;
- else
- VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
- cpu_addr);
spin_unlock_bh(&li->lock);
-out:
- spin_unlock(&fi->lock);
+
return rc;
}
@@ -324,8 +350,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
{
int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int r3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u32 parameter;
u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
u8 order_code;
@@ -333,12 +357,9 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
/* sigp in userspace can exit */
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu,
- PGM_PRIVILEGED_OPERATION);
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- order_code = disp2;
- if (base2)
- order_code += vcpu->run->s.regs.gprs[base2];
+ order_code = kvm_s390_get_base_disp_rs(vcpu);
if (r1 % 2)
parameter = vcpu->run->s.regs.gprs[r1];
@@ -369,6 +390,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
ACTION_STOP_ON_STOP);
break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
case SIGP_SET_ARCHITECTURE:
vcpu->stat.instruction_sigp_arch++;
rc = __sigp_set_arch(vcpu, parameter);
@@ -378,17 +403,31 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
&vcpu->run->s.regs.gprs[r1]);
break;
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
case SIGP_SENSE_RUNNING:
vcpu->stat.instruction_sigp_sense_running++;
rc = __sigp_sense_running(vcpu, cpu_addr,
&vcpu->run->s.regs.gprs[r1]);
break;
+ case SIGP_START:
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
+ rc = -EOPNOTSUPP; /* Handle START in user space */
+ break;
case SIGP_RESTART:
vcpu->stat.instruction_sigp_restart++;
- rc = __sigp_restart(vcpu, cpu_addr);
- if (rc == SIGP_CC_BUSY)
- break;
- /* user space must know about restart */
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
+ VCPU_EVENT(vcpu, 4,
+ "sigp restart %x to handle userspace",
+ cpu_addr);
+ /* user space must know about restart */
+ rc = -EOPNOTSUPP;
+ }
+ break;
default:
return -EOPNOTSUPP;
}
@@ -396,7 +435,41 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
if (rc < 0)
return rc;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+ kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occurr after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception, therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+ struct kvm_vcpu *dest_vcpu;
+ u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+ if (order_code == SIGP_EXTERNAL_CALL) {
+ dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ BUG_ON(dest_vcpu == NULL);
+
+ spin_lock_bh(&dest_vcpu->arch.local_int.lock);
+ if (waitqueue_active(&dest_vcpu->wq))
+ wake_up_interruptible(&dest_vcpu->wq);
+ dest_vcpu->preempted = true;
+ spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
+
+ kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 90fdf85b5ff..647e9d6a481 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -68,6 +68,27 @@ TRACE_EVENT(kvm_s390_destroy_vcpu,
);
/*
+ * Trace point for start and stop of vpcus.
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+ __entry->id)
+ );
+
+/*
* Trace points for injection of interrupts, either per machine or
* per vcpu.
*/
@@ -141,13 +162,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
* Trace point for the actual delivery of interrupts.
*/
TRACE_EVENT(kvm_s390_deliver_interrupt,
- TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
+ TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
TP_ARGS(id, type, data0, data1),
TP_STRUCT__entry(
__field(int, id)
__field(__u32, inttype)
- __field(__u32, data0)
+ __field(__u64, data0)
__field(__u64, data1)
),
@@ -159,7 +180,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
),
TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
- "data:%08x %016llx",
+ "data:%08llx %016llx",
__entry->id, __entry->inttype,
__print_symbolic(__entry->inttype, kvm_s390_int_type),
__entry->data0, __entry->data1)
@@ -204,6 +225,48 @@ TRACE_EVENT(kvm_s390_stop_request,
);
+/*
+ * Trace point for enabling channel I/O instruction support.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+ TP_PROTO(void *kvm),
+ TP_ARGS(kvm),
+
+ TP_STRUCT__entry(
+ __field(void *, kvm)
+ ),
+
+ TP_fast_assign(
+ __entry->kvm = kvm;
+ ),
+
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
+ __entry->kvm)
+ );
+
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s ibs on cpu %d",
+ __entry->state ? "enabling" : "disabling", __entry->id)
+ );
+
+
#endif /* _TRACE_KVMS390_H */
/* This part must be outside protection */
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 2b29e62351d..916834d7a73 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -2,8 +2,9 @@
#define _TRACE_KVM_H
#include <linux/tracepoint.h>
-#include <asm/sigp.h>
+#include <asm/sie.h>
#include <asm/debug.h>
+#include <asm/dis.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
@@ -29,6 +30,66 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args)
+TRACE_EVENT(kvm_s390_skey_related_inst,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+ );
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+ );
+
+TRACE_EVENT(kvm_s390_pfault_init,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+ );
+
+TRACE_EVENT(kvm_s390_pfault_done,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+ );
+
/*
* Tracepoints for SIE entry and exit.
*/
@@ -64,17 +125,6 @@ TRACE_EVENT(kvm_s390_sie_fault,
VCPU_TP_PRINTK("%s", "fault in sie instruction")
);
-#define sie_intercept_code \
- {0x04, "Instruction"}, \
- {0x08, "Program interruption"}, \
- {0x0C, "Instruction and program interuption"}, \
- {0x10, "External request"}, \
- {0x14, "External interruption"}, \
- {0x18, "I/O request"}, \
- {0x1C, "Wait state"}, \
- {0x20, "Validity"}, \
- {0x28, "Stop request"}
-
TRACE_EVENT(kvm_s390_sie_exit,
TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
TP_ARGS(VCPU_ARGS_COMMON, icptcode),
@@ -104,7 +154,6 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(__u64, instruction)
- __field(char, insn[8])
),
TP_fast_assign(
@@ -115,10 +164,8 @@ TRACE_EVENT(kvm_s390_intercept_instruction,
VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
__entry->instruction,
- insn_to_mnemonic((unsigned char *)
- &__entry->instruction,
- __entry->insn) ?
- "unknown" : __entry->insn)
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
);
/*
@@ -166,17 +213,6 @@ TRACE_EVENT(kvm_s390_intercept_validity,
* Trace points for instructions that are of special interest.
*/
-#define sigp_order_codes \
- {SIGP_SENSE, "sense"}, \
- {SIGP_EXTERNAL_CALL, "external call"}, \
- {SIGP_EMERGENCY_SIGNAL, "emergency signal"}, \
- {SIGP_STOP, "stop"}, \
- {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
- {SIGP_SET_ARCHITECTURE, "set architecture"}, \
- {SIGP_SET_PREFIX, "set prefix"}, \
- {SIGP_SENSE_RUNNING, "sense running"}, \
- {SIGP_RESTART, "restart"}
-
TRACE_EVENT(kvm_s390_handle_sigp,
TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
__u32 parameter),
@@ -203,12 +239,28 @@ TRACE_EVENT(kvm_s390_handle_sigp,
__entry->cpu_addr, __entry->parameter)
);
-#define diagnose_codes \
- {0x10, "release pages"}, \
- {0x44, "time slice end"}, \
- {0x308, "ipl functions"}, \
- {0x500, "kvm hypercall"}, \
- {0x501, "kvm breakpoint"}
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+ __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr)
+ );
TRACE_EVENT(kvm_s390_handle_diag,
TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
@@ -253,6 +305,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
__entry->reg1, __entry->reg3, __entry->addr)
);
+TRACE_EVENT(kvm_s390_handle_stctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+ __entry->g ? "stctg" : "stctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
TRACE_EVENT(kvm_s390_handle_prefix,
TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
TP_ARGS(VCPU_ARGS_COMMON, set, address),
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 6ab0d0b5cec..c6d752e8bf2 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -2,9 +2,7 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess_std.o uaccess_pt.o
-obj-y += usercopy.o
+lib-y += delay.o string.o uaccess.o find.o
obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
obj-$(CONFIG_64BIT) += mem64.o
-lib-$(CONFIG_64BIT) += uaccess_mvcos.o
lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 42d0cf89121..a9f3d0042d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -32,7 +32,7 @@ static void __udelay_disabled(unsigned long long usecs)
unsigned long cr0, cr6, new;
u64 clock_saved, end;
- end = get_clock() + (usecs << 12);
+ end = get_tod_clock() + (usecs << 12);
clock_saved = local_tick_disable();
__ctl_store(cr0, 0, 0);
__ctl_store(cr6, 6, 6);
@@ -44,8 +44,7 @@ static void __udelay_disabled(unsigned long long usecs)
do {
set_clock_comparator(end);
vtime_stop_cpu();
- local_irq_disable();
- } while (get_clock() < end);
+ } while (get_tod_clock_fast() < end);
lockdep_on();
__ctl_load(cr0, 0, 0);
__ctl_load(cr6, 6, 6);
@@ -56,7 +55,7 @@ static void __udelay_enabled(unsigned long long usecs)
{
u64 clock_saved, end;
- end = get_clock() + (usecs << 12);
+ end = get_tod_clock_fast() + (usecs << 12);
do {
clock_saved = 0;
if (end < S390_lowcore.clock_comparator) {
@@ -64,10 +63,9 @@ static void __udelay_enabled(unsigned long long usecs)
set_clock_comparator(end);
}
vtime_stop_cpu();
- local_irq_disable();
if (clock_saved)
local_tick_enable(clock_saved);
- } while (get_clock() < end);
+ } while (get_tod_clock_fast() < end);
}
/*
@@ -111,8 +109,8 @@ void udelay_simple(unsigned long long usecs)
{
u64 end;
- end = get_clock() + (usecs << 12);
- while (get_clock() < end)
+ end = get_tod_clock_fast() + (usecs << 12);
+ while (get_tod_clock_fast() < end)
cpu_relax();
}
@@ -122,10 +120,10 @@ void __ndelay(unsigned long long nsecs)
nsecs <<= 9;
do_div(nsecs, 125);
- end = get_clock() + nsecs;
+ end = get_tod_clock_fast() + nsecs;
if (nsecs & ~0xfffUL)
__udelay(nsecs >> 12);
- while (get_clock() < end)
+ while (get_tod_clock_fast() < end)
barrier();
}
EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
new file mode 100644
index 00000000000..922003c1b90
--- /dev/null
+++ b/arch/s390/lib/find.c
@@ -0,0 +1,77 @@
+/*
+ * MSB0 numbered special bitops handling.
+ *
+ * On s390x the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The reason for this bit numbering is the fact that the hardware sets bits
+ * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
+ * from the 'wrong end'.
+ */
+
+#include <linux/compiler.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = (*p) & (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_first_bit_inv);
+
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + (offset / BITS_PER_LONG);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL >> offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_next_bit_inv);
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index f709983f41f..5b0e445bc3f 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -26,83 +26,81 @@ __setup("spin_retry=", spin_retry_setup);
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
- int count = spin_retry;
- unsigned int cpu = ~smp_processor_id();
+ unsigned int cpu = SPINLOCK_LOCKVAL;
unsigned int owner;
+ int count;
while (1) {
- owner = lp->owner_cpu;
- if (!owner || smp_vcpu_scheduled(~owner)) {
- for (count = spin_retry; count > 0; count--) {
- if (arch_spin_is_locked(lp))
- continue;
- if (_raw_compare_and_swap(&lp->owner_cpu, 0,
- cpu) == 0)
- return;
- }
- if (MACHINE_IS_LPAR)
- continue;
+ owner = ACCESS_ONCE(lp->lock);
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ return;
+ continue;
}
- owner = lp->owner_cpu;
- if (owner)
+ /* Check if the lock owner is running. */
+ if (!smp_vcpu_scheduled(~owner)) {
+ smp_yield_cpu(~owner);
+ continue;
+ }
+ /* Loop for a while on the lock value. */
+ count = spin_retry;
+ do {
+ owner = ACCESS_ONCE(lp->lock);
+ } while (owner && count-- > 0);
+ if (!owner)
+ continue;
+ /*
+ * For multiple layers of hypervisors, e.g. z/VM + LPAR
+ * yield the CPU if the lock is still unavailable.
+ */
+ if (!MACHINE_IS_LPAR)
smp_yield_cpu(~owner);
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
- return;
}
}
EXPORT_SYMBOL(arch_spin_lock_wait);
void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
- int count = spin_retry;
- unsigned int cpu = ~smp_processor_id();
+ unsigned int cpu = SPINLOCK_LOCKVAL;
unsigned int owner;
+ int count;
local_irq_restore(flags);
while (1) {
- owner = lp->owner_cpu;
- if (!owner || smp_vcpu_scheduled(~owner)) {
- for (count = spin_retry; count > 0; count--) {
- if (arch_spin_is_locked(lp))
- continue;
- local_irq_disable();
- if (_raw_compare_and_swap(&lp->owner_cpu, 0,
- cpu) == 0)
- return;
- local_irq_restore(flags);
- }
- if (MACHINE_IS_LPAR)
- continue;
+ owner = ACCESS_ONCE(lp->lock);
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ local_irq_disable();
+ if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ return;
+ local_irq_restore(flags);
}
- owner = lp->owner_cpu;
- if (owner)
+ /* Check if the lock owner is running. */
+ if (!smp_vcpu_scheduled(~owner)) {
smp_yield_cpu(~owner);
- local_irq_disable();
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
- return;
- local_irq_restore(flags);
- }
-}
-EXPORT_SYMBOL(arch_spin_lock_wait_flags);
-
-int arch_spin_trylock_retry(arch_spinlock_t *lp)
-{
- unsigned int cpu = ~smp_processor_id();
- int count;
-
- for (count = spin_retry; count > 0; count--) {
- if (arch_spin_is_locked(lp))
continue;
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
- return 1;
+ }
+ /* Loop for a while on the lock value. */
+ count = spin_retry;
+ do {
+ owner = ACCESS_ONCE(lp->lock);
+ } while (owner && count-- > 0);
+ if (!owner)
+ continue;
+ /*
+ * For multiple layers of hypervisors, e.g. z/VM + LPAR
+ * yield the CPU if the lock is still unavailable.
+ */
+ if (!MACHINE_IS_LPAR)
+ smp_yield_cpu(~owner);
}
- return 0;
}
-EXPORT_SYMBOL(arch_spin_trylock_retry);
+EXPORT_SYMBOL(arch_spin_lock_wait_flags);
-void arch_spin_relax(arch_spinlock_t *lock)
+void arch_spin_relax(arch_spinlock_t *lp)
{
- unsigned int cpu = lock->owner_cpu;
+ unsigned int cpu = lp->lock;
if (cpu != 0) {
if (MACHINE_IS_VM || MACHINE_IS_KVM ||
!smp_vcpu_scheduled(~cpu))
@@ -111,6 +109,17 @@ void arch_spin_relax(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_relax);
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
+{
+ int count;
+
+ for (count = spin_retry; count > 0; count--)
+ if (arch_spin_trylock_once(lp))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock_retry);
+
void _raw_read_lock_wait(arch_rwlock_t *rw)
{
unsigned int old;
@@ -121,10 +130,10 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
smp_yield();
count = spin_retry;
}
- if (!arch_read_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return;
}
}
@@ -141,12 +150,13 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
smp_yield();
count = spin_retry;
}
- if (!arch_read_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
- old = rw->lock & 0x7fffffffU;
local_irq_disable();
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return;
+ local_irq_restore(flags);
}
}
EXPORT_SYMBOL(_raw_read_lock_wait_flags);
@@ -157,10 +167,10 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw)
int count = spin_retry;
while (count-- > 0) {
- if (!arch_read_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return 1;
}
return 0;
@@ -169,6 +179,7 @@ EXPORT_SYMBOL(_raw_read_trylock_retry);
void _raw_write_lock_wait(arch_rwlock_t *rw)
{
+ unsigned int old;
int count = spin_retry;
while (1) {
@@ -176,9 +187,10 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
smp_yield();
count = spin_retry;
}
- if (!arch_write_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return;
}
}
@@ -186,6 +198,7 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
{
+ unsigned int old;
int count = spin_retry;
local_irq_restore(flags);
@@ -194,23 +207,27 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
smp_yield();
count = spin_retry;
}
- if (!arch_write_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
local_irq_disable();
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return;
+ local_irq_restore(flags);
}
}
EXPORT_SYMBOL(_raw_write_lock_wait_flags);
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
+ unsigned int old;
int count = spin_retry;
while (count-- > 0) {
- if (!arch_write_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return 1;
}
return 0;
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 00000000000..53dd5d7a0c9
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,406 @@
+/*
+ * Standard user space access functions based on mvcp/mvcs and doing
+ * interesting things in the secondary space mode.
+ *
+ * Copyright IBM Corp. 2006,2014
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+
+#ifndef CONFIG_64BIT
+#define AHI "ahi"
+#define ALR "alr"
+#define CLR "clr"
+#define LHI "lhi"
+#define SLR "slr"
+#else
+#define AHI "aghi"
+#define ALR "algr"
+#define CLR "clgr"
+#define LHI "lghi"
+#define SLR "slgr"
+#endif
+
+static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+
+static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x81UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+ "9: jz 7f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 4f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+ "10:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "4:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,6f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "5: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "6:"AHI" %4,-256\n"
+ " jnm 5b\n"
+ " ex %4,0(%3)\n"
+ " j 8f\n"
+ "7:"SLR" %0,%0\n"
+ "8:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "10:jz 8f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "11:jnz 1b\n"
+ " j 8f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "12:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "5:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,7f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "6: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "7:"AHI" %4,-256\n"
+ " jnm 6b\n"
+ " ex %4,0(%3)\n"
+ " j 9f\n"
+ "8:"SLR" %0,%0\n"
+ "9: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
+ EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_from_user_mvcos(to, from, n);
+ return copy_from_user_mvcp(to, from, n);
+}
+EXPORT_SYMBOL(__copy_from_user);
+
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ "6: jz 4f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+ "7:"SLR" %0,%4\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "7: jz 5f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "8: jnz 1b\n"
+ " j 5f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "9:"SLR" %0,%4\n"
+ " j 6f\n"
+ "5:"SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+ EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_to_user_mvcos(to, from, n);
+ return copy_to_user_mvcs(to, from, n);
+}
+EXPORT_SYMBOL(__copy_to_user);
+
+static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810081UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ /* FIXME: copy with reduced length. */
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ " jz 2f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2:"SLR" %0,%0\n"
+ "3: \n"
+ EX_TABLE(0b,3b)
+ : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ unsigned long tmp1;
+
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ "0:"AHI" %0,257\n"
+ "1: mvc 0(1,%1),0(%2)\n"
+ " la %1,1(%1)\n"
+ " la %2,1(%2)\n"
+ " "AHI" %0,-1\n"
+ " jnz 1b\n"
+ " j 5f\n"
+ "2: mvc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,1b-0b(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_in_user_mvcos(to, from, n);
+ return copy_in_user_mvc(to, from, n);
+}
+EXPORT_SYMBOL(__copy_in_user);
+
+static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+ " jz 4f\n"
+ "1:"ALR" %0,%2\n"
+ " "SLR" %1,%2\n"
+ " j 0b\n"
+ "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
+ " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
+ " "SLR" %3,%1\n"
+ " "CLR" %0,%3\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+ " "SLR" %0,%3\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+ : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ " xc 0(1,%1),0(%1)\n"
+ "0:"AHI" %0,257\n"
+ " la %2,255(%1)\n" /* %2 = ptr + 255 */
+ " srl %2,12\n"
+ " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
+ " "SLR" %2,%1\n"
+ " "CLR" %0,%2\n" /* clear crosses next page boundary? */
+ " jnh 5f\n"
+ " "AHI" %2,-1\n"
+ "1: ex %2,0(%3)\n"
+ " "AHI" %2,1\n"
+ " "SLR" %0,%2\n"
+ " j 5f\n"
+ "2: xc 0(256,%1),0(%1)\n"
+ " la %1,256(%1)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,0(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __clear_user(void __user *to, unsigned long size)
+{
+ if (static_key_false(&have_mvcos))
+ return clear_user_mvcos(to, size);
+ return clear_user_xc(to, size);
+}
+EXPORT_SYMBOL(__clear_user);
+
+static inline unsigned long strnlen_user_srst(const char __user *src,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0;
+ unsigned long tmp1, tmp2;
+
+ asm volatile(
+ " la %2,0(%1)\n"
+ " la %3,0(%0,%1)\n"
+ " "SLR" %0,%0\n"
+ " sacf 256\n"
+ "0: srst %3,%2\n"
+ " jo 0b\n"
+ " la %0,1(%3)\n" /* strnlen_user results includes \0 */
+ " "SLR" %0,%1\n"
+ "1: sacf 768\n"
+ EX_TABLE(0b,1b)
+ : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+unsigned long __strnlen_user(const char __user *src, unsigned long size)
+{
+ if (unlikely(!size))
+ return 0;
+ load_kernel_asce();
+ return strnlen_user_srst(src, size);
+}
+EXPORT_SYMBOL(__strnlen_user);
+
+long __strncpy_from_user(char *dst, const char __user *src, long size)
+{
+ size_t done, len, offset, len_str;
+
+ if (unlikely(size <= 0))
+ return 0;
+ done = 0;
+ do {
+ offset = (size_t)src & ~PAGE_MASK;
+ len = min(size - done, PAGE_SIZE - offset);
+ if (copy_from_user(dst, src, len))
+ return -EFAULT;
+ len_str = strnlen(dst, len);
+ done += len_str;
+ src += len_str;
+ dst += len_str;
+ } while ((len_str == len) && (done < size));
+ return done;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
+
+/*
+ * The "old" uaccess variant without mvcos can be enforced with the
+ * uaccess_primary kernel parameter. This is mainly for debugging purposes.
+ */
+static int uaccess_primary __initdata;
+
+static int __init parse_uaccess_pt(char *__unused)
+{
+ uaccess_primary = 1;
+ return 0;
+}
+early_param("uaccess_primary", parse_uaccess_pt);
+
+static int __init uaccess_init(void)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27))
+ static_key_slow_inc(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
deleted file mode 100644
index 315dbe09983..00000000000
--- a/arch/s390/lib/uaccess.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright IBM Corp. 2007
- *
- */
-
-#ifndef __ARCH_S390_LIB_UACCESS_H
-#define __ARCH_S390_LIB_UACCESS_H
-
-extern size_t copy_from_user_std(size_t, const void __user *, void *);
-extern size_t copy_to_user_std(size_t, void __user *, const void *);
-extern size_t strnlen_user_std(size_t, const char __user *);
-extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
-extern int futex_atomic_op_std(int, u32 __user *, int, int *);
-
-extern size_t copy_from_user_pt(size_t, const void __user *, void *);
-extern size_t copy_to_user_pt(size_t, void __user *, const void *);
-extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
-extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
-
-#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
deleted file mode 100644
index 2443ae476e3..00000000000
--- a/arch/s390/lib/uaccess_mvcos.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Optimized user space space access functions based on mvcos.
- *
- * Copyright IBM Corp. 2006
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef CONFIG_64BIT
-#define AHI "ahi"
-#define ALR "alr"
-#define CLR "clr"
-#define LHI "lhi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define ALR "algr"
-#define CLR "clgr"
-#define LHI "lghi"
-#define SLR "slgr"
-#endif
-
-static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x)
-{
- register unsigned long reg0 asm("0") = 0x81UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
- "9: jz 7f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 4f\n"
- "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
- "10:"SLR" %0,%4\n"
- " "ALR" %2,%4\n"
- "4:"LHI" %4,-1\n"
- " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
- " bras %3,6f\n" /* memset loop */
- " xc 0(1,%2),0(%2)\n"
- "5: xc 0(256,%2),0(%2)\n"
- " la %2,256(%2)\n"
- "6:"AHI" %4,-256\n"
- " jnm 5b\n"
- " ex %4,0(%3)\n"
- " j 8f\n"
- "7:"SLR" %0,%0\n"
- "8: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x)
-{
- if (size <= 256)
- return copy_from_user_std(size, ptr, x);
- return copy_from_user_mvcos(size, ptr, x);
-}
-
-static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- "6: jz 4f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
- "7:"SLR" %0,%4\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t copy_to_user_mvcos_check(size_t size, void __user *ptr,
- const void *x)
-{
- if (size <= 256)
- return copy_to_user_std(size, ptr, x);
- return copy_to_user_mvcos(size, ptr, x);
-}
-
-static size_t copy_in_user_mvcos(size_t size, void __user *to,
- const void __user *from)
-{
- register unsigned long reg0 asm("0") = 0x810081UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- /* FIXME: copy with reduced length. */
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- " jz 2f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2:"SLR" %0,%0\n"
- "3: \n"
- EX_TABLE(0b,3b)
- : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t clear_user_mvcos(size_t size, void __user *to)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
- " jz 4f\n"
- "1:"ALR" %0,%2\n"
- " "SLR" %1,%2\n"
- " j 0b\n"
- "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
- " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
- " "SLR" %3,%1\n"
- " "CLR" %0,%3\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
- " "SLR" %0,%3\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
- : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t strnlen_user_mvcos(size_t count, const char __user *src)
-{
- char buf[256];
- int rc;
- size_t done, len, len_str;
-
- done = 0;
- do {
- len = min(count - done, (size_t) 256);
- rc = uaccess.copy_from_user(len, src + done, buf);
- if (unlikely(rc == len))
- return 0;
- len -= rc;
- len_str = strnlen(buf, len);
- done += len_str;
- } while ((len_str == len) && (done < count));
- return done + 1;
-}
-
-static size_t strncpy_from_user_mvcos(size_t count, const char __user *src,
- char *dst)
-{
- int rc;
- size_t done, len, len_str;
-
- done = 0;
- do {
- len = min(count - done, (size_t) 4096);
- rc = uaccess.copy_from_user(len, src + done, dst);
- if (unlikely(rc == len))
- return -EFAULT;
- len -= rc;
- len_str = strnlen(dst, len);
- done += len_str;
- } while ((len_str == len) && (done < count));
- return done;
-}
-
-struct uaccess_ops uaccess_mvcos = {
- .copy_from_user = copy_from_user_mvcos_check,
- .copy_from_user_small = copy_from_user_std,
- .copy_to_user = copy_to_user_mvcos_check,
- .copy_to_user_small = copy_to_user_std,
- .copy_in_user = copy_in_user_mvcos,
- .clear_user = clear_user_mvcos,
- .strnlen_user = strnlen_user_std,
- .strncpy_from_user = strncpy_from_user_std,
- .futex_atomic_op = futex_atomic_op_std,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
-};
-
-struct uaccess_ops uaccess_mvcos_switch = {
- .copy_from_user = copy_from_user_mvcos,
- .copy_from_user_small = copy_from_user_mvcos,
- .copy_to_user = copy_to_user_mvcos,
- .copy_to_user_small = copy_to_user_mvcos,
- .copy_in_user = copy_in_user_mvcos,
- .clear_user = clear_user_mvcos,
- .strnlen_user = strnlen_user_mvcos,
- .strncpy_from_user = strncpy_from_user_mvcos,
- .futex_atomic_op = futex_atomic_op_pt,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
-};
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
deleted file mode 100644
index 9017a63dda3..00000000000
--- a/arch/s390/lib/uaccess_pt.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * User access functions based on page table walks for enhanced
- * system layout without hardware support.
- *
- * Copyright IBM Corp. 2006, 2012
- * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-
-/*
- * Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
- * contains the (negative) exception code.
- */
-static __always_inline unsigned long follow_table(struct mm_struct *mm,
- unsigned long addr, int write)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *ptep;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return -0x3aUL;
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- return -0x3bUL;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
- return -0x10UL;
- if (pmd_large(*pmd)) {
- if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO))
- return -0x04UL;
- return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK);
- }
- if (unlikely(pmd_bad(*pmd)))
- return -0x10UL;
-
- ptep = pte_offset_map(pmd, addr);
- if (!pte_present(*ptep))
- return -0x11UL;
- if (write && !pte_write(*ptep))
- return -0x04UL;
-
- return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK);
-}
-
-static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
- size_t n, int write_user)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset, done, size, kaddr;
- void *from, *to;
-
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- kaddr = follow_table(mm, uaddr, write_user);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- offset = uaddr & ~PAGE_MASK;
- size = min(n - done, PAGE_SIZE - offset);
- if (write_user) {
- to = (void *) kaddr;
- from = kptr + done;
- } else {
- from = (void *) kaddr;
- to = kptr + done;
- }
- memcpy(to, from, size);
- done += size;
- uaddr += size;
- } while (done < n);
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -kaddr, write_user))
- return n - done;
- goto retry;
-}
-
-/*
- * Do DAT for user address by page table walk, return kernel address.
- * This function needs to be called with current->mm->page_table_lock held.
- */
-static __always_inline unsigned long __dat_user_addr(unsigned long uaddr,
- int write)
-{
- struct mm_struct *mm = current->mm;
- unsigned long kaddr;
- int rc;
-
-retry:
- kaddr = follow_table(mm, uaddr, write);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- return kaddr;
-fault:
- spin_unlock(&mm->page_table_lock);
- rc = __handle_fault(uaddr, -kaddr, write);
- spin_lock(&mm->page_table_lock);
- if (!rc)
- goto retry;
- return 0;
-}
-
-size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
-{
- size_t rc;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy(to, (void __kernel __force *) from, n);
- return 0;
- }
- rc = __user_copy_pt((unsigned long) from, to, n, 0);
- if (unlikely(rc))
- memset(to + n - rc, 0, rc);
- return rc;
-}
-
-size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
-{
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy((void __kernel __force *) to, from, n);
- return 0;
- }
- return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
-}
-
-static size_t clear_user_pt(size_t n, void __user *to)
-{
- long done, size, ret;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memset((void __kernel __force *) to, 0, n);
- return 0;
- }
- done = 0;
- do {
- if (n - done > PAGE_SIZE)
- size = PAGE_SIZE;
- else
- size = n - done;
- ret = __user_copy_pt((unsigned long) to + done,
- &empty_zero_page, size, 1);
- done += size;
- if (ret)
- return ret + n - done;
- } while (done < n);
- return 0;
-}
-
-static size_t strnlen_user_pt(size_t count, const char __user *src)
-{
- unsigned long uaddr = (unsigned long) src;
- struct mm_struct *mm = current->mm;
- unsigned long offset, done, len, kaddr;
- size_t len_str;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return strnlen((const char __kernel __force *) src, count) + 1;
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- kaddr = follow_table(mm, uaddr, 0);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- offset = uaddr & ~PAGE_MASK;
- len = min(count - done, PAGE_SIZE - offset);
- len_str = strnlen((char *) kaddr, len);
- done += len_str;
- uaddr += len_str;
- } while ((len_str == len) && (done < count));
- spin_unlock(&mm->page_table_lock);
- return done + 1;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -kaddr, 0))
- return 0;
- goto retry;
-}
-
-static size_t strncpy_from_user_pt(size_t count, const char __user *src,
- char *dst)
-{
- size_t n = strnlen_user_pt(count, src);
-
- if (!n)
- return -EFAULT;
- if (n > count)
- n = count;
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy(dst, (const char __kernel __force *) src, n);
- if (dst[n-1] == '\0')
- return n-1;
- else
- return n;
- }
- if (__user_copy_pt((unsigned long) src, dst, n, 0))
- return -EFAULT;
- if (dst[n-1] == '\0')
- return n-1;
- else
- return n;
-}
-
-static size_t copy_in_user_pt(size_t n, void __user *to,
- const void __user *from)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset_max, uaddr, done, size, error_code;
- unsigned long uaddr_from = (unsigned long) from;
- unsigned long uaddr_to = (unsigned long) to;
- unsigned long kaddr_to, kaddr_from;
- int write_user;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy((void __force *) to, (void __force *) from, n);
- return 0;
- }
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- write_user = 0;
- uaddr = uaddr_from;
- kaddr_from = follow_table(mm, uaddr_from, 0);
- error_code = kaddr_from;
- if (IS_ERR_VALUE(error_code))
- goto fault;
-
- write_user = 1;
- uaddr = uaddr_to;
- kaddr_to = follow_table(mm, uaddr_to, 1);
- error_code = (unsigned long) kaddr_to;
- if (IS_ERR_VALUE(error_code))
- goto fault;
-
- offset_max = max(uaddr_from & ~PAGE_MASK,
- uaddr_to & ~PAGE_MASK);
- size = min(n - done, PAGE_SIZE - offset_max);
-
- memcpy((void *) kaddr_to, (void *) kaddr_from, size);
- done += size;
- uaddr_from += size;
- uaddr_to += size;
- } while (done < n);
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -error_code, write_user))
- return n - done;
- goto retry;
-}
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile("0: l %1,0(%6)\n" \
- "1: " insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4:\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc" );
-
-static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
-{
- int oldval = 0, newval, ret;
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- if (ret == 0)
- *old = oldval;
- return ret;
-}
-
-int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_op_pt(op, uaddr, oparg, old);
- spin_lock(&current->mm->page_table_lock);
- uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr, 1);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
-{
- int ret;
-
- asm volatile("0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
- : "cc", "memory" );
- *uval = oldval;
- return ret;
-}
-
-int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
- spin_lock(&current->mm->page_table_lock);
- uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr, 1);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-struct uaccess_ops uaccess_pt = {
- .copy_from_user = copy_from_user_pt,
- .copy_from_user_small = copy_from_user_pt,
- .copy_to_user = copy_to_user_pt,
- .copy_to_user_small = copy_to_user_pt,
- .copy_in_user = copy_in_user_pt,
- .clear_user = clear_user_pt,
- .strnlen_user = strnlen_user_pt,
- .strncpy_from_user = strncpy_from_user_pt,
- .futex_atomic_op = futex_atomic_op_pt,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
-};
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
deleted file mode 100644
index 6fbd0633827..00000000000
--- a/arch/s390/lib/uaccess_std.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Standard user space access functions based on mvcp/mvcs and doing
- * interesting things in the secondary space mode.
- *
- * Copyright IBM Corp. 2006
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef CONFIG_64BIT
-#define AHI "ahi"
-#define ALR "alr"
-#define CLR "clr"
-#define LHI "lhi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define ALR "algr"
-#define CLR "clgr"
-#define LHI "lghi"
-#define SLR "slgr"
-#endif
-
-size_t copy_from_user_std(size_t size, const void __user *ptr, void *x)
-{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- "0: mvcp 0(%0,%2),0(%1),%3\n"
- "10:jz 8f\n"
- "1:"ALR" %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcp 0(%0,%2),0(%1),%3\n"
- "11:jnz 1b\n"
- " j 8f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " "LHI" %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "4: mvcp 0(%4,%2),0(%1),%3\n"
- "12:"SLR" %0,%4\n"
- " "ALR" %2,%4\n"
- "5:"LHI" %4,-1\n"
- " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
- " bras %3,7f\n" /* memset loop */
- " xc 0(1,%2),0(%2)\n"
- "6: xc 0(256,%2),0(%2)\n"
- " la %2,256(%2)\n"
- "7:"AHI" %4,-256\n"
- " jnm 6b\n"
- " ex %4,0(%3)\n"
- " j 9f\n"
- "8:"SLR" %0,%0\n"
- "9: \n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
- EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-static size_t copy_from_user_std_check(size_t size, const void __user *ptr,
- void *x)
-{
- if (size <= 1024)
- return copy_from_user_std(size, ptr, x);
- return copy_from_user_pt(size, ptr, x);
-}
-
-size_t copy_to_user_std(size_t size, void __user *ptr, const void *x)
-{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- "0: mvcs 0(%0,%1),0(%2),%3\n"
- "7: jz 5f\n"
- "1:"ALR" %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcs 0(%0,%1),0(%2),%3\n"
- "8: jnz 1b\n"
- " j 5f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " "LHI" %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "4: mvcs 0(%4,%1),0(%2),%3\n"
- "9:"SLR" %0,%4\n"
- " j 6f\n"
- "5:"SLR" %0,%0\n"
- "6: \n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
- EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-static size_t copy_to_user_std_check(size_t size, void __user *ptr,
- const void *x)
-{
- if (size <= 1024)
- return copy_to_user_std(size, ptr, x);
- return copy_to_user_pt(size, ptr, x);
-}
-
-static size_t copy_in_user_std(size_t size, void __user *to,
- const void __user *from)
-{
- unsigned long tmp1;
-
- asm volatile(
- " sacf 256\n"
- " "AHI" %0,-1\n"
- " jo 5f\n"
- " bras %3,3f\n"
- "0:"AHI" %0,257\n"
- "1: mvc 0(1,%1),0(%2)\n"
- " la %1,1(%1)\n"
- " la %2,1(%2)\n"
- " "AHI" %0,-1\n"
- " jnz 1b\n"
- " j 5f\n"
- "2: mvc 0(256,%1),0(%2)\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "3:"AHI" %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,1b-0b(%3)\n"
- "5: "SLR" %0,%0\n"
- "6: sacf 0\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
- : : "cc", "memory");
- return size;
-}
-
-static size_t clear_user_std(size_t size, void __user *to)
-{
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " sacf 256\n"
- " "AHI" %0,-1\n"
- " jo 5f\n"
- " bras %3,3f\n"
- " xc 0(1,%1),0(%1)\n"
- "0:"AHI" %0,257\n"
- " la %2,255(%1)\n" /* %2 = ptr + 255 */
- " srl %2,12\n"
- " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
- " "SLR" %2,%1\n"
- " "CLR" %0,%2\n" /* clear crosses next page boundary? */
- " jnh 5f\n"
- " "AHI" %2,-1\n"
- "1: ex %2,0(%3)\n"
- " "AHI" %2,1\n"
- " "SLR" %0,%2\n"
- " j 5f\n"
- "2: xc 0(256,%1),0(%1)\n"
- " la %1,256(%1)\n"
- "3:"AHI" %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,0(%3)\n"
- "5: "SLR" %0,%0\n"
- "6: sacf 0\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-size_t strnlen_user_std(size_t size, const char __user *src)
-{
- register unsigned long reg0 asm("0") = 0UL;
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " la %2,0(%1)\n"
- " la %3,0(%0,%1)\n"
- " "SLR" %0,%0\n"
- " sacf 256\n"
- "0: srst %3,%2\n"
- " jo 0b\n"
- " la %0,1(%3)\n" /* strnlen_user results includes \0 */
- " "SLR" %0,%1\n"
- "1: sacf 0\n"
- EX_TABLE(0b,1b)
- : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
-{
- register unsigned long reg0 asm("0") = 0UL;
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " la %3,0(%1)\n"
- " la %4,0(%0,%1)\n"
- " sacf 256\n"
- "0: srst %4,%3\n"
- " jo 0b\n"
- " sacf 0\n"
- " la %0,0(%4)\n"
- " jh 1f\n" /* found \0 in string ? */
- " "AHI" %4,1\n" /* include \0 in copy */
- "1:"SLR" %0,%1\n" /* %0 = return length (without \0) */
- " "SLR" %4,%1\n" /* %4 = copy length (including \0) */
- "2: mvcp 0(%4,%2),0(%1),%5\n"
- " jz 9f\n"
- "3:"AHI" %4,-256\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "4: mvcp 0(%4,%2),0(%1),%5\n"
- " jnz 3b\n"
- " j 9f\n"
- "7: sacf 0\n"
- "8:"LHI" %0,%6\n"
- "9:\n"
- EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b)
- : "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2)
- : "d" (reg0), "K" (-EFAULT) : "cc", "memory");
- return size;
-}
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 0\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
-
-int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
-{
- int oldval = 0, newval, ret;
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- *old = oldval;
- return ret;
-}
-
-int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
-{
- int ret;
-
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2: sacf 0\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
- : "cc", "memory" );
- *uval = oldval;
- return ret;
-}
-
-struct uaccess_ops uaccess_std = {
- .copy_from_user = copy_from_user_std_check,
- .copy_from_user_small = copy_from_user_std,
- .copy_to_user = copy_to_user_std_check,
- .copy_to_user_small = copy_to_user_std,
- .copy_in_user = copy_in_user_std,
- .clear_user = clear_user_std,
- .strnlen_user = strnlen_user_std,
- .strncpy_from_user = strncpy_from_user_std,
- .futex_atomic_op = futex_atomic_op_std,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
-};
diff --git a/arch/s390/lib/usercopy.c b/arch/s390/lib/usercopy.c
deleted file mode 100644
index 14b363fec8a..00000000000
--- a/arch/s390/lib/usercopy.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <linux/module.h>
-#include <linux/bug.h>
-
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index 58bff541fde..a6ba0d72433 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -19,6 +19,8 @@
#include <math-emu/double.h>
#include <math-emu/quad.h>
+#define FPC_VALID_MASK 0xF8F8FF03
+
/*
* I miss a macro to round a floating point number to the
* nearest integer in the same floating point format.
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 640bea12303..839592ca265 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,7 +3,7 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
-obj-y += page-states.o gup.o extable.o pageattr.o
+obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 479e9428291..79ddd580d60 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -253,12 +253,12 @@ static int cmm_skip_blanks(char *cp, char **endp)
static struct ctl_table cmm_table[];
-static int cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int cmm_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[16], *p;
+ unsigned int len;
long nr;
- int len;
if (!*lenp || (*ppos && !write)) {
*lenp = 0;
@@ -293,12 +293,12 @@ static int cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer,
return 0;
}
-static int cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer,
- size_t *lenp, loff_t *ppos)
+static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
long nr, seconds;
- int len;
+ unsigned int len;
if (!*lenp || (*ppos && !write)) {
*lenp = 0;
@@ -458,12 +458,10 @@ static int __init cmm_init(void)
if (rc)
goto out_pm;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
- rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0;
- if (rc)
- goto out_kthread;
- return 0;
+ if (!IS_ERR(cmm_thread_ptr))
+ return 0;
-out_kthread:
+ rc = PTR_ERR(cmm_thread_ptr);
unregister_pm_notifier(&cmm_power_notifier);
out_pm:
unregister_oom_notifier(&cmm_oom_nb);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 04e4892247d..46d517c3c76 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -49,10 +49,13 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
seq_printf(m, "%s ", level_name[level]);
- if (pr & _PAGE_INVALID)
+ if (pr & _PAGE_INVALID) {
seq_printf(m, "I\n");
- else
- seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
+ return;
+ }
+ seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
+ seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " ");
+ seq_putc(m, '\n');
}
static void note_page(struct seq_file *m, struct pg_state *st,
@@ -102,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
/*
- * The actual page table walker functions. In order to keep the implementation
- * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
- * flags to note_page() if a region, segment or page table entry is invalid or
- * read-only.
- * After all it's just a hint that the current level being walked contains an
- * invalid or read-only entry.
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
*/
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd, unsigned long addr)
@@ -119,12 +122,18 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
st->current_address = addr;
pte = pte_offset_kernel(pmd, addr);
- prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+ prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
note_page(m, st, prot, 4);
addr += PAGE_SIZE;
}
}
+#ifdef CONFIG_64BIT
+#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
+#else
+#define _PMD_PROT_MASK 0
+#endif
+
static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
pud_t *pud, unsigned long addr)
{
@@ -137,7 +146,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
if (pmd_large(*pmd)) {
- prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
+ prot = pmd_val(*pmd) & _PMD_PROT_MASK;
note_page(m, st, prot, 3);
} else
walk_pte_level(m, st, pmd, addr);
@@ -147,6 +156,12 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
}
}
+#ifdef CONFIG_64BIT
+#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO)
+#else
+#define _PUD_PROT_MASK 0
+#endif
+
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pgd_t *pgd, unsigned long addr)
{
@@ -159,7 +174,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pud = pud_offset(pgd, addr);
if (!pud_none(*pud))
if (pud_large(*pud)) {
- prot = pud_val(*pud) & _PAGE_RO;
+ prot = pud_val(*pud) & _PUD_PROT_MASK;
note_page(m, st, prot, 2);
} else
walk_pmd_level(m, st, pud, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2fb9e63b8fc..3f3b35403d0 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
#define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000
#define VM_FAULT_SIGNAL 0x080000
+#define VM_FAULT_PFAULT 0x100000
static unsigned long store_indication __read_mostly;
@@ -105,26 +106,151 @@ void bust_spinlocks(int yes)
* Returns the address space associated with the fault.
* Returns 0 for kernel space and 1 for user space.
*/
-static inline int user_space_fault(unsigned long trans_exc_code)
+static inline int user_space_fault(struct pt_regs *regs)
{
+ unsigned long trans_exc_code;
+
/*
* The lowest two bits of the translation exception
* identification indicate which paging table was used.
*/
- trans_exc_code &= 3;
- if (trans_exc_code == 2)
- /* Access via secondary space, set_fs setting decides */
+ trans_exc_code = regs->int_parm_long & 3;
+ if (trans_exc_code == 3) /* home space -> kernel */
+ return 0;
+ if (user_mode(regs))
+ return 1;
+ if (trans_exc_code == 2) /* secondary space -> set_fs */
return current->thread.mm_segment.ar4;
- if (s390_user_mode == HOME_SPACE_MODE)
- /* User space if the access has been done via home space. */
- return trans_exc_code == 3;
- /*
- * If the user space is not the home space the kernel runs in home
- * space. Access via secondary space has already been covered,
- * access via primary space or access register is from user space
- * and access via home space is from the kernel.
- */
- return trans_exc_code != 3;
+ if (current->flags & PF_VCPU)
+ return 1;
+ return 0;
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%016lx ", asce);
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table = table + ((address >> 53) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R1:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table = table + ((address >> 42) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R2:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table = table + ((address >> 31) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R3:%016lx ", *table);
+ if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont(KERN_CONT "S:%016lx ", *table);
+ if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ }
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%016lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%08lx ", asce);
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%08lx ", *table);
+ if (*table & _SEGMENT_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%08lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Fault in ");
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ if (!user_space_fault(regs)) {
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ }
+#ifdef CONFIG_PGSTE
+ else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ asce = gmap->asce;
+ pr_cont("gmap ");
+ }
+#endif
+ else {
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}
static inline void report_user_fault(struct pt_regs *regs, long signr)
@@ -139,8 +265,9 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
regs->int_code);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %lX\n",
- regs->int_parm_long & __FAIL_ADDR_MASK);
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -176,13 +303,15 @@ static noinline void do_no_context(struct pt_regs *regs)
* terminate things with extreme prejudice.
*/
address = regs->int_parm_long & __FAIL_ADDR_MASK;
- if (!user_space_fault(regs->int_parm_long))
+ if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " at virtual kernel address %p\n", (void *)address);
+ " in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
- " at virtual user address %p\n", (void *)address);
-
+ " in virtual user address space\n");
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
}
@@ -232,6 +361,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
return;
}
case VM_FAULT_BADCONTEXT:
+ case VM_FAULT_PFAULT:
do_no_context(regs);
break;
case VM_FAULT_SIGNAL:
@@ -269,6 +399,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
*/
static inline int do_exception(struct pt_regs *regs, int access)
{
+#ifdef CONFIG_PGSTE
+ struct gmap *gmap;
+#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
@@ -282,7 +415,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
* The instruction that caused the program check has
* been nullified. Don't signal single step via SIGTRAP.
*/
- clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
if (notify_page_fault(regs))
return 0;
@@ -296,20 +429,23 @@ static inline int do_exception(struct pt_regs *regs, int access)
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
- address = __gmap_fault(address,
- (struct gmap *) S390_lowcore.gmap);
+ gmap = (struct gmap *)
+ ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+ if (gmap) {
+ address = __gmap_fault(address, gmap);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
@@ -318,6 +454,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
fault = VM_FAULT_OOM;
goto out_up;
}
+ if (gmap->pfault_enabled)
+ flags |= FAULT_FLAG_RETRY_NOWAIT;
}
#endif
@@ -374,9 +512,19 @@ retry:
regs, address);
}
if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+ if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /* FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_sem has not been released */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
+ }
+#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT);
flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
@@ -395,8 +543,13 @@ void __kprobes do_protection_exception(struct pt_regs *regs)
int fault;
trans_exc_code = regs->int_parm_long;
- /* Protection exception is suppressing, decrement psw address. */
- regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+ /*
+ * Protection exceptions are suppressing, decrement psw address.
+ * The exception to this rule are aborted transactions, for these
+ * the PSW already points to the correct location.
+ */
+ if (!(regs->int_code & 0x200))
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
@@ -421,67 +574,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
do_fault_error(regs, fault);
}
-#ifdef CONFIG_64BIT
-void __kprobes do_asce_exception(struct pt_regs *regs)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long trans_exc_code;
-
- /*
- * The instruction that caused the program check has
- * been nullified. Don't signal single step via SIGTRAP.
- */
- clear_tsk_thread_flag(current, TIF_PER_TRAP);
-
- trans_exc_code = regs->int_parm_long;
- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
- goto no_context;
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK);
- up_read(&mm->mmap_sem);
-
- if (vma) {
- update_mm(mm, current);
- return;
- }
-
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- do_sigsegv(regs, SEGV_MAPERR);
- return;
- }
-
-no_context:
- do_no_context(regs);
-}
-#endif
-
-int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
-{
- struct pt_regs regs;
- int access, fault;
-
- /* Emulate a uaccess fault from kernel mode. */
- regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
- if (!irqs_disabled())
- regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
- regs.psw.addr = (unsigned long) __builtin_return_address(0);
- regs.psw.addr |= PSW_ADDR_AMODE;
- regs.int_code = pgm_int_code;
- regs.int_parm_long = (uaddr & PAGE_MASK) | 2;
- access = write ? VM_WRITE : VM_READ;
- fault = do_exception(&regs, access);
- /*
- * Since the fault happened in kernel mode while performing a uaccess
- * all we need to do now is emulating a fixup in case "fault" is not
- * zero.
- * For the calling uaccess functions this results always in -EFAULT.
- */
- return fault ? -EFAULT : 0;
-}
-
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
@@ -634,8 +726,8 @@ out:
put_task_struct(tsk);
}
-static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
struct thread_struct *thread, *next;
struct task_struct *tsk;
@@ -662,18 +754,18 @@ static int __init pfault_irq_init(void)
{
int rc;
- rc = register_external_interrupt(0x2603, pfault_interrupt);
+ rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
if (rc)
goto out_extint;
rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
if (rc)
goto out_pfault;
- service_subclass_irq_register();
+ irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
hotcpu_notifier(pfault_cpu_notify, 0);
return 0;
out_pfault:
- unregister_external_interrupt(0x2603, pfault_interrupt);
+ unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
out_extint:
pfault_disable = 1;
return rc;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215..639fce46400 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
pte_t *ptep, pte;
struct page *page;
- mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+ mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
do {
@@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
struct page *head, *page, *tail;
int refs;
- result = write ? 0 : _SEGMENT_ENTRY_RO;
- mask = result | _SEGMENT_ENTRY_INV;
+ result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+ mask = result | _SEGMENT_ENTRY_INVALID;
if ((pmd_val(pmd) & mask) != result)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
@@ -180,9 +180,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if ((end < start) || (end > TASK_SIZE))
+ if ((end <= start) || (end > TASK_SIZE))
return 0;
-
+ /*
+ * local_irq_save() doesn't prevent pagetable teardown, but does
+ * prevent the pagetables from being freed on s390.
+ *
+ * So long as we atomically load page table pointers versus teardown,
+ * we can follow the address down to the the page and take a ref on it.
+ */
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
@@ -219,63 +225,22 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp, pgd;
- int nr = 0;
+ int nr, ret;
start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if ((end < start) || (end > TASK_SIZE))
- goto slow_irqon;
-
- /*
- * local_irq_disable() doesn't prevent pagetable teardown, but does
- * prevent the pagetables from being freed on s390.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_disable();
- pgdp = pgd_offset(mm, addr);
- do {
- pgd = *pgdp;
- barrier();
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-
- {
- int ret;
-slow:
- local_irq_enable();
-slow_irqon:
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
-
- return ret;
- }
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ if (nr == nr_pages)
+ return nr;
+
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ nr_pages - nr, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+ /* Have to be a bit careful with return values */
+ if (nr > 0)
+ ret = (ret < 0) ? nr : ret + nr;
+ return ret;
}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 532525ec88c..0ff66a7e29b 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -8,21 +8,124 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+ int none, young, prot;
+ pmd_t pmd;
+
+ /*
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp ..R...I...y.
+ * empty .10...000000 -> ..0...1...0.
+ * prot-none, clean, old .11...000001 -> ..0...1...1.
+ * prot-none, clean, young .11...000101 -> ..1...1...1.
+ * prot-none, dirty, old .10...001001 -> ..0...1...1.
+ * prot-none, dirty, young .10...001101 -> ..1...1...1.
+ * read-only, clean, old .11...010001 -> ..1...1...0.
+ * read-only, clean, young .01...010101 -> ..1...0...1.
+ * read-only, dirty, old .11...011001 -> ..1...1...0.
+ * read-only, dirty, young .01...011101 -> ..1...0...1.
+ * read-write, clean, old .11...110001 -> ..0...1...0.
+ * read-write, clean, young .01...110101 -> ..0...0...1.
+ * read-write, dirty, old .10...111001 -> ..0...1...0.
+ * read-write, dirty, young .00...111101 -> ..0...0...1.
+ * Huge ptes are dirty by definition, a clean pte is made dirty
+ * by the conversion.
+ */
+ if (pte_present(pte)) {
+ pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+ if (pte_val(pte) & _PAGE_INVALID)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ none = (pte_val(pte) & _PAGE_PRESENT) &&
+ !(pte_val(pte) & _PAGE_READ) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ prot = (pte_val(pte) & _PAGE_PROTECT) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ young = pte_val(pte) & _PAGE_YOUNG;
+ if (none || young)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (prot || (none && young))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else
+ pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+ return pmd;
+}
+
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+ pte_t pte;
+
+ /*
+ * Convert encoding pmd bits pte bits
+ * ..R...I...y. .IR...wrdytp
+ * empty ..0...1...0. -> .10...000000
+ * prot-none, old ..0...1...1. -> .10...001001
+ * prot-none, young ..1...1...1. -> .10...001101
+ * read-only, old ..1...1...0. -> .11...011001
+ * read-only, young ..1...0...1. -> .01...011101
+ * read-write, old ..0...1...0. -> .10...111001
+ * read-write, young ..0...0...1. -> .00...111101
+ * Huge ptes are dirty by definition
+ */
+ if (pmd_present(pmd)) {
+ pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
+ (pmd_val(pmd) & PAGE_MASK);
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
+ pte_val(pte) |= _PAGE_INVALID;
+ if (pmd_prot_none(pmd)) {
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_YOUNG;
+ } else {
+ pte_val(pte) |= _PAGE_READ;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
+ pte_val(pte) |= _PAGE_YOUNG;
+ }
+ } else
+ pte_val(pte) = _PAGE_INVALID;
+ return pte;
+}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *pteptr, pte_t pteval)
+ pte_t *ptep, pte_t pte)
{
- pmd_t *pmdp = (pmd_t *) pteptr;
- unsigned long mask;
+ pmd_t pmd;
+ pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
- pteptr = (pte_t *) pte_page(pteval)[1].index;
- mask = pte_val(pteval) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= pte_page(pte)[1].index;
+ } else
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+ *(pmd_t *) ptep = pmd;
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long origin;
+ pmd_t pmd;
+
+ pmd = *(pmd_t *) ptep;
+ if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+ origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= *(unsigned long *) origin;
}
+ return __pmd_to_pte(pmd);
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pmd_t *pmdp = (pmd_t *) ptep;
+ pte_t pte = huge_ptep_get(ptep);
- pmd_val(*pmdp) = pte_val(pteval);
+ pmdp_flush_direct(mm, addr, pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ return pte;
}
int arch_prepare_hugepage(struct page *page)
@@ -39,7 +142,7 @@ int arch_prepare_hugepage(struct page *page)
if (!ptep)
return -ENOMEM;
- pte = mk_pte(page, PAGE_RW);
+ pte_val(pte) = addr;
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
pte_val(pte) += PAGE_SIZE;
@@ -58,7 +161,7 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
- clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) ptep, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ae672f41c46..0c1073ed1e8 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
+#include <linux/memory.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/initrd.h>
@@ -36,17 +37,17 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/ctl_reg.h>
+#include <asm/sclp.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
-static unsigned long __init setup_zero_pages(void)
+static void __init setup_zero_pages(void)
{
struct cpuid cpu_id;
unsigned int order;
- unsigned long size;
struct page *page;
int i;
@@ -63,10 +64,19 @@ static unsigned long __init setup_zero_pages(void)
break;
case 0x2097: /* z10 */
case 0x2098: /* z10 */
- default:
+ case 0x2817: /* z196 */
+ case 0x2818: /* z196 */
order = 2;
break;
+ case 0x2827: /* zEC12 */
+ case 0x2828: /* zEC12 */
+ default:
+ order = 5;
+ break;
}
+ /* Limit number of empty zero pages for small memory sizes */
+ if (order > 2 && totalram_pages <= 16384)
+ order = 2;
empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!empty_zero_page)
@@ -75,14 +85,11 @@ static unsigned long __init setup_zero_pages(void)
page = virt_to_page((void *) empty_zero_page);
split_page(page, order);
for (i = 1 << order; i > 0; i--) {
- SetPageReserved(page);
+ mark_page_reserved(page);
page++;
}
- size = PAGE_SIZE << order;
- zero_page_mask = (size - 1) & PAGE_MASK;
-
- return 1UL << order;
+ zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
/*
@@ -117,8 +124,6 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
- atomic_set(&init_mm.context.attach_count, 1);
-
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
@@ -129,63 +134,37 @@ void __init paging_init(void)
void __init mem_init(void)
{
- unsigned long codesize, reservedpages, datasize, initsize;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(0, mm_cpumask(&init_mm));
+ atomic_set(&init_mm.context.attach_count, 1);
- max_mapnr = num_physpages = max_low_pfn;
+ max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* Setup guest page hinting */
cmma_init();
/* this will put all low memory onto the freelists */
- totalram_pages += free_all_bootmem();
- totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */
-
- reservedpages = 0;
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
- nr_free_pages() << (PAGE_SHIFT-10),
- max_mapnr << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >>10,
- initsize >> 10);
+ free_all_bootmem();
+ setup_zero_pages(); /* Setup zeroed pages. */
+
+ mem_init_print_info(NULL);
printk("Write protected kernel read-only data: %#lx - %#lx\n",
(unsigned long)&_stext,
PFN_ALIGN((unsigned long)&_eshared) - 1);
}
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr = begin;
-
- if (begin >= end)
- return;
- for (; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM,
- PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-}
-
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)&__init_begin,
- (unsigned long)&__init_end);
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
}
#endif
@@ -228,4 +207,25 @@ int arch_add_memory(int nid, u64 start, u64 size)
vmem_remove_mapping(start, size);
return rc;
}
+
+unsigned long memory_block_size_bytes(void)
+{
+ /*
+ * Make sure the memory block size is always greater
+ * or equal than the memory increment size.
+ */
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+ /*
+ * There is no hardware or firmware interface which could trigger a
+ * hot memory remove on s390. So there is nothing that needs to be
+ * implemented.
+ */
+ return -EBUSY;
+}
+#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 921fa541dc0..2a2e35416d2 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -14,6 +14,7 @@
#include <linux/gfp.h>
#include <linux/cpu.h>
#include <asm/ctl_reg.h>
+#include <asm/io.h>
/*
* This function writes to kernel memory bypassing DAT and possible
@@ -127,7 +128,7 @@ void memcpy_absolute(void *dest, void *src, size_t count)
/*
* Copy memory from kernel (real) to user (virtual)
*/
-int copy_to_user_real(void __user *dest, void *src, size_t count)
+int copy_to_user_real(void __user *dest, void *src, unsigned long count)
{
int offs = 0, size, rc;
char *buf;
@@ -151,32 +152,6 @@ out:
}
/*
- * Copy memory from user (virtual) to kernel (real)
- */
-int copy_from_user_real(void *dest, void __user *src, size_t count)
-{
- int offs = 0, size, rc;
- char *buf;
-
- buf = (char *) __get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = -EFAULT;
- while (offs < count) {
- size = min(PAGE_SIZE, count - offs);
- if (copy_from_user(buf, src + offs, size))
- goto out;
- if (memcpy_real(dest + offs, buf, size))
- goto out;
- offs += size;
- }
- rc = 0;
-out:
- free_page((unsigned long) buf);
- return rc;
-}
-
-/*
* Check if physical address is within prefix or zero page
*/
static int is_swapped(unsigned long addr)
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
new file mode 100644
index 00000000000..5535cfe0ee1
--- /dev/null
+++ b/arch/s390/mm/mem_detect.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+#define ADDR2G (1ULL << 31)
+
+#define CHUNK_READ_WRITE 0
+#define CHUNK_READ_ONLY 1
+
+static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+ memblock_add_range(&memblock.memory, start, size, 0, 0);
+ memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+void __init detect_memory_memblock(void)
+{
+ unsigned long long memsize, rnmax, rzm;
+ unsigned long addr, size;
+ int type;
+
+ rzm = sclp_get_rzm();
+ rnmax = sclp_get_rnmax();
+ memsize = rzm * rnmax;
+ if (!rzm)
+ rzm = 1ULL << 17;
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ rzm = min(ADDR2G, rzm);
+ memsize = min(ADDR2G, memsize);
+ }
+ max_physmem_end = memsize;
+ addr = 0;
+ /* keep memblock lists close to the kernel */
+ memblock_set_bottom_up(true);
+ do {
+ size = 0;
+ type = tprot(addr);
+ do {
+ size += rzm;
+ if (max_physmem_end && addr + size >= max_physmem_end)
+ break;
+ } while (type == tprot(addr + size));
+ if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+ if (max_physmem_end && (addr + size > max_physmem_end))
+ size = max_physmem_end - addr;
+ memblock_physmem_add(addr, size);
+ }
+ addr += size;
+ } while (addr < max_physmem_end);
+ memblock_set_bottom_up(false);
+ if (!max_physmem_end)
+ max_physmem_end = memblock_end_of_DRAM();
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c59a5efa58b..9b436c21195 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -64,6 +64,11 @@ static unsigned long mmap_rnd(void)
return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
}
+static unsigned long mmap_base_legacy(void)
+{
+ return TASK_UNMAPPED_BASE + mmap_rnd();
+}
+
static inline unsigned long mmap_base(void)
{
unsigned long gap = rlimit(RLIMIT_STACK);
@@ -89,29 +94,24 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->mmap_base = mmap_base_legacy();
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
#else
-int s390_mmap_check(unsigned long addr, unsigned long len)
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
{
- int rc;
-
- if (!is_compat_task() &&
- len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) {
- rc = crst_table_upgrade(current->mm, 1UL << 53);
- if (rc)
- return rc;
- update_mm(current->mm, current);
- }
+ if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+ return 0;
+ if (!(flags & MAP_FIXED))
+ addr = 0;
+ if ((addr + len) >= TASK_SIZE)
+ return crst_table_upgrade(current->mm, 1UL << 53);
return 0;
}
@@ -131,7 +131,6 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
rc = crst_table_upgrade(mm, 1UL << 53);
if (rc)
return (unsigned long) rc;
- update_mm(mm, current);
area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
}
return area;
@@ -154,7 +153,6 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
rc = crst_table_upgrade(mm, 1UL << 53);
if (rc)
return (unsigned long) rc;
- update_mm(mm, current);
area = arch_get_unmapped_area_topdown(filp, addr, len,
pgoff, flags);
}
@@ -171,13 +169,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->mmap_base = mmap_base_legacy();
mm->get_unmapped_area = s390_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 29ccee3651f..8400f494623 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -9,31 +9,26 @@
#include <asm/pgtable.h>
#include <asm/page.h>
-void storage_key_init_range(unsigned long start, unsigned long end)
+#if PAGE_DEFAULT_KEY
+static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
- unsigned long boundary, function, size;
+ asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
+ : [addr] "+a" (addr) : [skey] "d" (skey));
+ return addr;
+}
+
+void __storage_key_init_range(unsigned long start, unsigned long end)
+{
+ unsigned long boundary, size;
while (start < end) {
- if (MACHINE_HAS_EDAT2) {
- /* set storage keys for a 2GB frame */
- function = 0x22000 | PAGE_DEFAULT_KEY;
- size = 1UL << 31;
- boundary = (start + size) & ~(size - 1);
- if (boundary <= end) {
- do {
- start = pfmf(function, start);
- } while (start < boundary);
- continue;
- }
- }
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
- function = 0x21000 | PAGE_DEFAULT_KEY;
size = 1UL << 20;
boundary = (start + size) & ~(size - 1);
if (boundary <= end) {
do {
- start = pfmf(function, start);
+ start = sske_frame(start, PAGE_DEFAULT_KEY);
} while (start < boundary);
continue;
}
@@ -42,6 +37,7 @@ void storage_key_init_range(unsigned long start, unsigned long end)
start += PAGE_SIZE;
}
}
+#endif
static pte_t *walk_page_table(unsigned long addr)
{
@@ -124,10 +120,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
pte = pte_offset_kernel(pmd, address);
if (!enable) {
__ptep_ipte(address, pte);
- pte_val(*pte) = _PAGE_TYPE_EMPTY;
+ pte_val(*pte) = _PAGE_INVALID;
continue;
}
- *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
+ pte_val(*pte) = __pa(address);
}
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae44d2a3431..37b8241ec78 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -17,6 +17,7 @@
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
+#include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -48,12 +49,25 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
}
#ifdef CONFIG_64BIT
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ set_user_asce(mm);
+ }
+ __tlb_flush_local();
+}
+
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
unsigned long *table, *pgd;
unsigned long entry;
+ int flush;
BUG_ON(limit > (1UL << 53));
+ flush = 0;
repeat:
table = crst_table_alloc(mm);
if (!table)
@@ -79,12 +93,15 @@ repeat:
mm->pgd = (pgd_t *) table;
mm->task_size = mm->context.asce_limit;
table = NULL;
+ flush = 1;
}
spin_unlock_bh(&mm->page_table_lock);
if (table)
crst_table_free(mm, table);
if (mm->context.asce_limit < limit)
goto repeat;
+ if (flush)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
return 0;
}
@@ -92,6 +109,10 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
pgd_t *pgd;
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
while (mm->context.asce_limit > limit) {
pgd = mm->pgd;
switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -114,6 +135,8 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
mm->task_size = mm->context.asce_limit;
crst_table_free(mm, (unsigned long *) pgd);
}
+ if (current->active_mm == mm)
+ set_user_asce(mm);
}
#endif
@@ -161,7 +184,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
struct gmap_rmap *rmap;
struct page *page;
- if (*table & _SEGMENT_ENTRY_INV)
+ if (*table & _SEGMENT_ENTRY_INVALID)
return 0;
page = pfn_to_page(*table >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
@@ -172,14 +195,14 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
kfree(rmap);
break;
}
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
return 1;
}
static void gmap_flush_tlb(struct gmap *gmap)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_idte((unsigned long) gmap->table |
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
_ASCE_TYPE_REGION1);
else
__tlb_flush_global();
@@ -198,7 +221,7 @@ void gmap_free(struct gmap *gmap)
/* Flush tlb. */
if (MACHINE_HAS_IDTE)
- __tlb_flush_idte((unsigned long) gmap->table |
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
_ASCE_TYPE_REGION1);
else
__tlb_flush_global();
@@ -245,7 +268,9 @@ EXPORT_SYMBOL_GPL(gmap_disable);
* gmap_alloc_table is assumed to be called with mmap_sem held
*/
static int gmap_alloc_table(struct gmap *gmap,
- unsigned long *table, unsigned long init)
+ unsigned long *table, unsigned long init)
+ __releases(&gmap->mm->page_table_lock)
+ __acquires(&gmap->mm->page_table_lock)
{
struct page *page;
unsigned long *new;
@@ -258,7 +283,7 @@ static int gmap_alloc_table(struct gmap *gmap,
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
- if (*table & _REGION_ENTRY_INV) {
+ if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
@@ -273,7 +298,7 @@ static int gmap_alloc_table(struct gmap *gmap,
* @addr: address in the guest address space
* @len: length of the memory area to unmap
*
- * Returns 0 if the unmap succeded, -EINVAL if not.
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
*/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
@@ -292,22 +317,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the guest addr space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INV)
+ if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 20) & 0x7ff);
/* Clear segment table entry in guest address space. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV;
+ *table = _SEGMENT_ENTRY_INVALID;
}
out:
spin_unlock(&gmap->mm->page_table_lock);
@@ -324,7 +349,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
* @from: source address in the parent address space
* @to: target address in the guest address space
*
- * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not.
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
*/
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len)
@@ -335,7 +360,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > PGDIR_SIZE ||
+ if (len == 0 || from + len > TASK_MAX_SIZE ||
from + len < from || to + len < to)
return -EINVAL;
@@ -345,17 +370,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
for (off = 0; off < len; off += PMD_SIZE) {
/* Walk the gmap address space page table */
table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INV) &&
+ if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
goto out_unmap;
table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +388,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
/* Store 'from' address in an invalid segment table entry. */
flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+ *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
}
spin_unlock(&gmap->mm->page_table_lock);
up_read(&gmap->mm->mmap_sem);
@@ -379,75 +405,187 @@ out_unmap:
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
-/*
- * this function is assumed to be called with mmap_sem held
- */
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
{
- unsigned long *table, vmaddr, segment;
- struct mm_struct *mm;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct vm_area_struct *vma;
- struct page *page;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ unsigned long *table;
- current->thread.gmap_addr = address;
- mm = gmap->mm;
- /* Walk the gmap address space page table */
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
- return -EFAULT;
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
+ return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
- return -EFAULT;
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
+ return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV))
- return -EFAULT;
+ if (unlikely(*table & _REGION_ENTRY_INVALID))
+ return ERR_PTR(-EFAULT);
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
+ return table;
+}
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+{
+ unsigned long *segment_ptr, vmaddr, segment;
+ struct gmap_pgtable *mp;
+ struct page *page;
+
+ current->thread.gmap_addr = address;
+ segment_ptr = gmap_table_walk(address, gmap);
+ if (IS_ERR(segment_ptr))
+ return PTR_ERR(segment_ptr);
/* Convert the gmap address to an mm address. */
- segment = *table;
- if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
+ segment = *segment_ptr;
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
page = pfn_to_page(segment >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_RO) {
+ } else if (segment & _SEGMENT_ENTRY_PROTECT) {
vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- vma = find_vma(mm, vmaddr);
- if (!vma || vma->vm_start > vmaddr)
- return -EFAULT;
-
- /* Walk the parent mm page table */
- pgd = pgd_offset(mm, vmaddr);
- pud = pud_alloc(mm, pgd, vmaddr);
- if (!pud)
- return -ENOMEM;
- pmd = pmd_alloc(mm, pud, vmaddr);
- if (!pmd)
- return -ENOMEM;
- if (!pmd_present(*pmd) &&
- __pte_alloc(mm, vma, pmd, vmaddr))
- return -ENOMEM;
- /* pmd now points to a valid segment table entry. */
- rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
- if (!rmap)
- return -ENOMEM;
- /* Link gmap segment table entry location to page table. */
- page = pmd_page(*pmd);
- mp = (struct gmap_pgtable *) page->index;
- rmap->entry = table;
- spin_lock(&mm->page_table_lock);
+ return vmaddr | (address & ~PMD_MASK);
+ }
+ return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_translate(address, gmap);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
+ unsigned long *segment_ptr, struct gmap *gmap)
+{
+ unsigned long vmaddr;
+ struct vm_area_struct *vma;
+ struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
+ struct mm_struct *mm;
+ struct page *page;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ mm = gmap->mm;
+ vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+ vma = find_vma(mm, vmaddr);
+ if (!vma || vma->vm_start > vmaddr)
+ return -EFAULT;
+ /* Walk the parent mm page table */
+ pgd = pgd_offset(mm, vmaddr);
+ pud = pud_alloc(mm, pgd, vmaddr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = pmd_alloc(mm, pud, vmaddr);
+ if (!pmd)
+ return -ENOMEM;
+ if (!pmd_present(*pmd) &&
+ __pte_alloc(mm, vma, pmd, vmaddr))
+ return -ENOMEM;
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
+ /* pmd now points to a valid segment table entry. */
+ rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+ if (!rmap)
+ return -ENOMEM;
+ /* Link gmap segment table entry location to page table. */
+ page = pmd_page(*pmd);
+ mp = (struct gmap_pgtable *) page->index;
+ rmap->gmap = gmap;
+ rmap->entry = segment_ptr;
+ rmap->vmaddr = address & PMD_MASK;
+ spin_lock(&mm->page_table_lock);
+ if (*segment_ptr == segment) {
list_add(&rmap->list, &mp->mapper);
- spin_unlock(&mm->page_table_lock);
/* Set gmap segment table entry to page table. */
- *table = pmd_val(*pmd) & PAGE_MASK;
- return vmaddr | (address & ~PMD_MASK);
+ *segment_ptr = pmd_val(*pmd) & PAGE_MASK;
+ rmap = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ kfree(rmap);
+ return 0;
+}
+
+static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
+{
+ struct gmap_rmap *rmap, *next;
+ struct gmap_pgtable *mp;
+ struct page *page;
+ int flush;
+
+ flush = 0;
+ spin_lock(&mm->page_table_lock);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+ *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
+ list_del(&rmap->list);
+ kfree(rmap);
+ flush = 1;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (flush)
+ __tlb_flush_global();
+}
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+{
+ unsigned long *segment_ptr, segment;
+ struct gmap_pgtable *mp;
+ struct page *page;
+ int rc;
+
+ current->thread.gmap_addr = address;
+ segment_ptr = gmap_table_walk(address, gmap);
+ if (IS_ERR(segment_ptr))
+ return -EFAULT;
+ /* Convert the gmap address to an mm address. */
+ while (1) {
+ segment = *segment_ptr;
+ if (!(segment & _SEGMENT_ENTRY_INVALID)) {
+ /* Page table is present */
+ page = pfn_to_page(segment >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ return mp->vmaddr | (address & ~PMD_MASK);
+ }
+ if (!(segment & _SEGMENT_ENTRY_PROTECT))
+ /* Nothing mapped in the gmap address space. */
+ break;
+ rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
+ if (rc)
+ return rc;
}
return -EFAULT;
}
@@ -464,6 +602,82 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
}
EXPORT_SYMBOL_GPL(gmap_fault);
+static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+{
+ if (!non_swap_entry(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (is_migration_entry(entry)) {
+ struct page *page = migration_entry_to_page(entry);
+
+ if (PageAnon(page))
+ dec_mm_counter(mm, MM_ANONPAGES);
+ else
+ dec_mm_counter(mm, MM_FILEPAGES);
+ }
+ free_swap_and_cache(entry);
+}
+
+/**
+ * The mm->mmap_sem lock must be held
+ */
+static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
+{
+ unsigned long ptev, pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep, pte;
+
+ ptep = get_locked_pte(mm, address, &ptl);
+ if (unlikely(!ptep))
+ return;
+ pte = *ptep;
+ if (!pte_swap(pte))
+ goto out_pte;
+ /* Zap unused and logically-zero pages */
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ ptev = pte_val(pte);
+ if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
+ ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
+ gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
+ pte_clear(mm, address, ptep);
+ }
+ pgste_set_unlock(ptep, pgste);
+out_pte:
+ pte_unmap_unlock(*ptep, ptl);
+}
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(unsigned long address, struct gmap *gmap)
+{
+ unsigned long *table, *segment_ptr;
+ unsigned long segment, pgstev, ptev;
+ struct gmap_pgtable *mp;
+ struct page *page;
+
+ segment_ptr = gmap_table_walk(address, gmap);
+ if (IS_ERR(segment_ptr))
+ return;
+ segment = *segment_ptr;
+ if (segment & _SEGMENT_ENTRY_INVALID)
+ return;
+ page = pfn_to_page(segment >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ address = mp->vmaddr | (address & ~PMD_MASK);
+ /* Page table is present */
+ table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
+ table = table + ((address >> 12) & 0xff);
+ pgstev = table[PTRS_PER_PTE];
+ ptev = table[0];
+ /* quick check, checked again with locks held */
+ if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
+ ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
+ gmap_zap_unused(gmap->mm, address);
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
@@ -477,25 +691,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
while (address < to) {
/* Walk the gmap address space page table */
table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INV)) {
+ if (unlikely(*table & _REGION_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+ if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -511,27 +725,120 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
}
EXPORT_SYMBOL_GPL(gmap_discard);
-void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
- struct gmap_rmap *rmap, *next;
+ spin_lock(&gmap_notifier_lock);
+ list_add(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_init(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+{
+ unsigned long addr;
+ spinlock_t *ptl;
+ pte_t *ptep, entry;
+ pgste_t pgste;
+ int rc = 0;
+
+ if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ return -EINVAL;
+ down_read(&gmap->mm->mmap_sem);
+ while (len) {
+ /* Convert gmap address and connect the page tables */
+ addr = __gmap_fault(start, gmap);
+ if (IS_ERR_VALUE(addr)) {
+ rc = addr;
+ break;
+ }
+ /* Get the page mapped */
+ if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
+ rc = -EFAULT;
+ break;
+ }
+ /* Walk the process page table, lock and get pte pointer */
+ ptep = get_locked_pte(gmap->mm, addr, &ptl);
+ if (unlikely(!ptep))
+ continue;
+ /* Set notification bit in the pgste of the pte */
+ entry = *ptep;
+ if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste_set_unlock(ptep, pgste);
+ start += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+ spin_unlock(ptl);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
+{
+ unsigned long segment_offset;
+ struct gmap_notifier *nb;
struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
struct page *page;
- int flush;
- flush = 0;
- spin_lock(&mm->page_table_lock);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ segment_offset = segment_offset * (4096 / sizeof(pte_t));
+ page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry =
- _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
- list_del(&rmap->list);
- kfree(rmap);
- flush = 1;
+ spin_lock(&gmap_notifier_lock);
+ list_for_each_entry(rmap, &mp->mapper, list) {
+ list_for_each_entry(nb, &gmap_notifier_list, list)
+ nb->notifier_call(rmap->gmap,
+ rmap->vmaddr + segment_offset);
}
- spin_unlock(&mm->page_table_lock);
- if (flush)
- __tlb_flush_global();
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+
+static inline int page_table_with_pgste(struct page *page)
+{
+ return atomic_read(&page->_mapcount) == 0;
}
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
@@ -549,13 +856,17 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
__free_page(page);
return NULL;
}
- pgtable_page_ctor(page);
+ if (!pgtable_page_ctor(page)) {
+ kfree(mp);
+ __free_page(page);
+ return NULL;
+ }
mp->vmaddr = vmaddr & PMD_MASK;
INIT_LIST_HEAD(&mp->mapper);
page->index = (unsigned long) mp;
- atomic_set(&page->_mapcount, 3);
+ atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
return table;
}
@@ -574,20 +885,166 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
+static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ pte_t *start_pte, *pte;
+ spinlock_t *ptl;
+ pgste_t pgste;
+
+ start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte = start_pte;
+ do {
+ pgste = pgste_get_lock(pte);
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ if (init_skey) {
+ unsigned long address;
+
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+
+ /* skip invalid and not writable pages */
+ if (pte_val(*pte) & _PAGE_INVALID ||
+ !(pte_val(*pte) & _PAGE_WRITE)) {
+ pgste_set_unlock(pte, pgste);
+ continue;
+ }
+
+ address = pte_val(*pte) & PAGE_MASK;
+ page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
+ }
+ pgste_set_unlock(pte, pgste);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(start_pte, ptl);
+
+ return addr;
+}
+
+static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ unsigned long next;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool init_skey)
+{
+ unsigned long addr, next;
+ pgd_t *pgd;
+
+ down_write(&mm->mmap_sem);
+ if (init_skey && mm_use_skey(mm))
+ goto out_up;
+ addr = start;
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
+ } while (pgd++, addr = next, addr != end);
+ if (init_skey)
+ current->mm->context.use_skey = 1;
+out_up:
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL(page_table_reset_pgste);
+
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned long key, bool nq)
+{
+ spinlock_t *ptl;
+ pgste_t old, new;
+ pte_t *ptep;
+
+ down_read(&mm->mmap_sem);
+ ptep = get_locked_pte(current->mm, addr, &ptl);
+ if (unlikely(!ptep)) {
+ up_read(&mm->mmap_sem);
+ return -EFAULT;
+ }
+
+ new = old = pgste_get_lock(ptep);
+ pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+ PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ unsigned long address, bits, skey;
+
+ address = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+ /* Set storage key ACC and FP */
+ page_set_storage_key(address, skey, !nq);
+ /* Merge host changed & referenced into pgste */
+ pgste_val(new) |= bits << 52;
+ }
+ /* changing the guest storage key is considered a change of the page */
+ if ((pgste_val(new) ^ pgste_val(old)) &
+ (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+ pgste_val(new) |= PGSTE_UC_BIT;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(*ptep, ptl);
+ up_read(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
#else /* CONFIG_PGSTE */
+static inline int page_table_with_pgste(struct page *page)
+{
+ return 0;
+}
+
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
return NULL;
}
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool init_skey)
+{
+}
+
static inline void page_table_free_pgste(unsigned long *table)
{
}
-static inline void gmap_unmap_notifier(struct mm_struct *mm,
- unsigned long *table)
+static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
+ unsigned long *table)
{
}
@@ -630,10 +1087,13 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
- pgtable_page_ctor(page);
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
atomic_set(&page->_mapcount, 1);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
} else {
@@ -652,12 +1112,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned int bit, mask;
- if (mm_has_pgste(mm)) {
- gmap_unmap_notifier(mm, table);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
+ gmap_disconnect_pgtable(mm, table);
return page_table_free_pgste(table);
}
/* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -695,14 +1155,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
unsigned int bit, mask;
mm = tlb->mm;
- if (mm_has_pgste(mm)) {
- gmap_unmap_notifier(mm, table);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
+ gmap_disconnect_pgtable(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
list_del(&page->lru);
@@ -714,7 +1174,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
tlb_remove_table(tlb, table);
}
-void __tlb_remove_table(void *_table)
+static void __tlb_remove_table(void *_table)
{
const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
void *table = (void *)((unsigned long) _table & ~mask);
@@ -762,7 +1222,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
struct mmu_table_batch **batch = &tlb->batch;
if (*batch) {
- __tlb_flush_mm(tlb->mm);
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
}
@@ -772,11 +1231,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb->mm->context.flush_mm = 1;
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)
__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- __tlb_flush_mm(tlb->mm);
+ __tlb_flush_mm_lazy(tlb->mm);
tlb_remove_table_one(table);
return;
}
@@ -784,102 +1244,177 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
}
(*batch)->tables[(*batch)->nr++] = table;
if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_table_flush(tlb);
+ tlb_flush_mmu(tlb);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
{
unsigned long addr;
- struct page *page;
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
- page = follow_page(vma, addr, FOLL_SPLIT);
- }
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
}
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
{
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
- while (vma != NULL) {
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
thp_split_vma(vma);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
- vma = vma->vm_next;
}
+ mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+ struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next, *table, *new;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+again:
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ table = (unsigned long *) pmd_deref(*pmd);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page))
+ continue;
+ /* Allocate new page table with pgstes */
+ new = page_table_alloc_pgste(mm, addr);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+ /* Nuke pmd entry pointing to the "short" page table */
+ pmdp_flush_lazy(mm, addr, pmd);
+ pmd_clear(pmd);
+ /* Copy ptes from old table to new table */
+ memcpy(new, table, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ /* Establish new table */
+ pmd_populate(mm, pmd, (pte_t *) new);
+ /* Free old table with rcu, there might be a walker! */
+ page_table_free_rcu(tlb, table);
+ new = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (new) {
+ page_table_free_pgste(new);
+ goto again;
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+ struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+ if (unlikely(IS_ERR_VALUE(next)))
+ return next;
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+ if (unlikely(IS_ERR_VALUE(next)))
+ return next;
+ } while (pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm, *old_mm;
-
- /* Do we have switched amode? If no, we cannot do sie */
- if (s390_user_mode == HOME_SPACE_MODE)
- return -EINVAL;
+ struct mm_struct *mm = tsk->mm;
+ struct mmu_gather tlb;
/* Do we have pgstes? if yes, we are done */
if (mm_has_pgste(tsk->mm))
return 0;
- /* lets check if we are allowed to replace the mm */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- task_unlock(tsk);
- return -EINVAL;
- }
- task_unlock(tsk);
-
- /* we copy the mm and let dup_mm create the page tables with_pgstes */
- tsk->mm->context.alloc_pgste = 1;
- /* make sure that both mms have a correct rss state */
- sync_mm_rss(tsk->mm);
- mm = dup_mm(tsk);
- tsk->mm->context.alloc_pgste = 0;
- if (!mm)
- return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ down_write(&mm->mmap_sem);
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- mm->def_flags |= VM_NOHUGEPAGE;
-#endif
+ /* Reallocate the page tables with pgstes */
+ tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+ if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
+ mm->context.has_pgste = 1;
+ tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+ up_write(&mm->mmap_sem);
+ return mm->context.has_pgste ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
- /* Now lets check again if something happened */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- mmput(mm);
- task_unlock(tsk);
- return -EINVAL;
- }
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+void s390_enable_skey(void)
+{
+ page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
- /* ok, we are alone. No ptrace, no threads, etc. */
- old_mm = tsk->mm;
- tsk->mm = tsk->active_mm = mm;
- preempt_disable();
- update_mm(mm, tsk);
- atomic_inc(&mm->context.attach_count);
- atomic_dec(&old_mm->context.attach_count);
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- preempt_enable();
- task_unlock(tsk);
- mmput(old_mm);
- return 0;
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+ bool dirty = false;
+
+ pte = get_locked_pte(gmap->mm, address, &ptl);
+ if (unlikely(!pte))
+ return false;
+
+ if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
+ dirty = true;
+
+ spin_unlock(ptl);
+ return dirty;
}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
+EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
@@ -920,41 +1455,42 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
}
}
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
- assert_spin_locked(&mm->page_table_lock);
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
- if (!mm->pmd_huge_pte)
+ if (!pmd_huge_pte(mm, pmdp))
INIT_LIST_HEAD(lh);
else
- list_add(lh, (struct list_head *) mm->pmd_huge_pte);
- mm->pmd_huge_pte = pgtable;
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
struct list_head *lh;
pgtable_t pgtable;
pte_t *ptep;
- assert_spin_locked(&mm->page_table_lock);
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
- pgtable = mm->pmd_huge_pte;
+ pgtable = pmd_huge_pte(mm, pmdp);
lh = (struct list_head *) pgtable;
if (list_empty(lh))
- mm->pmd_huge_pte = NULL;
+ pmd_huge_pte(mm, pmdp) = NULL;
else {
- mm->pmd_huge_pte = (pgtable_t) lh->next;
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
list_del(lh);
}
ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
ptep++;
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ pte_val(*ptep) = _PAGE_INVALID;
return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6ed1426d27c..fe9012a49aa 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/memblock.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
@@ -66,10 +67,11 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
if (slab_is_available())
pte = (pte_t *) page_table_alloc(&init_mm, address);
else
- pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
+ pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
+ PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) pte, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -85,11 +87,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
- pte_t pte;
int ret = -ENOMEM;
while (address < end) {
- pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
pu_dir = vmem_pud_alloc();
@@ -101,9 +101,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
- pte_val(pte) |= _REGION3_ENTRY_LARGE;
- pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
- pud_val(*pu_dir) = pte_val(pte);
+ pud_val(*pu_dir) = __pa(address) |
+ _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
+ (ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
@@ -118,8 +118,10 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
!(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
- pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
- pmd_val(*pm_dir) = pte_val(pte);
+ pmd_val(*pm_dir) = __pa(address) |
+ _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+ _SEGMENT_ENTRY_YOUNG |
+ (ro ? _SEGMENT_ENTRY_PROTECT : 0);
address += PMD_SIZE;
continue;
}
@@ -132,12 +134,12 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
}
pt_dir = pte_offset_kernel(pm_dir, address);
- *pt_dir = pte;
+ pte_val(*pt_dir) = __pa(address) |
+ pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
address += PAGE_SIZE;
}
ret = 0;
out:
- flush_tlb_kernel_range(start, end);
return ret;
}
@@ -155,7 +157,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
pte_t *pt_dir;
pte_t pte;
- pte_val(pte) = _PAGE_TYPE_EMPTY;
+ pte_val(pte) = _PAGE_INVALID;
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -192,20 +194,16 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
/*
* Add a backed mem_map array to the virtual mem_map array.
*/
-int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
- unsigned long address, start_addr, end_addr;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
- pte_t pte;
int ret = -ENOMEM;
- start_addr = (unsigned long) start;
- end_addr = (unsigned long) (start + nr);
-
- for (address = start_addr; address < end_addr;) {
+ for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
pu_dir = vmem_pud_alloc();
@@ -237,9 +235,9 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
new_page = vmemmap_alloc_block(PMD_SIZE, node);
if (!new_page)
goto out;
- pte = mk_pte_phys(__pa(new_page), PAGE_RW);
- pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
- pmd_val(*pm_dir) = pte_val(pte);
+ pmd_val(*pm_dir) = __pa(new_page) |
+ _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+ _SEGMENT_ENTRY_CO;
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -260,18 +258,21 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
new_page =__pa(vmem_alloc_pages(0));
if (!new_page)
goto out;
- pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
- *pt_dir = pte;
+ pte_val(*pt_dir) =
+ __pa(new_page) | pgprot_val(PAGE_KERNEL);
}
address += PAGE_SIZE;
}
- memset(start, 0, nr * sizeof(struct page));
+ memset((void *)start, 0, end - start);
ret = 0;
out:
- flush_tlb_kernel_range(start_addr, end_addr);
return ret;
}
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+
/*
* Add memory segment to the segment list if it doesn't overlap with
* an already present segment.
@@ -372,17 +373,14 @@ out:
void __init vmem_map_init(void)
{
unsigned long ro_start, ro_end;
- unsigned long start, end;
- int i;
+ struct memblock_region *reg;
+ phys_addr_t start, end;
ro_start = PFN_ALIGN((unsigned long)&_stext);
ro_end = (unsigned long)&_eshared & PAGE_MASK;
- for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- if (memory_chunk[i].type == CHUNK_CRASHK ||
- memory_chunk[i].type == CHUNK_OLDMEM)
- continue;
- start = memory_chunk[i].addr;
- end = memory_chunk[i].addr + memory_chunk[i].size;
+ for_each_memblock(memory, reg) {
+ start = reg->base;
+ end = reg->base + reg->size - 1;
if (start >= ro_end || end <= ro_start)
vmem_add_mem(start, end - start, 0);
else if (start >= ro_start && end <= ro_end)
@@ -402,26 +400,21 @@ void __init vmem_map_init(void)
}
/*
- * Convert memory chunk array to a memory segment list so there is a single
- * list that contains both r/w memory and shared memory segments.
+ * Convert memblock.memory to a memory segment list so there is a single
+ * list that contains all memory segments.
*/
static int __init vmem_convert_memory_chunk(void)
{
+ struct memblock_region *reg;
struct memory_segment *seg;
- int i;
mutex_lock(&vmem_mutex);
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (!memory_chunk[i].size)
- continue;
- if (memory_chunk[i].type == CHUNK_CRASHK ||
- memory_chunk[i].type == CHUNK_OLDMEM)
- continue;
+ for_each_memblock(memory, reg) {
seg = kzalloc(sizeof(*seg), GFP_KERNEL);
if (!seg)
panic("Out of memory...\n");
- seg->start = memory_chunk[i].addr;
- seg->size = memory_chunk[i].size;
+ seg->start = reg->base;
+ seg->size = reg->size;
insert_memory_segment(seg);
}
mutex_unlock(&vmem_mutex);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bb284419b0f..a2cbd875543 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -7,10 +7,13 @@
*/
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
#include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
#include <asm/cacheflush.h>
-#include <asm/processor.h>
#include <asm/facility.h>
+#include <asm/dis.h>
/*
* Conventions:
@@ -153,8 +156,8 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
EMIT6(0xeb8ff058, 0x0024);
/* lgr %r14,%r15 */
EMIT4(0xb90400ef);
- /* ahi %r15,<offset> */
- EMIT4_IMM(0xa7fa0000, (jit->seen & SEEN_MEM) ? -112 : -80);
+ /* aghi %r15,<offset> */
+ EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80);
/* stg %r14,152(%r15) */
EMIT6(0xe3e0f098, 0x0024);
} else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
@@ -220,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
EMIT2(0x07fe);
}
+/* Helper to find the offset of pkt_type in sk_buff
+ * Make sure its still a 3bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+ struct sk_buff skb_probe = {
+ .pkt_type = ~0,
+ };
+ char *ct = (char *)&skb_probe;
+ int off;
+
+ pkt_type_offset = -1;
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (!ct[off])
+ continue;
+ if (ct[off] == PKT_TYPE_MAX)
+ pkt_type_offset = off;
+ else {
+ /* Found non matching bit pattern, fix needed. */
+ WARN_ON_ONCE(1);
+ pkt_type_offset = -1;
+ return -1;
+ }
+ }
+ return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
/*
* make sure we dont leak kernel information to user
*/
@@ -235,26 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
EMIT4(0xa7c80000);
/* Clear A if the first register does not set it. */
switch (filter[0].code) {
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
- case BPF_S_LD_W_LEN:
- case BPF_S_LD_W_IND:
- case BPF_S_LD_H_IND:
- case BPF_S_LD_B_IND:
- case BPF_S_LDX_B_MSH:
- case BPF_S_LD_IMM:
- case BPF_S_LD_MEM:
- case BPF_S_MISC_TXA:
- case BPF_S_ANC_PROTOCOL:
- case BPF_S_ANC_PKTTYPE:
- case BPF_S_ANC_IFINDEX:
- case BPF_S_ANC_MARK:
- case BPF_S_ANC_QUEUE:
- case BPF_S_ANC_HATYPE:
- case BPF_S_ANC_RXHASH:
- case BPF_S_ANC_CPU:
- case BPF_S_RET_K:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
+ case BPF_LD | BPF_W | BPF_LEN:
+ case BPF_LD | BPF_W | BPF_IND:
+ case BPF_LD | BPF_H | BPF_IND:
+ case BPF_LD | BPF_B | BPF_IND:
+ case BPF_LD | BPF_IMM:
+ case BPF_LD | BPF_MEM:
+ case BPF_MISC | BPF_TXA:
+ case BPF_RET | BPF_K:
/* first instruction sets A register */
break;
default: /* A = 0 */
@@ -269,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
unsigned int K;
int offset;
unsigned int mask;
+ u16 code;
K = filter->k;
- switch (filter->code) {
- case BPF_S_ALU_ADD_X: /* A += X */
+ code = bpf_anc_helper(filter);
+
+ switch (code) {
+ case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
jit->seen |= SEEN_XREG;
/* ar %r5,%r12 */
EMIT2(0x1a5c);
break;
- case BPF_S_ALU_ADD_K: /* A += K */
+ case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
if (!K)
break;
if (K <= 16383)
@@ -290,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
/* a %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
break;
- case BPF_S_ALU_SUB_X: /* A -= X */
+ case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
jit->seen |= SEEN_XREG;
/* sr %r5,%r12 */
EMIT2(0x1b5c);
break;
- case BPF_S_ALU_SUB_K: /* A -= K */
+ case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
if (!K)
break;
if (K <= 16384)
@@ -308,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
/* s %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
break;
- case BPF_S_ALU_MUL_X: /* A *= X */
+ case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
jit->seen |= SEEN_XREG;
/* msr %r5,%r12 */
EMIT4(0xb252005c);
break;
- case BPF_S_ALU_MUL_K: /* A *= K */
+ case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
if (K <= 16383)
/* mhi %r5,K */
EMIT4_IMM(0xa75c0000, K);
@@ -324,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
/* ms %r5,<d(K)>(%r13) */
EMIT4_DISP(0x7150d000, EMIT_CONST(K));
break;
- case BPF_S_ALU_DIV_X: /* A /= X */
+ case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
jit->seen |= SEEN_XREG | SEEN_RET0;
/* ltr %r12,%r12 */
EMIT2(0x12cc);
@@ -332,16 +360,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* dr %r4,%r12 */
- EMIT2(0x1d4c);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
break;
- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */
- /* m %r4,<d(K)>(%r13) */
- EMIT4_DISP(0x5c40d000, EMIT_CONST(K));
- /* lr %r5,%r4 */
- EMIT2(0x1854);
+ case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
+ if (K == 1)
+ break;
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
break;
- case BPF_S_ALU_MOD_X: /* A %= X */
+ case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
jit->seen |= SEEN_XREG | SEEN_RET0;
/* ltr %r12,%r12 */
EMIT2(0x12cc);
@@ -349,25 +379,30 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* dr %r4,%r12 */
- EMIT2(0x1d4c);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
/* lr %r5,%r4 */
EMIT2(0x1854);
break;
- case BPF_S_ALU_MOD_K: /* A %= K */
+ case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
+ if (K == 1) {
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ break;
+ }
/* lhi %r4,0 */
EMIT4(0xa7480000);
- /* d %r4,<d(K)>(%r13) */
- EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
/* lr %r5,%r4 */
EMIT2(0x1854);
break;
- case BPF_S_ALU_AND_X: /* A &= X */
+ case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
jit->seen |= SEEN_XREG;
/* nr %r5,%r12 */
EMIT2(0x145c);
break;
- case BPF_S_ALU_AND_K: /* A &= K */
+ case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
if (test_facility(21))
/* nilf %r5,<K> */
EMIT6_IMM(0xc05b0000, K);
@@ -375,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
/* n %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5450d000, EMIT_CONST(K));
break;
- case BPF_S_ALU_OR_X: /* A |= X */
+ case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
jit->seen |= SEEN_XREG;
/* or %r5,%r12 */
EMIT2(0x165c);
break;
- case BPF_S_ALU_OR_K: /* A |= K */
+ case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
if (test_facility(21))
/* oilf %r5,<K> */
EMIT6_IMM(0xc05d0000, K);
@@ -388,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
/* o %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5650d000, EMIT_CONST(K));
break;
- case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
- case BPF_S_ALU_XOR_X:
+ case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+ case BPF_ALU | BPF_XOR | BPF_X:
jit->seen |= SEEN_XREG;
/* xr %r5,%r12 */
EMIT2(0x175c);
break;
- case BPF_S_ALU_XOR_K: /* A ^= K */
+ case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
if (!K)
break;
/* x %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5750d000, EMIT_CONST(K));
break;
- case BPF_S_ALU_LSH_X: /* A <<= X; */
+ case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
jit->seen |= SEEN_XREG;
/* sll %r5,0(%r12) */
EMIT4(0x8950c000);
break;
- case BPF_S_ALU_LSH_K: /* A <<= K */
+ case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
if (K == 0)
break;
/* sll %r5,K */
EMIT4_DISP(0x89500000, K);
break;
- case BPF_S_ALU_RSH_X: /* A >>= X; */
+ case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
jit->seen |= SEEN_XREG;
/* srl %r5,0(%r12) */
EMIT4(0x8850c000);
break;
- case BPF_S_ALU_RSH_K: /* A >>= K; */
+ case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
if (K == 0)
break;
/* srl %r5,K */
EMIT4_DISP(0x88500000, K);
break;
- case BPF_S_ALU_NEG: /* A = -A */
+ case BPF_ALU | BPF_NEG: /* A = -A */
/* lnr %r5,%r5 */
EMIT2(0x1155);
break;
- case BPF_S_JMP_JA: /* ip += K */
+ case BPF_JMP | BPF_JA: /* ip += K */
offset = addrs[i + K] + jit->start - jit->prg;
EMIT4_PCREL(0xa7f40000, offset);
break;
- case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */
+ case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
mask = 0x200000; /* jh */
goto kbranch;
- case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */
+ case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
mask = 0xa00000; /* jhe */
goto kbranch;
- case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */
+ case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
mask = 0x800000; /* je */
kbranch: /* Emit compare if the branch targets are different */
if (filter->jt != filter->jf) {
@@ -469,7 +504,7 @@ branch: if (filter->jt == filter->jf) {
EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
}
break;
- case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */
+ case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
mask = 0x700000; /* jnz */
/* Emit test if the branch targets are different */
if (filter->jt != filter->jf) {
@@ -483,13 +518,13 @@ branch: if (filter->jt == filter->jf) {
EMIT4_IMM(0xa7510000, K);
}
goto branch;
- case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */
+ case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
mask = 0x200000; /* jh */
goto xbranch;
- case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */
+ case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
mask = 0xa00000; /* jhe */
goto xbranch;
- case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */
+ case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
mask = 0x800000; /* je */
xbranch: /* Emit compare if the branch targets are different */
if (filter->jt != filter->jf) {
@@ -498,7 +533,7 @@ xbranch: /* Emit compare if the branch targets are different */
EMIT2(0x195c);
}
goto branch;
- case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */
+ case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
mask = 0x700000; /* jnz */
/* Emit test if the branch targets are different */
if (filter->jt != filter->jf) {
@@ -509,15 +544,15 @@ xbranch: /* Emit compare if the branch targets are different */
EMIT2(0x144c);
}
goto branch;
- case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */
+ case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
offset = jit->off_load_word;
goto load_abs;
- case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */
+ case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
offset = jit->off_load_half;
goto load_abs;
- case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */
+ case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
offset = jit->off_load_byte;
load_abs: if ((int) K < 0)
@@ -531,19 +566,19 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* jnz <ret0> */
EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
break;
- case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */
+ case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
offset = jit->off_load_iword;
goto call_fn;
- case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */
+ case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
offset = jit->off_load_ihalf;
goto call_fn;
- case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */
+ case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
offset = jit->off_load_ibyte;
goto call_fn;
- case BPF_S_LDX_B_MSH:
+ case BPF_LDX | BPF_B | BPF_MSH:
/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
jit->seen |= SEEN_RET0;
if ((int) K < 0) {
@@ -554,17 +589,17 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
offset = jit->off_load_bmsh;
goto call_fn;
- case BPF_S_LD_W_LEN: /* A = skb->len; */
+ case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
/* l %r5,<d(len)>(%r2) */
EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
break;
- case BPF_S_LDX_W_LEN: /* X = skb->len; */
+ case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
jit->seen |= SEEN_XREG;
/* l %r12,<d(len)>(%r2) */
EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
break;
- case BPF_S_LD_IMM: /* A = K */
+ case BPF_LD | BPF_IMM: /* A = K */
if (K <= 16383)
/* lhi %r5,K */
EMIT4_IMM(0xa7580000, K);
@@ -575,7 +610,7 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* l %r5,<d(K)>(%r13) */
EMIT4_DISP(0x5850d000, EMIT_CONST(K));
break;
- case BPF_S_LDX_IMM: /* X = K */
+ case BPF_LDX | BPF_IMM: /* X = K */
jit->seen |= SEEN_XREG;
if (K <= 16383)
/* lhi %r12,<K> */
@@ -587,29 +622,29 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* l %r12,<d(K)>(%r13) */
EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
break;
- case BPF_S_LD_MEM: /* A = mem[K] */
+ case BPF_LD | BPF_MEM: /* A = mem[K] */
jit->seen |= SEEN_MEM;
/* l %r5,<K>(%r15) */
EMIT4_DISP(0x5850f000,
(jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
break;
- case BPF_S_LDX_MEM: /* X = mem[K] */
+ case BPF_LDX | BPF_MEM: /* X = mem[K] */
jit->seen |= SEEN_XREG | SEEN_MEM;
/* l %r12,<K>(%r15) */
EMIT4_DISP(0x58c0f000,
(jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
break;
- case BPF_S_MISC_TAX: /* X = A */
+ case BPF_MISC | BPF_TAX: /* X = A */
jit->seen |= SEEN_XREG;
/* lr %r12,%r5 */
EMIT2(0x18c5);
break;
- case BPF_S_MISC_TXA: /* A = X */
+ case BPF_MISC | BPF_TXA: /* A = X */
jit->seen |= SEEN_XREG;
/* lr %r5,%r12 */
EMIT2(0x185c);
break;
- case BPF_S_RET_K:
+ case BPF_RET | BPF_K:
if (K == 0) {
jit->seen |= SEEN_RET0;
if (last)
@@ -629,33 +664,33 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
}
break;
- case BPF_S_RET_A:
+ case BPF_RET | BPF_A:
/* llgfr %r2,%r5 */
EMIT4(0xb9160025);
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
break;
- case BPF_S_ST: /* mem[K] = A */
+ case BPF_ST: /* mem[K] = A */
jit->seen |= SEEN_MEM;
/* st %r5,<K>(%r15) */
EMIT4_DISP(0x5050f000,
(jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
break;
- case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+ case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
jit->seen |= SEEN_XREG | SEEN_MEM;
/* st %r12,<K>(%r15) */
EMIT4_DISP(0x50c0f000,
(jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
break;
- case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+ case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
/* lhi %r5,0 */
EMIT4(0xa7580000);
/* icm %r5,3,<d(protocol)>(%r2) */
EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
break;
- case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0;
- * A = skb->dev->ifindex */
+ case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0;
+ * A = skb->dev->ifindex */
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
jit->seen |= SEEN_RET0;
/* lg %r1,<d(dev)>(%r2) */
@@ -667,20 +702,20 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* l %r5,<d(ifindex)>(%r1) */
EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
break;
- case BPF_S_ANC_MARK: /* A = skb->mark */
+ case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
/* l %r5,<d(mark)>(%r2) */
EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
break;
- case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */
+ case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
/* lhi %r5,0 */
EMIT4(0xa7580000);
/* icm %r5,3,<d(queue_mapping)>(%r2) */
EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
break;
- case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0;
- * A = skb->dev->type */
+ case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0;
+ * A = skb->dev->type */
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
jit->seen |= SEEN_RET0;
/* lg %r1,<d(dev)>(%r2) */
@@ -694,12 +729,40 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* icm %r5,3,<d(type)>(%r1) */
EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
break;
- case BPF_S_ANC_RXHASH: /* A = skb->rxhash */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
- /* l %r5,<d(rxhash)>(%r2) */
- EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash));
+ case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ /* l %r5,<d(hash)>(%r2) */
+ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
+ break;
+ case BPF_ANC | SKF_AD_VLAN_TAG:
+ case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* icm %r5,3,<d(vlan_tci)>(%r2) */
+ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
+ if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
+ /* nill %r5,0xefff */
+ EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
+ } else {
+ /* nill %r5,0x1000 */
+ EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT);
+ /* srl %r5,12 */
+ EMIT4_DISP(0x88500000, 12);
+ }
+ break;
+ case BPF_ANC | SKF_AD_PKTTYPE:
+ if (pkt_type_offset < 0)
+ goto out;
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* ic %r5,<d(pkt_type_offset)>(%r2) */
+ EMIT4_DISP(0x43502000, pkt_type_offset);
+ /* srl %r5,5 */
+ EMIT4_DISP(0x88500000, 5);
break;
- case BPF_S_ANC_CPU: /* A = smp_processor_id() */
+ case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
#ifdef CONFIG_SMP
/* l %r5,<d(cpu_nr)> */
EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
@@ -717,8 +780,41 @@ out:
return -1;
}
+/*
+ * Note: for security reasons, bpf code will follow a randomly
+ * sized amount of illegal instructions.
+ */
+struct bpf_binary_header {
+ unsigned int pages;
+ u8 image[];
+};
+
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+ u8 **image_ptr)
+{
+ struct bpf_binary_header *header;
+ unsigned int sz, hole;
+
+ /* Most BPF filters are really small, but if some of them fill a page,
+ * allow at least 128 extra bytes for illegal instructions.
+ */
+ sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
+ header = module_alloc(sz);
+ if (!header)
+ return NULL;
+ memset(header, 0, sz);
+ header->pages = sz / PAGE_SIZE;
+ hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header));
+ /* Insert random number of illegal instructions before BPF code
+ * and make sure the first instruction starts at an even address.
+ */
+ *image_ptr = &header->image[(prandom_u32() % hole) & -2];
+ return header;
+}
+
void bpf_jit_compile(struct sk_filter *fp)
{
+ struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
struct bpf_jit jit, cjit;
unsigned int *addrs;
@@ -726,10 +822,9 @@ void bpf_jit_compile(struct sk_filter *fp)
if (!bpf_jit_enable)
return;
- addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL)
return;
- memset(addrs, 0, fp->len * sizeof(*addrs));
memset(&jit, 0, sizeof(cjit));
memset(&cjit, 0, sizeof(cjit));
@@ -752,12 +847,11 @@ void bpf_jit_compile(struct sk_filter *fp)
} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
prg_len = jit.prg - jit.start;
lit_len = jit.lit - jit.mid;
- size = max_t(unsigned long, prg_len + lit_len,
- sizeof(struct work_struct));
+ size = prg_len + lit_len;
if (size >= BPF_SIZE_MAX)
goto out;
- jit.start = module_alloc(size);
- if (!jit.start)
+ header = bpf_alloc_binary(size, &jit.start);
+ if (!header)
goto out;
jit.prg = jit.mid = jit.start + prg_len;
jit.lit = jit.end = jit.start + prg_len + lit_len;
@@ -768,37 +862,30 @@ void bpf_jit_compile(struct sk_filter *fp)
cjit = jit;
}
if (bpf_jit_enable > 1) {
- pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
- fp->len, jit.end - jit.start, pass, jit.start);
- if (jit.start) {
- printk(KERN_ERR "JIT code:\n");
+ bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+ if (jit.start)
print_fn_code(jit.start, jit.mid - jit.start);
- print_hex_dump(KERN_ERR, "JIT literals:\n",
- DUMP_PREFIX_ADDRESS, 16, 1,
- jit.mid, jit.end - jit.mid, false);
- }
}
- if (jit.start)
+ if (jit.start) {
+ set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.start;
+ fp->jited = 1;
+ }
out:
kfree(addrs);
}
-static void jit_free_defer(struct work_struct *arg)
-{
- module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
void bpf_jit_free(struct sk_filter *fp)
{
- struct work_struct *work;
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
- if (fp->bpf_func == sk_run_filter)
- return;
- work = (struct work_struct *)fp->bpf_func;
- INIT_WORK(work, jit_free_defer);
- schedule_work(work);
+ if (!fp->jited)
+ goto free_filter;
+
+ set_memory_rw(addr, header->pages);
+ module_free(NULL, header);
+
+free_filter:
+ kfree(fp);
}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index b5b2916895e..e53c6f26880 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -26,9 +26,6 @@
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
-#define ALERT_REQ_MASK 0x4000000000000000ul
-#define BUFFER_FULL_MASK 0x8000000000000000ul
-
DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
struct hws_execute_parms {
@@ -44,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom);
static unsigned char hws_flush_all;
static unsigned int hws_oom;
+static unsigned int hws_alert;
static struct workqueue_struct *hws_wq;
static unsigned int hws_state;
@@ -65,43 +63,6 @@ static unsigned long interval;
static unsigned long min_sampler_rate;
static unsigned long max_sampler_rate;
-static int ssctl(void *buffer)
-{
- int cc;
-
- /* set in order to detect a program check */
- cc = 1;
-
- asm volatile(
- "0: .insn s,0xB2870000,0(%1)\n"
- "1: ipm %0\n"
- " srl %0,28\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (cc), "+a" (buffer)
- : "m" (*((struct hws_ssctl_request_block *)buffer))
- : "cc", "memory");
-
- return cc ? -EINVAL : 0 ;
-}
-
-static int qsi(void *buffer)
-{
- int cc;
- cc = 1;
-
- asm volatile(
- "0: .insn s,0xB2860000,0(%1)\n"
- "1: lhi %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (cc), "+a" (buffer)
- : "m" (*((struct hws_qsi_info_block *)buffer))
- : "cc", "memory");
-
- return cc ? -EINVAL : 0;
-}
-
static void execute_qsi(void *parms)
{
struct hws_execute_parms *ep = parms;
@@ -113,7 +74,7 @@ static void execute_ssctl(void *parms)
{
struct hws_execute_parms *ep = parms;
- ep->rc = ssctl(ep->buffer);
+ ep->rc = lsctl(ep->buffer);
}
static int smp_ctl_ssctl_stop(int cpu)
@@ -214,17 +175,6 @@ static int smp_ctl_qsi(int cpu)
return ep.rc;
}
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
- void *ret;
-
- ret = (void *)v;
- ret += PAGE_SIZE;
- ret -= sizeof(struct hws_trailer_entry);
-
- return (unsigned long *) ret;
-}
-
static void hws_ext_handler(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
@@ -233,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code,
if (!(param32 & CPU_MF_INT_SF_MASK))
return;
+ if (!hws_alert)
+ return;
+
inc_irq_stat(IRQEXT_CMS);
atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
@@ -256,23 +209,11 @@ static void init_all_cpu_buffers(void)
}
}
-static int is_link_entry(unsigned long *s)
+static void prepare_cpu_buffers(void)
{
- return *s & 0x1ul ? 1 : 0;
-}
-
-static unsigned long *get_next_sdbt(unsigned long *s)
-{
- return (unsigned long *) (*s & ~0x1ul);
-}
-
-static int prepare_cpu_buffers(void)
-{
- int cpu;
- int rc;
struct hws_cpu_buffer *cb;
+ int cpu;
- rc = 0;
for_each_online_cpu(cpu) {
cb = &per_cpu(sampler_cpu_buffer, cpu);
atomic_set(&cb->ext_params, 0);
@@ -287,8 +228,6 @@ static int prepare_cpu_buffers(void)
cb->oom = 0;
cb->stop_mode = 0;
}
-
- return rc;
}
/*
@@ -353,7 +292,7 @@ static int allocate_sdbt(int cpu)
}
*sdbt = sdb;
trailer = trailer_entry_ptr(*sdbt);
- *trailer = ALERT_REQ_MASK;
+ *trailer = SDB_TE_ALERT_REQ_MASK;
sdbt++;
mutex_unlock(&hws_sem_oom);
}
@@ -829,7 +768,7 @@ static void worker_on_interrupt(unsigned int cpu)
trailer = trailer_entry_ptr(*sdbt);
/* leave loop if no more work to do */
- if (!(*trailer & BUFFER_FULL_MASK)) {
+ if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
done = 1;
if (!hws_flush_all)
continue;
@@ -856,7 +795,7 @@ static void worker_on_interrupt(unsigned int cpu)
static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
unsigned long *dear)
{
- struct hws_data_entry *sample_data_ptr;
+ struct hws_basic_entry *sample_data_ptr;
unsigned long *trailer;
trailer = trailer_entry_ptr(*sdbt);
@@ -866,7 +805,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
trailer = dear;
}
- sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+ sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
while ((unsigned long *)sample_data_ptr < trailer) {
struct pt_regs *regs = NULL;
@@ -1001,7 +940,8 @@ int hwsampler_deallocate(void)
if (hws_state != HWS_STOPPED)
goto deallocate_exit;
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ hws_alert = 0;
deallocate_sdbt();
hws_state = HWS_DEALLOCATED;
@@ -1089,7 +1029,7 @@ int hwsampler_setup(void)
max_sampler_rate = cb->qsi.max_sampl_rate;
}
}
- register_external_interrupt(0x1407, hws_ext_handler);
+ register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
hws_state = HWS_DEALLOCATED;
rc = 0;
@@ -1115,7 +1055,8 @@ int hwsampler_shutdown(void)
mutex_lock(&hws_sem);
if (hws_state == HWS_STOPPED) {
- measurement_alert_subclass_unregister();
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ hws_alert = 0;
deallocate_sdbt();
}
if (hws_wq) {
@@ -1123,7 +1064,7 @@ int hwsampler_shutdown(void)
hws_wq = NULL;
}
- unregister_external_interrupt(0x1407, hws_ext_handler);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
hws_state = HWS_INIT;
rc = 0;
}
@@ -1162,9 +1103,7 @@ int hwsampler_start_all(unsigned long rate)
if (rc)
goto start_all_exit;
- rc = prepare_cpu_buffers();
- if (rc)
- goto start_all_exit;
+ prepare_cpu_buffers();
for_each_online_cpu(cpu) {
rc = start_sampling(cpu);
@@ -1190,7 +1129,8 @@ start_all_exit:
hws_oom = 1;
hws_flush_all = 0;
/* now let them in, 1407 CPUMF external interrupts */
- measurement_alert_subclass_register();
+ hws_alert = 1;
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
return 0;
}
@@ -1210,7 +1150,7 @@ int hwsampler_stop_all(void)
rc = 0;
if (hws_state == HWS_INIT) {
mutex_unlock(&hws_sem);
- return rc;
+ return 0;
}
hws_state = HWS_STOPPING;
mutex_unlock(&hws_sem);
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 1912f3bb190..a483d06f2fa 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -9,27 +9,7 @@
#define HWSAMPLER_H_
#include <linux/workqueue.h>
-
-struct hws_qsi_info_block /* QUERY SAMPLING information block */
-{ /* Bit(s) */
- unsigned int b0_13:14; /* 0-13: zeros */
- unsigned int as:1; /* 14: sampling authorisation control*/
- unsigned int b15_21:7; /* 15-21: zeros */
- unsigned int es:1; /* 22: sampling enable control */
- unsigned int b23_29:7; /* 23-29: zeros */
- unsigned int cs:1; /* 30: sampling activation control */
- unsigned int:1; /* 31: reserved */
- unsigned int bsdes:16; /* 4-5: size of sampling entry */
- unsigned int:16; /* 6-7: reserved */
- unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
- unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
- unsigned long tear; /* 24-31: TEAR contents */
- unsigned long dear; /* 32-39: DEAR contents */
- unsigned int rsvrd0; /* 40-43: reserved */
- unsigned int cpu_speed; /* 44-47: CPU speed */
- unsigned long long rsvrd1; /* 48-55: reserved */
- unsigned long long rsvrd2; /* 56-63: reserved */
-};
+#include <asm/cpu_mf.h>
struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */
{ /* bytes 0 - 7 Bit(s) */
@@ -68,36 +48,6 @@ struct hws_cpu_buffer {
unsigned int stop_mode:1;
};
-struct hws_data_entry {
- unsigned int def:16; /* 0-15 Data Entry Format */
- unsigned int R:4; /* 16-19 reserved */
- unsigned int U:4; /* 20-23 Number of unique instruct. */
- unsigned int z:2; /* zeros */
- unsigned int T:1; /* 26 PSW DAT mode */
- unsigned int W:1; /* 27 PSW wait state */
- unsigned int P:1; /* 28 PSW Problem state */
- unsigned int AS:2; /* 29-30 PSW address-space control */
- unsigned int I:1; /* 31 entry valid or invalid */
- unsigned int:16;
- unsigned int prim_asn:16; /* primary ASN */
- unsigned long long ia; /* Instruction Address */
- unsigned long long lpp; /* Logical-Partition Program Param. */
- unsigned long long vpp; /* Virtual-Machine Program Param. */
-};
-
-struct hws_trailer_entry {
- unsigned int f:1; /* 0 - Block Full Indicator */
- unsigned int a:1; /* 1 - Alert request control */
- unsigned long:62; /* 2 - 63: Reserved */
- unsigned long overflow; /* 64 - sample Overflow count */
- unsigned long timestamp; /* 16 - time-stamp */
- unsigned long timestamp1; /* */
- unsigned long reserved1; /* 32 -Reserved */
- unsigned long reserved2; /* */
- unsigned long progusage1; /* 48 - reserved for programming use */
- unsigned long progusage2; /* */
-};
-
int hwsampler_setup(void);
int hwsampler_shutdown(void);
int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 584b93674ea..9ffe645d598 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -10,6 +10,7 @@
*/
#include <linux/oprofile.h>
+#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
"(report cpu_type \"timer\"");
+static int __oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+ if (retval)
+ hwsampler_deallocate();
+
+ return retval;
+}
+
static int oprofile_hwsampler_start(void)
{
int retval;
@@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void)
if (!hwsampler_running)
return timer_ops.start();
- retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ retval = perf_reserve_sampling();
if (retval)
return retval;
- retval = hwsampler_start_all(oprofile_hw_interval);
+ retval = __oprofile_hwsampler_start();
if (retval)
- hwsampler_deallocate();
+ perf_release_sampling();
return retval;
}
@@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void)
hwsampler_stop_all();
hwsampler_deallocate();
+ perf_release_sampling();
return;
}
@@ -346,16 +363,15 @@ static const struct file_operations timer_enabled_fops = {
};
-static int oprofile_create_hwsampling_files(struct super_block *sb,
- struct dentry *root)
+static int oprofile_create_hwsampling_files(struct dentry *root)
{
struct dentry *dir;
- dir = oprofilefs_mkdir(sb, root, "timer");
+ dir = oprofilefs_mkdir(root, "timer");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+ oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
if (!hwsampler_available)
return 0;
@@ -376,17 +392,17 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
* and can only be set to 0.
*/
- dir = oprofilefs_mkdir(sb, root, "0");
+ dir = oprofilefs_mkdir(root, "0");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
- oprofilefs_create_file(sb, dir, "event", &zero_fops);
- oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
- oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
- oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
- oprofilefs_create_file(sb, dir, "user", &user_fops);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
+ oprofilefs_create_file(dir, "event", &zero_fops);
+ oprofilefs_create_file(dir, "count", &hw_interval_fops);
+ oprofilefs_create_file(dir, "unit_mask", &zero_fops);
+ oprofilefs_create_file(dir, "kernel", &kernel_fops);
+ oprofilefs_create_file(dir, "user", &user_fops);
+ oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
&oprofile_sdbt_blocks);
} else {
@@ -396,19 +412,19 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
* space tools. The /dev/oprofile/hwsampling fs is
* provided in that case.
*/
- dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ dir = oprofilefs_mkdir(root, "hwsampling");
if (!dir)
return -EINVAL;
- oprofilefs_create_file(sb, dir, "hwsampler",
+ oprofilefs_create_file(dir, "hwsampler",
&hwsampler_fops);
- oprofilefs_create_file(sb, dir, "hw_interval",
+ oprofilefs_create_file(dir, "hw_interval",
&hw_interval_fops);
- oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+ oprofilefs_create_ro_ulong(dir, "hw_min_interval",
&oprofile_min_interval);
- oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+ oprofilefs_create_ro_ulong(dir, "hw_max_interval",
&oprofile_max_interval);
- oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
&oprofile_sdbt_blocks);
}
return 0;
@@ -440,6 +456,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
switch (id.machine) {
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+ case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
default: return -ENODEV;
}
}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index f0f426a113c..a9e1dc4ae44 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -2,5 +2,5 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o \
- pci_sysfs.o pci_event.o pci_debug.o
+obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+ pci_event.o pci_debug.o pci_insn.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 60e0372545d..30de42730b2 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -42,82 +42,40 @@
#define SIC_IRQ_MODE_SINGLE 1
#define ZPCI_NR_DMA_SPACES 1
-#define ZPCI_MSI_VEC_BITS 6
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
/* list of all detected zpci devices */
-LIST_HEAD(zpci_list);
-EXPORT_SYMBOL_GPL(zpci_list);
-DEFINE_MUTEX(zpci_list_lock);
-EXPORT_SYMBOL_GPL(zpci_list_lock);
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
-struct pci_hp_callback_ops hotplug_ops;
-EXPORT_SYMBOL_GPL(hotplug_ops);
+static struct irq_chip zpci_irq_chip = {
+ .name = "zPCI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
+};
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
static DEFINE_SPINLOCK(zpci_domain_lock);
-struct callback {
- irq_handler_t handler;
- void *data;
-};
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
-struct zdev_irq_map {
- unsigned long aibv; /* AI bit vector */
- int msi_vecs; /* consecutive MSI-vectors used */
- int __unused;
- struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
- spinlock_t lock; /* protect callbacks against de-reg */
-};
+/* Adapter interrupt definitions */
+static void zpci_irq_handler(struct airq_struct *airq);
-struct intr_bucket {
- /* amap of adapters, one bit per dev, corresponds to one irq nr */
- unsigned long *alloc;
- /* AI summary bit, global page for all devices */
- unsigned long *aisb;
- /* pointer to aibv and callback data in zdev */
- struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
- /* protects the whole bucket struct */
- spinlock_t lock;
+static struct airq_struct zpci_airq = {
+ .handler = zpci_irq_handler,
+ .isc = PCI_ISC,
};
-static struct intr_bucket *bucket;
-
-/* Adapter local summary indicator */
-static u8 *zpci_irq_si;
-
-static atomic_t irq_retries = ATOMIC_INIT(0);
-
/* I/O Map */
static DEFINE_SPINLOCK(zpci_iomap_lock);
static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
-/* highest irq summary bit */
-static int __read_mostly aisb_max;
-
-static struct kmem_cache *zdev_irq_cache;
static struct kmem_cache *zdev_fmb_cache;
-debug_info_t *pci_debug_msg_id;
-debug_info_t *pci_debug_err_id;
-
-static inline int irq_to_msi_nr(unsigned int irq)
-{
- return irq & ZPCI_MSI_MASK;
-}
-
-static inline int irq_to_dev_nr(unsigned int irq)
-{
- return irq >> ZPCI_MSI_VEC_BITS;
-}
-
-static inline struct zdev_irq_map *get_imap(unsigned int irq)
-{
- return bucket->imap[irq_to_dev_nr(irq)];
-}
-
struct zpci_dev *get_zdev(struct pci_dev *pdev)
{
return (struct zpci_dev *) pdev->sysdata;
@@ -127,22 +85,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_for_each_entry(tmp, &zpci_list, entry) {
if (tmp->fid == fid) {
zdev = tmp;
break;
}
}
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
return zdev;
}
-bool zpci_fid_present(u32 fid)
-{
- return (get_zdev_by_fid(fid) != NULL) ? true : false;
-}
-
static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
{
return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -161,30 +114,20 @@ int pci_proc_domain(struct pci_bus *bus)
EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register adapter interruptions */
-static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
- u64 aibv)
+static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
- struct zpci_fib *fib;
- int rc;
-
- fib = (void *) get_zeroed_page(GFP_KERNEL);
- if (!fib)
- return -ENOMEM;
+ struct zpci_fib fib = {0};
- fib->isc = PCI_ISC;
- fib->noi = zdev->irq_map->msi_vecs;
- fib->sum = 1; /* enable summary notifications */
- fib->aibv = aibv;
- fib->aibvo = 0; /* every function has its own page */
- fib->aisb = (u64) bucket->aisb + aisb / 8;
- fib->aisbo = aisb & ZPCI_MSI_MASK;
+ fib.isc = PCI_ISC;
+ fib.sum = 1; /* enable summary notifications */
+ fib.noi = airq_iv_end(zdev->aibv);
+ fib.aibv = (unsigned long) zdev->aibv->vector;
+ fib.aibvo = 0; /* each zdev has its own interrupt vector */
+ fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+ fib.aisbo = zdev->aisb & 63;
- rc = mpcifc_instr(req, fib);
- pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
-
- free_page((unsigned long) fib);
- return rc;
+ return zpci_mod_fc(req, &fib);
}
struct mod_pci_args {
@@ -197,22 +140,14 @@ struct mod_pci_args {
static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
- struct zpci_fib *fib;
- int rc;
+ struct zpci_fib fib = {0};
- /* The FIB must be available even if it's not used */
- fib = (void *) get_zeroed_page(GFP_KERNEL);
- if (!fib)
- return -ENOMEM;
-
- fib->pba = args->base;
- fib->pal = args->limit;
- fib->iota = args->iota;
- fib->fmb_addr = args->fmb_addr;
+ fib.pba = args->base;
+ fib.pal = args->limit;
+ fib.iota = args->iota;
+ fib.fmb_addr = args->fmb_addr;
- rc = mpcifc_instr(req, fib);
- free_page((unsigned long) fib);
- return rc;
+ return zpci_mod_fc(req, &fib);
}
/* Modify PCI: Register I/O address translation parameters */
@@ -235,7 +170,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
}
/* Modify PCI: Unregister adapter interruptions */
-static int zpci_unregister_airq(struct zpci_dev *zdev)
+static int zpci_clear_airq(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
@@ -250,10 +185,9 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
if (zdev->fmb)
return -EINVAL;
- zdev->fmb = kmem_cache_alloc(zdev_fmb_cache, GFP_KERNEL);
+ zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
if (!zdev->fmb)
return -ENOMEM;
- memset(zdev->fmb, 0, sizeof(*zdev->fmb));
WARN_ON((u64) zdev->fmb & 0xf);
args.fmb_addr = virt_to_phys(zdev->fmb);
@@ -285,12 +219,12 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
u64 data;
int rc;
- rc = pcilg_instr(&data, req, offset);
- data = data << ((8 - len) * 8);
- data = le64_to_cpu(data);
- if (!rc)
+ rc = zpci_load(&data, req, offset);
+ if (!rc) {
+ data = data << ((8 - len) * 8);
+ data = le64_to_cpu(data);
*val = (u32) data;
- else
+ } else
*val = 0xffffffff;
return rc;
}
@@ -303,59 +237,10 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
data = cpu_to_le64(data);
data = data >> ((8 - len) * 8);
- rc = pcistg_instr(data, req, offset);
+ rc = zpci_store(data, req, offset);
return rc;
}
-void synchronize_irq(unsigned int irq)
-{
- /*
- * Not needed, the handler is protected by a lock and IRQs that occur
- * after the handler is deleted are just NOPs.
- */
-}
-EXPORT_SYMBOL_GPL(synchronize_irq);
-
-void enable_irq(unsigned int irq)
-{
- struct msi_desc *msi = irq_get_msi_desc(irq);
-
- zpci_msi_set_mask_bits(msi, 1, 0);
-}
-EXPORT_SYMBOL_GPL(enable_irq);
-
-void disable_irq(unsigned int irq)
-{
- struct msi_desc *msi = irq_get_msi_desc(irq);
-
- zpci_msi_set_mask_bits(msi, 1, 1);
-}
-EXPORT_SYMBOL_GPL(disable_irq);
-
-void disable_irq_nosync(unsigned int irq)
-{
- disable_irq(irq);
-}
-EXPORT_SYMBOL_GPL(disable_irq_nosync);
-
-unsigned long probe_irq_on(void)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_on);
-
-int probe_irq_off(unsigned long val)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(probe_irq_off);
-
-unsigned int probe_irq_mask(unsigned long val)
-{
- return val;
-}
-EXPORT_SYMBOL_GPL(probe_irq_mask);
-
void pcibios_fixup_bus(struct pci_bus *bus)
{
}
@@ -410,20 +295,28 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
{
struct zpci_dev *zdev = get_zdev_by_bus(bus);
+ int ret;
if (!zdev || devfn != ZPCI_DEVFN)
- return 0;
- return zpci_cfg_load(zdev, where, val, size);
+ ret = -ENODEV;
+ else
+ ret = zpci_cfg_load(zdev, where, val, size);
+
+ return ret;
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 val)
{
struct zpci_dev *zdev = get_zdev_by_bus(bus);
+ int ret;
if (!zdev || devfn != ZPCI_DEVFN)
- return 0;
- return zpci_cfg_store(zdev, where, val, size);
+ ret = -ENODEV;
+ else
+ ret = zpci_cfg_store(zdev, where, val, size);
+
+ return ret;
}
static struct pci_ops pci_root_ops = {
@@ -431,153 +324,144 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-/* store the last handled bit to implement fair scheduling of devices */
-static DEFINE_PER_CPU(unsigned long, next_sbit);
-
-static void zpci_irq_handler(void *dont, void *need)
+static void zpci_irq_handler(struct airq_struct *airq)
{
- unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
- int rescan = 0, max = aisb_max;
- struct zdev_irq_map *imap;
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
inc_irq_stat(IRQIO_PCI);
- sbit = start;
-
-scan:
- /* find summary_bit */
- for_each_set_bit_left_cont(sbit, bucket->aisb, max) {
- clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6));
- last = sbit;
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ si = 0;
+ continue;
+ }
- /* find vector bit */
- imap = bucket->imap[sbit];
- for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) {
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_aibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
inc_irq_stat(IRQIO_MSI);
- clear_bit(63 - mbit, &imap->aibv);
-
- spin_lock(&imap->lock);
- if (imap->cb[mbit].handler)
- imap->cb[mbit].handler(mbit,
- imap->cb[mbit].data);
- spin_unlock(&imap->lock);
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
}
}
-
- if (rescan)
- goto out;
-
- /* scan the skipped bits */
- if (start > 0) {
- sbit = 0;
- max = start;
- start = 0;
- goto scan;
- }
-
- /* enable interrupts again */
- sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
-
- /* check again to not lose initiative */
- rmb();
- max = aisb_max;
- sbit = find_first_bit_left(bucket->aisb, max);
- if (sbit != max) {
- atomic_inc(&irq_retries);
- rescan++;
- goto scan;
- }
-out:
- /* store next device bit to scan */
- __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
}
-/* msi_vecs - number of requested interrupts, 0 place function to error state */
-static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct zpci_dev *zdev = get_zdev(pdev);
- unsigned int aisb, msi_nr;
+ unsigned int hwirq, msi_vecs;
+ unsigned long aisb;
struct msi_desc *msi;
- int rc;
-
- /* store the number of used MSI vectors */
- zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS);
-
- spin_lock(&bucket->lock);
- aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE);
- /* alloc map exhausted? */
- if (aisb == PAGE_SIZE) {
- spin_unlock(&bucket->lock);
- return -EIO;
- }
- set_bit(aisb, bucket->alloc);
- spin_unlock(&bucket->lock);
-
+ struct msi_msg msg;
+ int rc, irq;
+
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+ msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
+ msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
+
+ /* Allocate adapter summary indicator bit */
+ rc = -EIO;
+ aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+ if (aisb == -1UL)
+ goto out;
zdev->aisb = aisb;
- if (aisb + 1 > aisb_max)
- aisb_max = aisb + 1;
- /* wire up IRQ shortcut pointer */
- bucket->imap[zdev->aisb] = zdev->irq_map;
- pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map);
+ /* Create adapter interrupt vector */
+ rc = -ENOMEM;
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ goto out_si;
- /* TODO: irq number 0 wont be found if we return less than requested MSIs.
- * ignore it for now and fix in common code.
- */
- msi_nr = aisb << ZPCI_MSI_VEC_BITS;
+ /* Wire up shortcut pointer */
+ zpci_aibv[aisb] = zdev->aibv;
+ /* Request MSI interrupts */
+ hwirq = 0;
list_for_each_entry(msi, &pdev->msi_list, list) {
- rc = zpci_setup_msi_irq(zdev, msi, msi_nr,
- aisb << ZPCI_MSI_VEC_BITS);
+ rc = -EIO;
+ irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
+ if (irq < 0)
+ goto out_msi;
+ rc = irq_set_msi_desc(irq, msi);
if (rc)
- return rc;
- msi_nr++;
+ goto out_msi;
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_simple_irq);
+ msg.data = hwirq;
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ msg.address_hi = zdev->msi_addr >> 32;
+ write_msi_msg(irq, &msg);
+ airq_iv_set_data(zdev->aibv, hwirq, irq);
+ hwirq++;
}
- rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv);
- if (rc) {
- clear_bit(aisb, bucket->alloc);
- dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
- return rc;
+ /* Enable adapter interrupts */
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ goto out_msi;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ if (hwirq-- == 0)
+ break;
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
}
- return (zdev->irq_map->msi_vecs == msi_vecs) ?
- 0 : zdev->irq_map->msi_vecs;
+ zpci_aibv[aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+out_si:
+ airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+ return rc;
}
-static void zpci_teardown_msi(struct pci_dev *pdev)
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
struct msi_desc *msi;
- int aisb, rc;
+ int rc;
- rc = zpci_unregister_airq(zdev);
- if (rc) {
- dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
+ /* Disable adapter interrupts */
+ rc = zpci_clear_airq(zdev);
+ if (rc)
return;
- }
-
- msi = list_first_entry(&pdev->msi_list, struct msi_desc, list);
- aisb = irq_to_dev_nr(msi->irq);
- list_for_each_entry(msi, &pdev->msi_list, list)
- zpci_teardown_msi_irq(zdev, msi);
-
- clear_bit(aisb, bucket->alloc);
- if (aisb + 1 == aisb_max)
- aisb_max--;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
- if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
- return -EINVAL;
- return zpci_setup_msi(pdev, nvec);
-}
+ /* Release MSI interrupts */
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ if (msi->msi_attrib.is_msix)
+ default_msix_mask_irq(msi, 1);
+ else
+ default_msi_mask_irq(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
- pr_info("%s: on pdev: %p\n", __func__, pdev);
- zpci_teardown_msi(pdev);
+ zpci_aibv[zdev->aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
}
static void zpci_map_resources(struct zpci_dev *zdev)
@@ -592,13 +476,12 @@ static void zpci_map_resources(struct zpci_dev *zdev)
continue;
pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
- pr_debug("BAR%i: -> start: %Lx end: %Lx\n",
- i, pdev->resource[i].start, pdev->resource[i].end);
}
-};
+}
-static void zpci_unmap_resources(struct pci_dev *pdev)
+static void zpci_unmap_resources(struct zpci_dev *zdev)
{
+ struct pci_dev *pdev = zdev->pdev;
resource_size_t len;
int i;
@@ -608,246 +491,36 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
continue;
pci_iounmap(pdev, (void *) pdev->resource[i].start);
}
-};
-
-struct zpci_dev *zpci_alloc_device(void)
-{
- struct zpci_dev *zdev;
-
- /* Alloc memory for our private pci device data */
- zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
- if (!zdev)
- return ERR_PTR(-ENOMEM);
-
- /* Alloc aibv & callback space */
- zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
- if (!zdev->irq_map)
- goto error;
- WARN_ON((u64) zdev->irq_map & 0xff);
- return zdev;
-
-error:
- kfree(zdev);
- return ERR_PTR(-ENOMEM);
-}
-
-void zpci_free_device(struct zpci_dev *zdev)
-{
- kmem_cache_free(zdev_irq_cache, zdev->irq_map);
- kfree(zdev);
-}
-
-/* Called on removal of pci_dev, leaves zpci and bus device */
-static void zpci_remove_device(struct pci_dev *pdev)
-{
- struct zpci_dev *zdev = get_zdev(pdev);
-
- dev_info(&pdev->dev, "Removing device %u\n", zdev->domain);
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
- zpci_dma_exit_device(zdev);
- zpci_fmb_disable_device(zdev);
- zpci_sysfs_remove_device(&pdev->dev);
- zpci_unmap_resources(pdev);
- list_del(&zdev->entry); /* can be called from init */
- zdev->pdev = NULL;
-}
-
-static void zpci_scan_devices(void)
-{
- struct zpci_dev *zdev;
-
- mutex_lock(&zpci_list_lock);
- list_for_each_entry(zdev, &zpci_list, entry)
- if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
- zpci_scan_device(zdev);
- mutex_unlock(&zpci_list_lock);
-}
-
-/*
- * Too late for any s390 specific setup, since interrupts must be set up
- * already which requires DMA setup too and the pci scan will access the
- * config space, which only works if the function handle is enabled.
- */
-int pcibios_enable_device(struct pci_dev *pdev, int mask)
-{
- struct resource *res;
- u16 cmd;
- int i;
-
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
-
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- res = &pdev->resource[i];
-
- if (res->flags & IORESOURCE_IO)
- return -EINVAL;
-
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- pci_write_config_word(pdev, PCI_COMMAND, cmd);
- return 0;
-}
-
-void pcibios_disable_device(struct pci_dev *pdev)
-{
- zpci_remove_device(pdev);
- pdev->sysdata = NULL;
-}
-
-int pcibios_add_platform_entries(struct pci_dev *pdev)
-{
- return zpci_sysfs_add_device(&pdev->dev);
-}
-
-int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
-{
- int msi_nr = irq_to_msi_nr(irq);
- struct zdev_irq_map *imap;
- struct msi_desc *msi;
-
- msi = irq_get_msi_desc(irq);
- if (!msi)
- return -EIO;
-
- imap = get_imap(irq);
- spin_lock_init(&imap->lock);
-
- pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
- imap->cb[msi_nr].handler = handler;
- imap->cb[msi_nr].data = data;
-
- /*
- * The generic MSI code returns with the interrupt disabled on the
- * card, using the MSI mask bits. Firmware doesn't appear to unmask
- * at that level, so we do it here by hand.
- */
- zpci_msi_set_mask_bits(msi, 1, 0);
- return 0;
-}
-
-void zpci_free_irq(unsigned int irq)
-{
- struct zdev_irq_map *imap = get_imap(irq);
- int msi_nr = irq_to_msi_nr(irq);
- unsigned long flags;
-
- pr_debug("%s: for irq: %d\n", __func__, irq);
-
- spin_lock_irqsave(&imap->lock, flags);
- imap->cb[msi_nr].handler = NULL;
- imap->cb[msi_nr].data = NULL;
- spin_unlock_irqrestore(&imap->lock, flags);
-}
-
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n",
- __func__, irq, handler, irqflags, devname);
-
- return zpci_request_irq(irq, handler, dev_id);
-}
-EXPORT_SYMBOL_GPL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- zpci_free_irq(irq);
}
-EXPORT_SYMBOL_GPL(free_irq);
static int __init zpci_irq_init(void)
{
- int cpu, rc;
-
- bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
- if (!bucket)
- return -ENOMEM;
-
- bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->aisb) {
- rc = -ENOMEM;
- goto out_aisb;
- }
-
- bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!bucket->alloc) {
- rc = -ENOMEM;
- goto out_alloc;
- }
+ int rc;
- isc_register(PCI_ISC);
- zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC);
- if (IS_ERR(zpci_irq_si)) {
- rc = PTR_ERR(zpci_irq_si);
- zpci_irq_si = NULL;
- goto out_ai;
- }
+ rc = register_adapter_interrupt(&zpci_airq);
+ if (rc)
+ goto out;
+ /* Set summary to 1 to be called every time for the ISC. */
+ *zpci_airq.lsi_ptr = 1;
- for_each_online_cpu(cpu)
- per_cpu(next_sbit, cpu) = 0;
+ rc = -ENOMEM;
+ zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ if (!zpci_aisb_iv)
+ goto out_airq;
- spin_lock_init(&bucket->lock);
- /* set summary to 1 to be called every time for the ISC */
- *zpci_irq_si = 1;
- sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
return 0;
-out_ai:
- isc_unregister(PCI_ISC);
- free_page((unsigned long) bucket->alloc);
-out_alloc:
- free_page((unsigned long) bucket->aisb);
-out_aisb:
- kfree(bucket);
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);
+out:
return rc;
}
static void zpci_irq_exit(void)
{
- free_page((unsigned long) bucket->alloc);
- free_page((unsigned long) bucket->aisb);
- s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC);
- isc_unregister(PCI_ISC);
- kfree(bucket);
-}
-
-void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m)
-{
- if (!zdev)
- return;
-
- seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries));
- seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n",
- get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb);
-}
-
-static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
- unsigned long flags, int domain)
-{
- struct resource *r;
- char *name;
- int rc;
-
- r = kzalloc(sizeof(*r), GFP_KERNEL);
- if (!r)
- return ERR_PTR(-ENOMEM);
- r->start = start;
- r->end = r->start + size - 1;
- r->flags = flags;
- r->parent = &iomem_resource;
- name = kmalloc(18, GFP_KERNEL);
- if (!name) {
- kfree(r);
- return ERR_PTR(-ENOMEM);
- }
- sprintf(name, "PCI Bus: %04x:%02x", domain, ZPCI_BUS_NR);
- r->name = name;
-
- rc = request_resource(&iomem_resource, r);
- if (rc)
- pr_debug("request resource %pR failed\n", r);
- return r;
+ airq_iv_release(zpci_aisb_iv);
+ unregister_adapter_interrupt(&zpci_airq);
}
static int zpci_alloc_iomap(struct zpci_dev *zdev)
@@ -873,17 +546,38 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
spin_unlock(&zpci_iomap_lock);
}
-static int zpci_create_device_bus(struct zpci_dev *zdev)
+static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+ unsigned long size, unsigned long flags)
{
+ struct resource *r;
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return NULL;
+
+ r->start = start;
+ r->end = r->start + size - 1;
+ r->flags = flags;
+ r->name = zdev->res_name;
+
+ if (request_resource(&iomem_resource, r)) {
+ kfree(r);
+ return NULL;
+ }
+ return r;
+}
+
+static int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources)
+{
+ unsigned long addr, size, flags;
struct resource *res;
- LIST_HEAD(resources);
- int i;
+ int i, entry;
- /* allocate mapping entry for each used bar */
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- unsigned long addr, size, flags;
- int entry;
+ snprintf(zdev->res_name, sizeof(zdev->res_name),
+ "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
if (!zdev->bars[i].size)
continue;
entry = zpci_alloc_iomap(zdev);
@@ -902,23 +596,115 @@ static int zpci_create_device_bus(struct zpci_dev *zdev)
size = 1UL << zdev->bars[i].size;
- res = zpci_alloc_bus_resource(addr, size, flags, zdev->domain);
- if (IS_ERR(res)) {
+ res = __alloc_res(zdev, addr, size, flags);
+ if (!res) {
zpci_free_iomap(zdev, entry);
- return PTR_ERR(res);
+ return -ENOMEM;
}
- pci_add_resource(&resources, res);
+ zdev->bars[i].res = res;
+ pci_add_resource(resources, res);
}
- zdev->bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
- zdev, &resources);
- if (!zdev->bus)
- return -EIO;
+ return 0;
+}
+
+static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!zdev->bars[i].size)
+ continue;
+
+ zpci_free_iomap(zdev, zdev->bars[i].map_idx);
+ release_resource(zdev->bars[i].res);
+ kfree(zdev->bars[i].res);
+ }
+}
+
+int pcibios_add_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ struct resource *res;
+ int i;
+
+ zdev->pdev = pdev;
+ pdev->dev.groups = zpci_attr_groups;
+ zpci_map_resources(zdev);
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+ if (res->parent || !res->flags)
+ continue;
+ pci_claim_resource(pdev, i);
+ }
- zdev->bus->max_bus_speed = zdev->max_bus_speed;
return 0;
}
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+
+ zdev->pdev = pdev;
+ zpci_debug_init_device(zdev);
+ zpci_fmb_enable_device(zdev);
+ zpci_map_resources(zdev);
+
+ return pci_enable_resources(pdev, mask);
+}
+
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+
+ zpci_unmap_resources(zdev);
+ zpci_fmb_disable_device(zdev);
+ zpci_debug_exit_device(zdev);
+ zdev->pdev = NULL;
+}
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+static int zpci_restore(struct device *dev)
+{
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ int ret = 0;
+
+ if (zdev->state != ZPCI_FN_STATE_ONLINE)
+ goto out;
+
+ ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+ if (ret)
+ goto out;
+
+ zpci_map_resources(zdev);
+ zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+ zdev->start_dma + zdev->iommu_size - 1,
+ (u64) zdev->dma_table);
+
+out:
+ return ret;
+}
+
+static int zpci_freeze(struct device *dev)
+{
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+ if (zdev->state != ZPCI_FN_STATE_ONLINE)
+ return 0;
+
+ zpci_unregister_ioat(zdev, 0);
+ return clp_disable_fh(zdev);
+}
+
+struct dev_pm_ops pcibios_pm_ops = {
+ .thaw_noirq = zpci_restore,
+ .freeze_noirq = zpci_freeze,
+ .restore_noirq = zpci_restore,
+ .poweroff_noirq = zpci_freeze,
+};
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
+
static int zpci_alloc_domain(struct zpci_dev *zdev)
{
spin_lock(&zpci_domain_lock);
@@ -939,6 +725,41 @@ static void zpci_free_domain(struct zpci_dev *zdev)
spin_unlock(&zpci_domain_lock);
}
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+ struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+ zpci_exit_slot(zdev);
+ zpci_cleanup_bus_resources(zdev);
+ zpci_free_domain(zdev);
+
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+
+ kfree(zdev);
+}
+
+static int zpci_scan_bus(struct zpci_dev *zdev)
+{
+ LIST_HEAD(resources);
+ int ret;
+
+ ret = zpci_setup_bus_resources(zdev, &resources);
+ if (ret)
+ return ret;
+
+ zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
+ zdev, &resources);
+ if (!zdev->bus) {
+ zpci_cleanup_bus_resources(zdev);
+ return -EIO;
+ }
+
+ zdev->bus->max_bus_speed = zdev->max_bus_speed;
+ return 0;
+}
+
int zpci_enable_device(struct zpci_dev *zdev)
{
int rc;
@@ -946,11 +767,12 @@ int zpci_enable_device(struct zpci_dev *zdev)
rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
if (rc)
goto out;
- pr_info("Enabled fh: 0x%x fid: 0x%x\n", zdev->fh, zdev->fid);
rc = zpci_dma_init_device(zdev);
if (rc)
goto out_dma;
+
+ zdev->state = ZPCI_FN_STATE_ONLINE;
return 0;
out_dma:
@@ -960,6 +782,13 @@ out:
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+ zpci_dma_exit_device(zdev);
+ return clp_disable_fh(zdev);
+}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
+
int zpci_create_device(struct zpci_dev *zdev)
{
int rc;
@@ -968,31 +797,27 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out;
- rc = zpci_create_device_bus(zdev);
+ if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto out_free;
+ }
+ rc = zpci_scan_bus(zdev);
if (rc)
- goto out_bus;
+ goto out_disable;
- mutex_lock(&zpci_list_lock);
+ spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
- if (hotplug_ops.create_slot)
- hotplug_ops.create_slot(zdev);
- mutex_unlock(&zpci_list_lock);
+ spin_unlock(&zpci_list_lock);
- if (zdev->state == ZPCI_FN_STATE_STANDBY)
- return 0;
+ zpci_init_slot(zdev);
- rc = zpci_enable_device(zdev);
- if (rc)
- goto out_start;
return 0;
-out_start:
- mutex_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- if (hotplug_ops.remove_slot)
- hotplug_ops.remove_slot(zdev);
- mutex_unlock(&zpci_list_lock);
-out_bus:
+out_disable:
+ if (zdev->state == ZPCI_FN_STATE_ONLINE)
+ zpci_disable_device(zdev);
+out_free:
zpci_free_domain(zdev);
out:
return rc;
@@ -1008,31 +833,6 @@ void zpci_stop_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_stop_device);
-int zpci_scan_device(struct zpci_dev *zdev)
-{
- zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN);
- if (!zdev->pdev) {
- pr_err("pci_scan_single_device failed for fid: 0x%x\n",
- zdev->fid);
- goto out;
- }
-
- zpci_debug_init_device(zdev);
- zpci_fmb_enable_device(zdev);
- zpci_map_resources(zdev);
- pci_bus_add_devices(zdev->bus);
-
- /* now that pdev was added to the bus mark it as used */
- zdev->state = ZPCI_FN_STATE_ONLINE;
- return 0;
-
-out:
- zpci_dma_exit_device(zdev);
- clp_disable_fh(zdev);
- return -EIO;
-}
-EXPORT_SYMBOL_GPL(zpci_scan_device);
-
static inline int barsize(u8 size)
{
return (size) ? (1 << size) >> 10 : 0;
@@ -1040,15 +840,10 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
- zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
- L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
- if (!zdev_irq_cache)
- goto error_zdev;
-
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
16, 0, NULL);
if (!zdev_fmb_cache)
- goto error_fmb;
+ goto error_zdev;
/* TODO: use realloc */
zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
@@ -1059,8 +854,6 @@ static int zpci_mem_init(void)
error_iomap:
kmem_cache_destroy(zdev_fmb_cache);
-error_fmb:
- kmem_cache_destroy(zdev_irq_cache);
error_zdev:
return -ENOMEM;
}
@@ -1068,49 +861,45 @@ error_zdev:
static void zpci_mem_exit(void)
{
kfree(zpci_iomap_start);
- kmem_cache_destroy(zdev_irq_cache);
kmem_cache_destroy(zdev_fmb_cache);
}
-unsigned int pci_probe = 1;
-EXPORT_SYMBOL_GPL(pci_probe);
+static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_initialized;
char * __init pcibios_setup(char *str)
{
if (!strcmp(str, "off")) {
- pci_probe = 0;
+ s390_pci_probe = 0;
return NULL;
}
return str;
}
+bool zpci_is_enabled(void)
+{
+ return s390_pci_initialized;
+}
+
static int __init pci_base_init(void)
{
int rc;
- if (!pci_probe)
+ if (!s390_pci_probe)
return 0;
if (!test_facility(2) || !test_facility(69)
|| !test_facility(71) || !test_facility(72))
return 0;
- pr_info("Probing PCI hardware: PCI:%d SID:%d AEN:%d\n",
- test_facility(69), test_facility(70),
- test_facility(71));
-
rc = zpci_debug_init();
if (rc)
- return rc;
+ goto out;
rc = zpci_mem_init();
if (rc)
goto out_mem;
- rc = zpci_msihash_init();
- if (rc)
- goto out_hash;
-
rc = zpci_irq_init();
if (rc)
goto out_irq;
@@ -1119,11 +908,11 @@ static int __init pci_base_init(void)
if (rc)
goto out_dma;
- rc = clp_find_pci_devices();
+ rc = clp_scan_pci_devices();
if (rc)
goto out_find;
- zpci_scan_devices();
+ s390_pci_initialized = 1;
return 0;
out_find:
@@ -1131,11 +920,16 @@ out_find:
out_dma:
zpci_irq_exit();
out_irq:
- zpci_msihash_exit();
-out_hash:
zpci_mem_exit();
out_mem:
zpci_debug_exit();
+out:
return rc;
}
-subsys_initcall(pci_base_init);
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)
+{
+ if (zpci_is_enabled())
+ clp_rescan_pci_devices_simple();
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 2c847143cbd..96545d7659f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -13,31 +13,42 @@
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/pci.h>
+#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+static inline void zpci_err_clp(unsigned int rsp, int rc)
+{
+ struct {
+ unsigned int rsp;
+ int rc;
+ } __packed data = {rsp, rc};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
/*
* Call Logical Processor
* Retry logic is handled by the caller.
*/
-static inline u8 clp_instr(void *req)
+static inline u8 clp_instr(void *data)
{
- u64 ilpm;
+ struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+ u64 ignored;
u8 cc;
asm volatile (
- " .insn rrf,0xb9a00000,%[ilpm],%[req],0x0,0x2\n"
+ " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
" ipm %[cc]\n"
" srl %[cc],28\n"
- : [cc] "=d" (cc), [ilpm] "=d" (ilpm)
+ : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
: [req] "a" (req)
- : "cc", "memory");
+ : "cc");
return cc;
}
-static void *clp_alloc_block(void)
+static void *clp_alloc_block(gfp_t gfp_mask)
{
- struct page *page = alloc_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
- return (page) ? page_address(page) : NULL;
+ return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
}
static void clp_free_block(void *ptr)
@@ -53,7 +64,6 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->msi_addr = response->msia;
zdev->fmb_update = response->mui;
- pr_debug("Supported number of MSI vectors: %u\n", response->noi);
switch (response->version) {
case 1:
zdev->max_bus_speed = PCIE_SPEED_5_0GT;
@@ -69,7 +79,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
struct clp_req_rsp_query_pci_grp *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -83,8 +93,8 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
clp_store_query_pci_fngrp(zdev, &rrb->response);
else {
- pr_err("Query PCI FNGRP failed with response: %x cc: %d\n",
- rrb->response.hdr.rsp, rc);
+ zpci_err("Q PCI FGRP:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
}
clp_free_block(rrb);
@@ -104,6 +114,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
zdev->end_dma = response->edma;
zdev->pchid = response->pchid;
zdev->pfgid = response->pfgid;
+ zdev->pft = response->pft;
+ zdev->vfn = response->vfn;
+ zdev->uid = response->uid;
+
+ memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+ if (response->util_str_avail) {
+ memcpy(zdev->util_str, response->util_str,
+ sizeof(zdev->util_str));
+ }
+
return 0;
}
@@ -112,7 +132,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
struct clp_req_rsp_query_pci *rrb;
int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -130,8 +150,8 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
if (rrb->response.pfgid)
rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
} else {
- pr_err("Query PCI failed with response: %x cc: %d\n",
- rrb->response.hdr.rsp, rc);
+ zpci_err("Q PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
}
out:
@@ -144,9 +164,10 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
struct zpci_dev *zdev;
int rc;
- zdev = zpci_alloc_device();
- if (IS_ERR(zdev))
- return PTR_ERR(zdev);
+ zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
+ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+ if (!zdev)
+ return -ENOMEM;
zdev->fh = fh;
zdev->fid = fid;
@@ -167,7 +188,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
return 0;
error:
- zpci_free_device(zdev);
+ kfree(zdev);
return rc;
}
@@ -177,9 +198,9 @@ error:
static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
- int rc, retries = 1000;
+ int rc, retries = 100;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
@@ -197,15 +218,15 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
retries--;
if (retries < 0)
break;
- msleep(1);
+ msleep(20);
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
*fh = rrb->response.fh;
else {
- pr_err("Set PCI FN failed with response: %x cc: %d\n",
- rrb->response.hdr.rsp, rc);
+ zpci_err("Set PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
}
clp_free_block(rrb);
@@ -221,6 +242,8 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
if (!rc)
/* Success -> store enabled handle in zdev */
zdev->fh = fh;
+
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
@@ -232,94 +255,138 @@ int clp_disable_fh(struct zpci_dev *zdev)
if (!zdev_enabled(zdev))
return 0;
- dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh);
rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
if (!rc)
/* Success -> store disabled handle in zdev */
zdev->fh = fh;
- else
- dev_err(&zdev->pdev->dev,
- "Failed to disable fn handle: 0x%x\n", fh);
+
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
-static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry)
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+ void (*cb)(struct clp_fh_list_entry *entry))
{
- int present, rc;
+ u64 resume_token = 0;
+ int entries, i, rc;
+
+ do {
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_LIST_PCI;
+ /* store as many entries as possible */
+ rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+ rrb->request.resume_token = resume_token;
+ /* Get PCI function handle list */
+ rc = clp_instr(rrb);
+ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+ zpci_err("List PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ goto out;
+ }
+
+ WARN_ON_ONCE(rrb->response.entry_size !=
+ sizeof(struct clp_fh_list_entry));
+
+ entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+ rrb->response.entry_size;
+
+ resume_token = rrb->response.resume_token;
+ for (i = 0; i < entries; i++)
+ cb(&rrb->response.fh_list[i]);
+ } while (resume_token);
+out:
+ return rc;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry)
+{
if (!entry->vendor_id)
return;
- /* TODO: be a little bit more scalable */
- present = zpci_fid_present(entry->fid);
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+static void __clp_rescan(struct clp_fh_list_entry *entry)
+{
+ struct zpci_dev *zdev;
- if (present)
- pr_debug("%s: device %x already present\n", __func__, entry->fid);
+ if (!entry->vendor_id)
+ return;
- /* skip already used functions */
- if (present && entry->config_state)
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev) {
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
return;
+ }
- /* aev 306: function moved to stand-by state */
- if (present && !entry->config_state) {
+ if (!entry->config_state) {
/*
* The handle is already disabled, that means no iota/irq freeing via
* the firmware interfaces anymore. Need to free resources manually
* (DMA memory, debug, sysfs)...
*/
- zpci_stop_device(get_zdev_by_fid(entry->fid));
- return;
+ zpci_stop_device(zdev);
}
+}
- rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
- if (rc)
- pr_err("Failed to add fid: 0x%x\n", entry->fid);
+static void __clp_update(struct clp_fh_list_entry *entry)
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev)
+ return;
+
+ zdev->fh = entry->fh;
}
-int clp_find_pci_devices(void)
+int clp_scan_pci_devices(void)
{
struct clp_req_rsp_list_pci *rrb;
- u64 resume_token = 0;
- int entries, i, rc;
+ int rc;
- rrb = clp_alloc_block();
+ rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
- do {
- memset(rrb, 0, sizeof(*rrb));
- rrb->request.hdr.len = sizeof(rrb->request);
- rrb->request.hdr.cmd = CLP_LIST_PCI;
- /* store as many entries as possible */
- rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
- rrb->request.resume_token = resume_token;
+ rc = clp_list_pci(rrb, __clp_add);
- /* Get PCI function handle list */
- rc = clp_instr(rrb);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
- pr_err("List PCI failed with response: 0x%x cc: %d\n",
- rrb->response.hdr.rsp, rc);
- rc = -EIO;
- goto out;
- }
+ clp_free_block(rrb);
+ return rc;
+}
- WARN_ON_ONCE(rrb->response.entry_size !=
- sizeof(struct clp_fh_list_entry));
+int clp_rescan_pci_devices(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
- entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
- rrb->response.entry_size;
- pr_info("Detected number of PCI functions: %u\n", entries);
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
- /* Store the returned resume token as input for the next call */
- resume_token = rrb->response.resume_token;
+ rc = clp_list_pci(rrb, __clp_rescan);
- for (i = 0; i < entries; i++)
- clp_check_pcifn_entry(&rrb->response.fh_list[i]);
- } while (resume_token);
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_rescan_pci_devices_simple(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_NOWAIT);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, __clp_update);
- pr_debug("Maximum number of supported PCI functions: %u\n",
- rrb->response.max_fn);
-out:
clp_free_block(rrb);
return rc;
}
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index a303c95346c..c5c66840ac0 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -11,12 +11,17 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/export.h>
#include <linux/pci.h>
#include <asm/debug.h>
#include <asm/pci_dma.h>
static struct dentry *debugfs_root;
+debug_info_t *pci_debug_msg_id;
+EXPORT_SYMBOL_GPL(pci_debug_msg_id);
+debug_info_t *pci_debug_err_id;
+EXPORT_SYMBOL_GPL(pci_debug_err_id);
static char *pci_perf_names[] = {
/* hardware counters */
@@ -99,7 +104,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
static int pci_perf_seq_open(struct inode *inode, struct file *filp)
{
return single_open(filp, pci_perf_show,
- filp->f_path.dentry->d_inode->i_private);
+ file_inode(filp)->i_private);
}
static const struct file_operations debugfs_pci_perf_fops = {
@@ -110,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = {
.release = single_release,
};
-static int pci_debug_show(struct seq_file *m, void *v)
-{
- struct zpci_dev *zdev = m->private;
-
- zpci_debug_info(zdev, m);
- return 0;
-}
-
-static int pci_debug_seq_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, pci_debug_show,
- filp->f_path.dentry->d_inode->i_private);
-}
-
-static const struct file_operations debugfs_pci_debug_fops = {
- .open = pci_debug_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
void zpci_debug_init_device(struct zpci_dev *zdev)
{
zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
@@ -144,31 +128,22 @@ void zpci_debug_init_device(struct zpci_dev *zdev)
&debugfs_pci_perf_fops);
if (IS_ERR(zdev->debugfs_perf))
zdev->debugfs_perf = NULL;
-
- zdev->debugfs_debug = debugfs_create_file("debug",
- S_IFREG | S_IRUGO | S_IWUSR,
- zdev->debugfs_dev, zdev,
- &debugfs_pci_debug_fops);
- if (IS_ERR(zdev->debugfs_debug))
- zdev->debugfs_debug = NULL;
}
void zpci_debug_exit_device(struct zpci_dev *zdev)
{
debugfs_remove(zdev->debugfs_perf);
- debugfs_remove(zdev->debugfs_debug);
debugfs_remove(zdev->debugfs_dev);
}
int __init zpci_debug_init(void)
{
/* event trace buffer */
- pci_debug_msg_id = debug_register("pci_msg", 16, 1, 16 * sizeof(long));
+ pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long));
if (!pci_debug_msg_id)
return -EINVAL;
debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
debug_set_level(pci_debug_msg_id, 3);
- zpci_dbg("Debug view initialized\n");
/* error log */
pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
@@ -176,7 +151,6 @@ int __init zpci_debug_init(void)
return -EINVAL;
debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
debug_set_level(pci_debug_err_id, 6);
- zpci_err("Debug view initialized\n");
debugfs_root = debugfs_create_dir("pci", NULL);
return 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index a547419907c..f91c0311980 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>
@@ -144,10 +145,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
return -EINVAL;
spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table) {
- dev_err(&zdev->pdev->dev, "Missing DMA table\n");
+ if (!zdev->dma_table)
goto no_refresh;
- }
for (i = 0; i < nr_pages; i++) {
dma_update_cpu_trans(zdev, page_addr, dma_addr, flags);
@@ -169,8 +168,9 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
* needs to be redone!
*/
goto no_refresh;
- rc = rpcit_instr((u64) zdev->fh << 32, start_dma_addr,
- nr_pages * PAGE_SIZE);
+
+ rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
+ nr_pages * PAGE_SIZE);
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -206,11 +206,13 @@ static void dma_cleanup_tables(struct zpci_dev *zdev)
zdev->dma_table = NULL;
}
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, unsigned long start,
- int size)
+static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+ unsigned long start, int size)
{
- unsigned long boundary_size = 0x1000000;
+ unsigned long boundary_size;
+ boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
start, size, 0, boundary_size, 0);
}
@@ -262,14 +264,12 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
unsigned long nr_pages, iommu_page_index;
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
dma_addr_t dma_addr;
- WARN_ON_ONCE(offset > PAGE_SIZE);
-
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
@@ -280,24 +280,22 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
size = nr_pages * PAGE_SIZE;
dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
- if (dma_addr + size > zdev->end_dma) {
- dev_err(dev, "(dma_addr: 0x%16.16LX + size: 0x%16.16lx) > end_dma: 0x%16.16Lx\n",
- dma_addr, size, zdev->end_dma);
+ if (dma_addr + size > zdev->end_dma)
goto out_free;
- }
if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
flags |= ZPCI_TABLE_PROTECTED;
if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
- return dma_addr + offset;
+ atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
+ return dma_addr + (offset & ~PAGE_MASK);
}
out_free:
dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
- dev_err(dev, "Failed to map addr: %lx\n", pa);
+ zpci_err("map error:\n");
+ zpci_err_hex(&pa, sizeof(pa));
return DMA_ERROR_CODE;
}
@@ -305,17 +303,19 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
unsigned long iommu_page_index;
int npages;
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr = dma_addr & PAGE_MASK;
if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID))
- dev_err(dev, "Failed to unmap addr: %Lx\n", dma_addr);
+ ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+ zpci_err("unmap error:\n");
+ zpci_err_hex(&dma_addr, sizeof(dma_addr));
+ }
- atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
+ atomic64_add(npages, &zdev->fmb->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
dma_free_iommu(zdev, iommu_page_index, npages);
}
@@ -324,7 +324,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
struct dma_attrs *attrs)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
struct page *page;
unsigned long pa;
dma_addr_t map;
@@ -334,7 +334,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
if (!page)
return NULL;
- atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
pa = page_to_phys(page);
memset((void *) pa, 0, size);
@@ -345,6 +344,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
return NULL;
}
+ atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
if (dma_handle)
*dma_handle = map;
return (void *) pa;
@@ -354,8 +354,11 @@ static void s390_dma_free(struct device *dev, size_t size,
void *pa, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
- s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size),
- DMA_BIDIRECTIONAL, NULL);
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+ size = PAGE_ALIGN(size);
+ atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+ s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
free_pages((unsigned long) pa, get_order(size));
}
@@ -408,7 +411,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
int zpci_dma_init_device(struct zpci_dev *zdev)
{
- unsigned int bitmap_order;
int rc;
spin_lock_init(&zdev->iommu_bitmap_lock);
@@ -422,12 +424,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- bitmap_order = get_order(zdev->iommu_pages / 8);
- pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n",
- zdev->iommu_size, zdev->iommu_pages, bitmap_order);
-
- zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- bitmap_order);
+ zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->iommu_bitmap) {
rc = -ENOMEM;
goto out_reg;
@@ -452,8 +449,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
{
zpci_unregister_ioat(zdev, 0);
dma_cleanup_tables(zdev);
- free_pages((unsigned long) zdev->iommu_bitmap,
- get_order(zdev->iommu_pages / 8));
+ vfree(zdev->iommu_bitmap);
zdev->iommu_bitmap = NULL;
zdev->next_bit = 0;
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ec62e3a0dc0..6d7f5a3016c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -10,6 +10,8 @@
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@@ -41,55 +43,94 @@ struct zpci_ccdf_avail {
u16 pec; /* PCI event code */
} __packed;
-static void zpci_event_log_err(struct zpci_ccdf_err *ccdf)
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- zpci_err("SEI error CCD:\n");
+ zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
- dev_err(&zdev->pdev->dev, "event code: 0x%x\n", ccdf->pec);
+
+ if (!zdev)
+ return;
+
+ pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
+ pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
}
-static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf)
+void zpci_event_error(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ int ret;
- pr_err("%s%s: availability event: fh: 0x%x fid: 0x%x event code: 0x%x reason:",
- (zdev) ? dev_driver_string(&zdev->pdev->dev) : "?",
- (zdev) ? dev_name(&zdev->pdev->dev) : "?",
- ccdf->fh, ccdf->fid, ccdf->pec);
- print_hex_dump(KERN_CONT, "ccdf", DUMP_PREFIX_OFFSET,
- 16, 1, ccdf, sizeof(*ccdf), false);
+ pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+ zpci_err("avail CCDF:\n");
+ zpci_err_hex(ccdf, sizeof(*ccdf));
switch (ccdf->pec) {
- case 0x0301:
- zpci_enable_device(zdev);
+ case 0x0301: /* Standby -> Configured */
+ if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+ break;
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
+ zdev->fh = ccdf->fh;
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ break;
+ pci_rescan_bus(zdev->bus);
+ break;
+ case 0x0302: /* Reserved -> Standby */
+ if (!zdev)
+ clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ break;
+ case 0x0303: /* Deconfiguration requested */
+ if (pdev)
+ pci_stop_and_remove_bus_device(pdev);
+
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ break;
+
+ ret = sclp_pci_deconfigure(zdev->fid);
+ zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+ if (!ret)
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+
+ break;
+ case 0x0304: /* Configured -> Standby */
+ if (pdev) {
+ /* Give the driver a hint that the function is
+ * already unusable. */
+ pdev->error_state = pci_channel_io_perm_failure;
+ pci_stop_and_remove_bus_device(pdev);
+ }
+
+ zdev->fh = ccdf->fh;
+ zpci_disable_device(zdev);
+ zdev->state = ZPCI_FN_STATE_STANDBY;
break;
- case 0x0302:
- clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ case 0x0306: /* 0x308 or 0x302 for multiple devices */
+ clp_rescan_pci_devices();
break;
- case 0x0306:
- clp_find_pci_devices();
+ case 0x0308: /* Standby -> Reserved */
+ if (!zdev)
+ break;
+ pci_stop_root_bus(zdev->bus);
+ pci_remove_root_bus(zdev->bus);
break;
default:
break;
}
}
-void zpci_event_error(void *data)
-{
- struct zpci_ccdf_err *ccdf = data;
- struct zpci_dev *zdev;
-
- zpci_event_log_err(ccdf);
- zdev = get_zdev_by_fid(ccdf->fid);
- if (!zdev) {
- pr_err("Error event for unknown fid: %x", ccdf->fid);
- return;
- }
-}
-
void zpci_event_availability(void *data)
{
- zpci_event_log_avail(data);
+ if (zpci_is_enabled())
+ __zpci_event_availability(data);
}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
new file mode 100644
index 00000000000..85267c058af
--- /dev/null
+++ b/arch/s390/pci/pci_insn.c
@@ -0,0 +1,202 @@
+/*
+ * s390 specific pci instructions
+ *
+ * Copyright IBM Corp. 2013
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <asm/pci_insn.h>
+#include <asm/processor.h>
+
+#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
+
+/* Modify PCI Function Controls */
+static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+ u8 cc;
+
+ asm volatile (
+ " .insn rxy,0xe300000000d0,%[req],%[fib]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
+ : : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
+{
+ u8 cc, status;
+
+ do {
+ cc = __mpcifc(req, fib, &status);
+ if (cc == 2)
+ msleep(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ printk_once(KERN_ERR "%s: error cc: %d status: %d\n",
+ __func__, cc, status);
+ return (cc) ? -EIO : 0;
+}
+
+/* Refresh PCI Translations */
+static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
+{
+ register u64 __addr asm("2") = addr;
+ register u64 __range asm("3") = range;
+ u8 cc;
+
+ asm volatile (
+ " .insn rre,0xb9d30000,%[fn],%[addr]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [fn] "+d" (fn)
+ : [addr] "d" (__addr), "d" (__range)
+ : "cc");
+ *status = fn >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+{
+ u8 cc, status;
+
+ do {
+ cc = __rpcit(fn, addr, range, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n",
+ __func__, cc, status, addr, range);
+ return (cc) ? -EIO : 0;
+}
+
+/* Set Interruption Controls */
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+{
+ asm volatile (
+ " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
+ : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+}
+
+/* PCI Load */
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+ u64 __data;
+
+ asm volatile (
+ " .insn rre,0xb9d20000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req)
+ : "d" (__offset)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ if (!cc)
+ *data = __data;
+
+ return cc;
+}
+
+int zpci_load(u64 *data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcilg(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+ __func__, cc, status, req, offset);
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_load);
+
+/* PCI Store */
+static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rre,0xb9d00000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (__req)
+ : "d" (__offset), [data] "d" (data)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store(u64 data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcistg(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+ __func__, cc, status, req, offset);
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store);
+
+/* PCI Store Block */
+static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (req)
+ : [offset] "d" (offset), [data] "Q" (*data)
+ : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcistb(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+ __func__, cc, status, req, offset);
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
deleted file mode 100644
index 90fd3482b9e..00000000000
--- a/arch/s390/pci/pci_msi.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/rculist.h>
-#include <linux/hash.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <asm/hw_irq.h>
-
-/* mapping of irq numbers to msi_desc */
-static struct hlist_head *msi_hash;
-static unsigned int msihash_shift = 6;
-#define msi_hashfn(nr) hash_long(nr, msihash_shift)
-
-static DEFINE_SPINLOCK(msi_map_lock);
-
-struct msi_desc *__irq_get_msi_desc(unsigned int irq)
-{
- struct hlist_node *entry;
- struct msi_map *map;
-
- hlist_for_each_entry_rcu(map, entry,
- &msi_hash[msi_hashfn(irq)], msi_chain)
- if (map->irq == irq)
- return map->msi;
- return NULL;
-}
-
-int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
-{
- if (msi->msi_attrib.is_msix) {
- int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL;
- msi->masked = readl(msi->mask_base + offset);
- writel(flag, msi->mask_base + offset);
- } else {
- if (msi->msi_attrib.maskbit) {
- int pos;
- u32 mask_bits;
-
- pos = (long) msi->mask_base;
- pci_read_config_dword(msi->dev, pos, &mask_bits);
- mask_bits &= ~(mask);
- mask_bits |= flag & mask;
- pci_write_config_dword(msi->dev, pos, mask_bits);
- } else {
- return 0;
- }
- }
-
- msi->msi_attrib.maskbit = !!flag;
- return 1;
-}
-
-int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
- unsigned int nr, int offset)
-{
- struct msi_map *map;
- struct msi_msg msg;
- int rc;
-
- map = kmalloc(sizeof(*map), GFP_KERNEL);
- if (map == NULL)
- return -ENOMEM;
-
- map->irq = nr;
- map->msi = msi;
- zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
-
- pr_debug("%s hashing irq: %u to bucket nr: %llu\n",
- __func__, nr, msi_hashfn(nr));
- hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
-
- spin_lock(&msi_map_lock);
- rc = irq_set_msi_desc(nr, msi);
- if (rc) {
- spin_unlock(&msi_map_lock);
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
- return rc;
- }
- spin_unlock(&msi_map_lock);
-
- msg.data = nr - offset;
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- msg.address_hi = zdev->msi_addr >> 32;
- write_msi_msg(nr, &msg);
- return 0;
-}
-
-void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
-{
- int irq = msi->irq & ZPCI_MSI_MASK;
- struct msi_map *map;
-
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- zpci_msi_set_mask_bits(msi, 1, 1);
-
- spin_lock(&msi_map_lock);
- map = zdev->msi_map[irq];
- hlist_del_rcu(&map->msi_chain);
- kfree(map);
- zdev->msi_map[irq] = NULL;
- spin_unlock(&msi_map_lock);
-}
-
-/*
- * The msi hash table has 256 entries which is good for 4..20
- * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
- * the hash table size adjustable later.
- */
-int __init zpci_msihash_init(void)
-{
- unsigned int i;
-
- msi_hash = kmalloc(256 * sizeof(*msi_hash), GFP_KERNEL);
- if (!msi_hash)
- return -ENOMEM;
-
- for (i = 0; i < (1U << msihash_shift); i++)
- INIT_HLIST_HEAD(&msi_hash[i]);
- return 0;
-}
-
-void __init zpci_msihash_exit(void)
-{
- kfree(msi_hash);
-}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a42cce69d0a..9190214b870 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -12,75 +12,99 @@
#include <linux/stat.h>
#include <linux/pci.h>
-static ssize_t show_fid(struct device *dev, struct device_attribute *attr,
- char *buf)
+#define zpci_attr(name, fmt, member) \
+static ssize_t name##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \
+ \
+ return sprintf(buf, fmt, zdev->member); \
+} \
+static DEVICE_ATTR_RO(name)
+
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(uid, "0x%x\n", uid);
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
+
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
+ int ret;
- sprintf(buf, "0x%08x\n", zdev->fid);
- return strlen(buf);
-}
-static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL);
+ if (!device_remove_file_self(dev, attr))
+ return count;
-static ssize_t show_fh(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ pci_stop_and_remove_bus_device(pdev);
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ return ret;
- sprintf(buf, "0x%08x\n", zdev->fh);
- return strlen(buf);
-}
-static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL);
-
-static ssize_t show_pchid(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ return ret;
- sprintf(buf, "0x%04x\n", zdev->pchid);
- return strlen(buf);
+ pci_rescan_bus(zdev->bus);
+ return count;
}
-static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL);
+static DEVICE_ATTR_WO(recover);
-static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
- struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
- sprintf(buf, "0x%02x\n", zdev->pfgid);
- return strlen(buf);
+ return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+ sizeof(zdev->util_str));
}
-static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
-
-static struct device_attribute *zpci_dev_attrs[] = {
- &dev_attr_function_id,
- &dev_attr_function_handle,
- &dev_attr_pchid,
- &dev_attr_pfgid,
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static struct bin_attribute *zpci_bin_attrs[] = {
+ &bin_attr_util_string,
NULL,
};
-int zpci_sysfs_add_device(struct device *dev)
-{
- int i, rc = 0;
-
- for (i = 0; zpci_dev_attrs[i]; i++) {
- rc = device_create_file(dev, zpci_dev_attrs[i]);
- if (rc)
- goto error;
- }
- return 0;
-
-error:
- while (--i >= 0)
- device_remove_file(dev, zpci_dev_attrs[i]);
- return rc;
-}
+static struct attribute *zpci_dev_attrs[] = {
+ &dev_attr_function_id.attr,
+ &dev_attr_function_handle.attr,
+ &dev_attr_pchid.attr,
+ &dev_attr_pfgid.attr,
+ &dev_attr_pft.attr,
+ &dev_attr_vfn.attr,
+ &dev_attr_uid.attr,
+ &dev_attr_recover.attr,
+ NULL,
+};
+static struct attribute_group zpci_attr_group = {
+ .attrs = zpci_dev_attrs,
+ .bin_attrs = zpci_bin_attrs,
+};
-void zpci_sysfs_remove_device(struct device *dev)
-{
- int i;
+static struct attribute *pfip_attrs[] = {
+ &dev_attr_segment0.attr,
+ &dev_attr_segment1.attr,
+ &dev_attr_segment2.attr,
+ &dev_attr_segment3.attr,
+ NULL,
+};
+static struct attribute_group pfip_attr_group = {
+ .name = "pfip",
+ .attrs = pfip_attrs,
+};
- for (i = 0; zpci_dev_attrs[i]; i++)
- device_remove_file(dev, zpci_dev_attrs[i]);
-}
+const struct attribute_group *zpci_attr_groups[] = {
+ &zpci_attr_group,
+ &pfip_attr_group,
+ NULL,
+};